author     trav90 <travawine@palemoon.org>  2018-10-19 21:52:15 -0500
committer  trav90 <travawine@palemoon.org>  2018-10-19 21:52:20 -0500
commit     bbcc64772580c8a979288791afa02d30bc476d2e (patch)
tree       437ce94c3fdd7497508e5b55de06c6d011678597 /third_party/aom/av1/common
parent     14805f6ddbfb173c327768fff9f81f40ce5e81b0 (diff)
Update aom to v1.0.0
Update aom to commit id d14c5bb4f336ef1842046089849dee4a301fbbf0.
Diffstat (limited to 'third_party/aom/av1/common')
-rw-r--r--  third_party/aom/av1/common/alloccommon.c | 386
-rw-r--r--  third_party/aom/av1/common/alloccommon.h | 8
-rw-r--r--  third_party/aom/av1/common/arm/av1_txfm_neon.c | 28
-rw-r--r--  third_party/aom/av1/common/arm/blend_a64_hmask_neon.c | 134
-rw-r--r--  third_party/aom/av1/common/arm/blend_a64_vmask_neon.c | 141
-rw-r--r--  third_party/aom/av1/common/arm/cfl_neon.c | 584
-rw-r--r--  third_party/aom/av1/common/arm/convolve_neon.c | 1134
-rw-r--r--  third_party/aom/av1/common/arm/convolve_neon.h | 228
-rw-r--r--  third_party/aom/av1/common/arm/intrapred_neon.c | 79
-rw-r--r--  third_party/aom/av1/common/arm/jnt_convolve_neon.c | 1326
-rw-r--r--  third_party/aom/av1/common/arm/mem_neon.h | 401
-rw-r--r--  third_party/aom/av1/common/arm/neon/iht4x4_add_neon.c | 228
-rw-r--r--  third_party/aom/av1/common/arm/neon/iht8x8_add_neon.c | 594
-rw-r--r--  third_party/aom/av1/common/arm/reconinter_neon.c | 86
-rw-r--r--  third_party/aom/av1/common/arm/transpose_neon.h | 422
-rw-r--r--  third_party/aom/av1/common/arm/wiener_convolve_neon.c | 401
-rw-r--r--  third_party/aom/av1/common/av1_fwd_txfm1d.c | 2355
-rw-r--r--  third_party/aom/av1/common/av1_fwd_txfm1d.h | 61
-rw-r--r--  third_party/aom/av1/common/av1_fwd_txfm1d_cfg.h | 363
-rw-r--r--  third_party/aom/av1/common/av1_fwd_txfm2d.c | 413
-rw-r--r--  third_party/aom/av1/common/av1_inv_txfm1d.c | 2726
-rw-r--r--  third_party/aom/av1/common/av1_inv_txfm1d.h | 66
-rw-r--r--  third_party/aom/av1/common/av1_inv_txfm1d_cfg.h | 382
-rw-r--r--  third_party/aom/av1/common/av1_inv_txfm2d.c | 616
-rw-r--r--  third_party/aom/av1/common/av1_loopfilter.c | 4670
-rw-r--r--  third_party/aom/av1/common/av1_loopfilter.h | 233
-rw-r--r--  third_party/aom/av1/common/av1_rtcd.c | 6
-rwxr-xr-x  third_party/aom/av1/common/av1_rtcd_defs.pl | 764
-rw-r--r--  third_party/aom/av1/common/av1_txfm.c | 110
-rw-r--r--  third_party/aom/av1/common/av1_txfm.h | 319
-rw-r--r--  third_party/aom/av1/common/blockd.c | 255
-rw-r--r--  third_party/aom/av1/common/blockd.h | 1538
-rw-r--r--  third_party/aom/av1/common/cdef.c | 231
-rw-r--r--  third_party/aom/av1/common/cdef.h | 9
-rw-r--r--  third_party/aom/av1/common/cdef_block.c | 391
-rw-r--r--  third_party/aom/av1/common/cdef_block.h | 49
-rw-r--r--  third_party/aom/av1/common/cdef_block_avx2.c | 2
-rw-r--r--  third_party/aom/av1/common/cdef_block_neon.c | 2
-rw-r--r--  third_party/aom/av1/common/cdef_block_simd.h | 813
-rw-r--r--  third_party/aom/av1/common/cdef_block_sse2.c | 2
-rw-r--r--  third_party/aom/av1/common/cdef_block_sse4.c | 2
-rw-r--r--  third_party/aom/av1/common/cdef_block_ssse3.c | 2
-rw-r--r--  third_party/aom/av1/common/cfl.c | 642
-rw-r--r--  third_party/aom/av1/common/cfl.h | 274
-rw-r--r--  third_party/aom/av1/common/clpf.c | 115
-rw-r--r--  third_party/aom/av1/common/clpf_neon.c | 14
-rw-r--r--  third_party/aom/av1/common/clpf_simd.h | 456
-rw-r--r--  third_party/aom/av1/common/clpf_sse2.c | 14
-rw-r--r--  third_party/aom/av1/common/clpf_sse4.c | 14
-rw-r--r--  third_party/aom/av1/common/clpf_ssse3.c | 14
-rw-r--r--  third_party/aom/av1/common/common.h | 3
-rw-r--r--  third_party/aom/av1/common/common_data.h | 2023
-rw-r--r--  third_party/aom/av1/common/convolve.c | 2126
-rw-r--r--  third_party/aom/av1/common/convolve.h | 173
-rw-r--r--  third_party/aom/av1/common/daala_tx.c | 4331
-rw-r--r--  third_party/aom/av1/common/daala_tx.h | 53
-rw-r--r--  third_party/aom/av1/common/debugmodes.c | 29
-rw-r--r--  third_party/aom/av1/common/entropy.c | 2564
-rw-r--r--  third_party/aom/av1/common/entropy.h | 361
-rw-r--r--  third_party/aom/av1/common/entropymode.c | 6748
-rw-r--r--  third_party/aom/av1/common/entropymode.h | 661
-rw-r--r--  third_party/aom/av1/common/entropymv.c | 227
-rw-r--r--  third_party/aom/av1/common/entropymv.h | 52
-rw-r--r--  third_party/aom/av1/common/enums.h | 624
-rw-r--r--  third_party/aom/av1/common/filter.c | 274
-rw-r--r--  third_party/aom/av1/common/filter.h | 85
-rw-r--r--  third_party/aom/av1/common/frame_buffers.c | 1
-rw-r--r--  third_party/aom/av1/common/generic_code.c | 112
-rw-r--r--  third_party/aom/av1/common/generic_code.h | 81
-rw-r--r--  third_party/aom/av1/common/idct.c | 3091
-rw-r--r--  third_party/aom/av1/common/idct.h | 57
-rw-r--r--  third_party/aom/av1/common/laplace_tables.c | 657
-rw-r--r--  third_party/aom/av1/common/mips/msa/av1_idct16x16_msa.c | 81
-rw-r--r--  third_party/aom/av1/common/mips/msa/av1_idct4x4_msa.c | 62
-rw-r--r--  third_party/aom/av1/common/mips/msa/av1_idct8x8_msa.c | 80
-rw-r--r--  third_party/aom/av1/common/mv.h | 110
-rw-r--r--  third_party/aom/av1/common/mvref_common.c | 2726
-rw-r--r--  third_party/aom/av1/common/mvref_common.h | 445
-rw-r--r--  third_party/aom/av1/common/ncobmc_kernels.c | 1181
-rw-r--r--  third_party/aom/av1/common/ncobmc_kernels.h | 22
-rw-r--r--  third_party/aom/av1/common/obmc.h | 39
-rw-r--r--  third_party/aom/av1/common/odintrin.c | 10
-rw-r--r--  third_party/aom/av1/common/odintrin.h | 180
-rw-r--r--  third_party/aom/av1/common/onyxc_int.h | 1161
-rw-r--r--  third_party/aom/av1/common/partition.c | 256
-rw-r--r--  third_party/aom/av1/common/partition.h | 40
-rw-r--r--  third_party/aom/av1/common/ppc/cfl_ppc.c | 153
-rw-r--r--  third_party/aom/av1/common/pred_common.c | 1329
-rw-r--r--  third_party/aom/av1/common/pred_common.h | 385
-rw-r--r--  third_party/aom/av1/common/pvq.c | 1007
-rw-r--r--  third_party/aom/av1/common/pvq.h | 179
-rw-r--r--  third_party/aom/av1/common/pvq_state.c | 50
-rw-r--r--  third_party/aom/av1/common/pvq_state.h | 52
-rw-r--r--  third_party/aom/av1/common/quant_common.c | 585
-rw-r--r--  third_party/aom/av1/common/quant_common.h | 71
-rw-r--r--  third_party/aom/av1/common/reconinter.c | 3965
-rw-r--r--  third_party/aom/av1/common/reconinter.h | 563
-rw-r--r--  third_party/aom/av1/common/reconintra.c | 3190
-rw-r--r--  third_party/aom/av1/common/reconintra.h | 98
-rw-r--r--  third_party/aom/av1/common/resize.c | 769
-rw-r--r--  third_party/aom/av1/common/resize.h | 43
-rw-r--r--  third_party/aom/av1/common/restoration.c | 2684
-rw-r--r--  third_party/aom/av1/common/restoration.h | 335
-rw-r--r--  third_party/aom/av1/common/scale.c | 152
-rw-r--r--  third_party/aom/av1/common/scale.h | 14
-rw-r--r--  third_party/aom/av1/common/scan.c | 6307
-rw-r--r--  third_party/aom/av1/common/scan.h | 101
-rw-r--r--  third_party/aom/av1/common/seg_common.c | 39
-rw-r--r--  third_party/aom/av1/common/seg_common.h | 60
-rw-r--r--  third_party/aom/av1/common/thread_common.c | 977
-rw-r--r--  third_party/aom/av1/common/thread_common.h | 90
-rw-r--r--  third_party/aom/av1/common/tile_common.c | 236
-rw-r--r--  third_party/aom/av1/common/tile_common.h | 27
-rw-r--r--  third_party/aom/av1/common/timing.c | 79
-rw-r--r--  third_party/aom/av1/common/timing.h | 59
-rw-r--r--  third_party/aom/av1/common/token_cdfs.h | 8769
-rw-r--r--  third_party/aom/av1/common/txb_common.c | 559
-rw-r--r--  third_party/aom/av1/common/txb_common.h | 911
-rw-r--r--  third_party/aom/av1/common/warped_motion.c | 1048
-rw-r--r--  third_party/aom/av1/common/warped_motion.h | 99
-rw-r--r--  third_party/aom/av1/common/x86/av1_convolve_horiz_rs_sse4.c | 228
-rw-r--r--  third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c | 665
-rw-r--r--  third_party/aom/av1/common/x86/av1_convolve_ssse3.c | 1034
-rw-r--r--  third_party/aom/av1/common/x86/av1_fwd_txfm1d_sse4.c | 839
-rw-r--r--  third_party/aom/av1/common/x86/av1_fwd_txfm2d_sse4.c | 82
-rw-r--r--  third_party/aom/av1/common/x86/av1_highbd_convolve_sse4.c | 334
-rw-r--r--  third_party/aom/av1/common/x86/av1_inv_txfm_avx2.c | 1957
-rw-r--r--  third_party/aom/av1/common/x86/av1_inv_txfm_avx2.h | 210
-rw-r--r--  third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.c | 2917
-rw-r--r--  third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.h | 236
-rw-r--r--  third_party/aom/av1/common/x86/av1_txfm1d_sse4.h | 144
-rw-r--r--  third_party/aom/av1/common/x86/av1_txfm_sse2.h | 317
-rw-r--r--  third_party/aom/av1/common/x86/av1_txfm_sse4.c | 10
-rw-r--r--  third_party/aom/av1/common/x86/av1_txfm_sse4.h | 60
-rw-r--r--  third_party/aom/av1/common/x86/cfl_avx2.c | 491
-rw-r--r--  third_party/aom/av1/common/x86/cfl_simd.h | 238
-rw-r--r--  third_party/aom/av1/common/x86/cfl_sse2.c | 89
-rw-r--r--  third_party/aom/av1/common/x86/cfl_ssse3.c | 393
-rw-r--r--  third_party/aom/av1/common/x86/convolve_2d_avx2.c | 285
-rw-r--r--  third_party/aom/av1/common/x86/convolve_2d_sse2.c | 496
-rw-r--r--  third_party/aom/av1/common/x86/convolve_avx2.c | 581
-rw-r--r--  third_party/aom/av1/common/x86/convolve_sse2.c | 339
-rw-r--r--  third_party/aom/av1/common/x86/filterintra_sse4.c | 931
-rw-r--r--  third_party/aom/av1/common/x86/highbd_convolve_2d_avx2.c | 327
-rw-r--r--  third_party/aom/av1/common/x86/highbd_convolve_2d_sse2.c | 191
-rw-r--r--  third_party/aom/av1/common/x86/highbd_convolve_2d_sse4.c | 421
-rw-r--r--  third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c | 528
-rw-r--r--  third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c | 339
-rw-r--r--  third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c | 2179
-rw-r--r--  third_party/aom/av1/common/x86/highbd_jnt_convolve_avx2.c | 853
-rw-r--r--  third_party/aom/av1/common/x86/highbd_jnt_convolve_sse4.c | 383
-rw-r--r--  third_party/aom/av1/common/x86/highbd_txfm_utility_sse4.h | 10
-rw-r--r--  third_party/aom/av1/common/x86/highbd_warp_plane_sse4.c | 454
-rw-r--r--  third_party/aom/av1/common/x86/highbd_warp_plane_ssse3.c | 365
-rw-r--r--  third_party/aom/av1/common/x86/highbd_wiener_convolve_avx2.c | 245
-rw-r--r--  third_party/aom/av1/common/x86/highbd_wiener_convolve_ssse3.c | 202
-rw-r--r--  third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c | 450
-rw-r--r--  third_party/aom/av1/common/x86/idct_intrin_sse2.c | 1411
-rw-r--r--  third_party/aom/av1/common/x86/intra_edge_sse4.c | 12
-rw-r--r--  third_party/aom/av1/common/x86/jnt_convolve_avx2.c | 704
-rw-r--r--  third_party/aom/av1/common/x86/jnt_convolve_sse2.c | 385
-rw-r--r--  third_party/aom/av1/common/x86/jnt_convolve_ssse3.c | 232
-rw-r--r--  third_party/aom/av1/common/x86/pvq_sse4.c | 252
-rw-r--r--  third_party/aom/av1/common/x86/pvq_sse4.h | 13
-rw-r--r--  third_party/aom/av1/common/x86/reconinter_avx2.c | 124
-rw-r--r--  third_party/aom/av1/common/x86/reconinter_sse4.c | 153
-rw-r--r--  third_party/aom/av1/common/x86/reconinter_ssse3.c | 116
-rw-r--r--  third_party/aom/av1/common/x86/selfguided_avx2.c | 719
-rw-r--r--  third_party/aom/av1/common/x86/selfguided_sse4.c | 2254
-rw-r--r--  third_party/aom/av1/common/x86/warp_plane_sse2.c | 359
-rw-r--r--  third_party/aom/av1/common/x86/warp_plane_sse4.c (renamed from third_party/aom/av1/common/x86/warp_plane_ssse3.c) | 384
-rw-r--r--  third_party/aom/av1/common/x86/wiener_convolve_avx2.c | 260
-rw-r--r--  third_party/aom/av1/common/x86/wiener_convolve_sse2.c | 198
-rw-r--r--  third_party/aom/av1/common/zigzag.h | 33
-rw-r--r--  third_party/aom/av1/common/zigzag16.c | 157
-rw-r--r--  third_party/aom/av1/common/zigzag32.c | 199
-rw-r--r--  third_party/aom/av1/common/zigzag4.c | 22
-rw-r--r--  third_party/aom/av1/common/zigzag8.c | 50
178 files changed, 42393 insertions, 75281 deletions
diff --git a/third_party/aom/av1/common/alloccommon.c b/third_party/aom/av1/common/alloccommon.c
index fd635686f..49902cc7d 100644
--- a/third_party/aom/av1/common/alloccommon.c
+++ b/third_party/aom/av1/common/alloccommon.c
@@ -10,7 +10,8 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
#include "aom_mem/aom_mem.h"
#include "av1/common/alloccommon.h"
@@ -25,16 +26,43 @@ int av1_get_MBs(int width, int height) {
const int mi_cols = aligned_width >> MI_SIZE_LOG2;
const int mi_rows = aligned_height >> MI_SIZE_LOG2;
-#if CONFIG_CB4X4
const int mb_cols = (mi_cols + 2) >> 2;
const int mb_rows = (mi_rows + 2) >> 2;
-#else
- const int mb_cols = (mi_cols + 1) >> 1;
- const int mb_rows = (mi_rows + 1) >> 1;
-#endif
return mb_rows * mb_cols;
}
+#if LOOP_FILTER_BITMASK
+static int alloc_loop_filter_mask(AV1_COMMON *cm) {
+ aom_free(cm->lf.lfm);
+ cm->lf.lfm = NULL;
+
+ // Each lfm holds bit masks for all the 4x4 blocks in a max
+ // 64x64 (128x128 for ext_partitions) region. The stride
+ // and rows are rounded up / truncated to a multiple of 16
+ // (32 for ext_partition).
+ cm->lf.lfm_stride = (cm->mi_cols + (MI_SIZE_64X64 - 1)) >> MIN_MIB_SIZE_LOG2;
+ cm->lf.lfm_num = ((cm->mi_rows + (MI_SIZE_64X64 - 1)) >> MIN_MIB_SIZE_LOG2) *
+ cm->lf.lfm_stride;
+ cm->lf.lfm =
+ (LoopFilterMask *)aom_calloc(cm->lf.lfm_num, sizeof(*cm->lf.lfm));
+ if (!cm->lf.lfm) return 1;
+
+ unsigned int i;
+ for (i = 0; i < cm->lf.lfm_num; ++i) av1_zero(cm->lf.lfm[i]);
+
+ return 0;
+}
+
+static void free_loop_filter_mask(AV1_COMMON *cm) {
+ if (cm->lf.lfm == NULL) return;
+
+ aom_free(cm->lf.lfm);
+ cm->lf.lfm = NULL;
+ cm->lf.lfm_num = 0;
+ cm->lf.lfm_stride = 0;
+}
+#endif
+
void av1_set_mb_mi(AV1_COMMON *cm, int width, int height) {
// Ensure that the decoded width and height are both multiples of
// 8 luma pixels (note: this may only be a multiple of 4 chroma pixels if
@@ -48,79 +76,13 @@ void av1_set_mb_mi(AV1_COMMON *cm, int width, int height) {
cm->mi_rows = aligned_height >> MI_SIZE_LOG2;
cm->mi_stride = calc_mi_size(cm->mi_cols);
-#if CONFIG_CB4X4
cm->mb_cols = (cm->mi_cols + 2) >> 2;
cm->mb_rows = (cm->mi_rows + 2) >> 2;
-#else
- cm->mb_cols = (cm->mi_cols + 1) >> 1;
- cm->mb_rows = (cm->mi_rows + 1) >> 1;
-#endif
cm->MBs = cm->mb_rows * cm->mb_cols;
-}
-
-static int alloc_seg_map(AV1_COMMON *cm, int seg_map_size) {
- int i;
-
- for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) {
- cm->seg_map_array[i] = (uint8_t *)aom_calloc(seg_map_size, 1);
- if (cm->seg_map_array[i] == NULL) return 1;
- }
- cm->seg_map_alloc_size = seg_map_size;
-
- // Init the index.
- cm->seg_map_idx = 0;
- cm->prev_seg_map_idx = 1;
-
- cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
- if (!cm->frame_parallel_decode)
- cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
-
- return 0;
-}
-
-static void free_seg_map(AV1_COMMON *cm) {
- int i;
- for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) {
- aom_free(cm->seg_map_array[i]);
- cm->seg_map_array[i] = NULL;
- }
-
- cm->current_frame_seg_map = NULL;
-
- if (!cm->frame_parallel_decode) {
- cm->last_frame_seg_map = NULL;
- }
- cm->seg_map_alloc_size = 0;
-}
-
-static void free_scratch_buffers(AV1_COMMON *cm) {
- (void)cm;
-#if CONFIG_NCOBMC && CONFIG_NCOBMC_ADAPT_WEIGHT
- for (int i = 0; i < 4; ++i) {
- if (cm->ncobmcaw_buf[i]) {
- aom_free(cm->ncobmcaw_buf[i]);
- cm->ncobmcaw_buf[i] = NULL;
- }
- }
-#endif // CONFIG_NCOBMC && CONFIG_NCOBMC_ADAPT_WEIGHT
-}
-
-static int alloc_scratch_buffers(AV1_COMMON *cm) {
- (void)cm;
-#if CONFIG_NCOBMC && CONFIG_NCOBMC_ADAPT_WEIGHT
- // If not allocated already, allocate
- if (!cm->ncobmcaw_buf[0] && !cm->ncobmcaw_buf[1] && !cm->ncobmcaw_buf[2] &&
- !cm->ncobmcaw_buf[3]) {
- for (int i = 0; i < 4; ++i) {
- CHECK_MEM_ERROR(
- cm, cm->ncobmcaw_buf[i],
- (uint8_t *)aom_memalign(
- 16, (1 + CONFIG_HIGHBITDEPTH) * MAX_MB_PLANE * MAX_SB_SQUARE));
- }
- }
-#endif // CONFIG_NCOBMC && CONFIG_NCOBMC_ADAPT_WEIGHT
- return 0;
+#if LOOP_FILTER_BITMASK
+ alloc_loop_filter_mask(cm);
+#endif
}
void av1_free_ref_frame_buffers(BufferPool *pool) {
@@ -134,97 +96,179 @@ void av1_free_ref_frame_buffers(BufferPool *pool) {
}
aom_free(pool->frame_bufs[i].mvs);
pool->frame_bufs[i].mvs = NULL;
-#if CONFIG_MFMV
- aom_free(pool->frame_bufs[i].tpl_mvs);
- pool->frame_bufs[i].tpl_mvs = NULL;
-#endif
+ aom_free(pool->frame_bufs[i].seg_map);
+ pool->frame_bufs[i].seg_map = NULL;
aom_free_frame_buffer(&pool->frame_bufs[i].buf);
-#if CONFIG_HASH_ME
- av1_hash_table_destroy(&pool->frame_bufs[i].hash_table);
-#endif
}
}
-#if CONFIG_LOOP_RESTORATION
-// Assumes cm->rst_info[p].restoration_tilesize is already initialized
+// Assumes cm->rst_info[p].restoration_unit_size is already initialized
void av1_alloc_restoration_buffers(AV1_COMMON *cm) {
- int p;
-#if CONFIG_FRAME_SUPERRES
- int width = cm->superres_upscaled_width;
- int height = cm->superres_upscaled_height;
-#else
- int width = cm->width;
- int height = cm->height;
-#endif // CONFIG_FRAME_SUPERRES
- av1_alloc_restoration_struct(cm, &cm->rst_info[0], width, height);
- for (p = 1; p < MAX_MB_PLANE; ++p)
- av1_alloc_restoration_struct(cm, &cm->rst_info[p],
- ROUND_POWER_OF_TWO(width, cm->subsampling_x),
- ROUND_POWER_OF_TWO(height, cm->subsampling_y));
- aom_free(cm->rst_internal.tmpbuf);
- CHECK_MEM_ERROR(cm, cm->rst_internal.tmpbuf,
- (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE));
-
-#if CONFIG_STRIPED_LOOP_RESTORATION
- // Allocate internal storage for the loop restoration stripe boundary lines
- for (p = 0; p < MAX_MB_PLANE; ++p) {
- int w = p == 0 ? width : ROUND_POWER_OF_TWO(width, cm->subsampling_x);
- int align_bits = 5; // align for efficiency
- int stride = ALIGN_POWER_OF_TWO(w, align_bits);
- int num_stripes = (height + 63) / 64;
- // for each processing stripe: 2 lines above, 2 below
- int buf_size = num_stripes * 2 * stride;
- uint8_t *above_buf, *below_buf;
-
- aom_free(cm->rst_internal.stripe_boundary_above[p]);
- aom_free(cm->rst_internal.stripe_boundary_below[p]);
-
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth) buf_size = buf_size * 2;
-#endif
- CHECK_MEM_ERROR(cm, above_buf,
- (uint8_t *)aom_memalign(1 << align_bits, buf_size));
- CHECK_MEM_ERROR(cm, below_buf,
- (uint8_t *)aom_memalign(1 << align_bits, buf_size));
- cm->rst_internal.stripe_boundary_above[p] = above_buf;
- cm->rst_internal.stripe_boundary_below[p] = below_buf;
- cm->rst_internal.stripe_boundary_stride[p] = stride;
+ const int num_planes = av1_num_planes(cm);
+ for (int p = 0; p < num_planes; ++p)
+ av1_alloc_restoration_struct(cm, &cm->rst_info[p], p > 0);
+
+ if (cm->rst_tmpbuf == NULL) {
+ CHECK_MEM_ERROR(cm, cm->rst_tmpbuf,
+ (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE));
+ }
+
+ if (cm->rlbs == NULL) {
+ CHECK_MEM_ERROR(cm, cm->rlbs, aom_malloc(sizeof(RestorationLineBuffers)));
+ }
+
+ // For striped loop restoration, we divide each row of tiles into "stripes",
+ // of height 64 luma pixels but with an offset by RESTORATION_UNIT_OFFSET
+ // luma pixels to match the output from CDEF. We will need to store 2 *
+ // RESTORATION_CTX_VERT lines of data for each stripe, and also need to be
+ // able to quickly answer the question "Where is the <n>'th stripe for tile
+ // row <m>?" To make that efficient, we generate the rst_last_stripe array.
+ int num_stripes = 0;
+ for (int i = 0; i < cm->tile_rows; ++i) {
+ TileInfo tile_info;
+ av1_tile_set_row(&tile_info, cm, i);
+ const int mi_h = tile_info.mi_row_end - tile_info.mi_row_start;
+ const int ext_h = RESTORATION_UNIT_OFFSET + (mi_h << MI_SIZE_LOG2);
+ const int tile_stripes = (ext_h + 63) / 64;
+ num_stripes += tile_stripes;
+ cm->rst_end_stripe[i] = num_stripes;
+ }
+
+ // Now we need to allocate enough space to store the line buffers for the
+ // stripes
+ const int frame_w = cm->superres_upscaled_width;
+ const int use_highbd = cm->use_highbitdepth ? 1 : 0;
+
+ for (int p = 0; p < num_planes; ++p) {
+ const int is_uv = p > 0;
+ const int ss_x = is_uv && cm->subsampling_x;
+ const int plane_w = ((frame_w + ss_x) >> ss_x) + 2 * RESTORATION_EXTRA_HORZ;
+ const int stride = ALIGN_POWER_OF_TWO(plane_w, 5);
+ const int buf_size = num_stripes * stride * RESTORATION_CTX_VERT
+ << use_highbd;
+ RestorationStripeBoundaries *boundaries = &cm->rst_info[p].boundaries;
+
+ if (buf_size != boundaries->stripe_boundary_size ||
+ boundaries->stripe_boundary_above == NULL ||
+ boundaries->stripe_boundary_below == NULL) {
+ aom_free(boundaries->stripe_boundary_above);
+ aom_free(boundaries->stripe_boundary_below);
+
+ CHECK_MEM_ERROR(cm, boundaries->stripe_boundary_above,
+ (uint8_t *)aom_memalign(32, buf_size));
+ CHECK_MEM_ERROR(cm, boundaries->stripe_boundary_below,
+ (uint8_t *)aom_memalign(32, buf_size));
+
+ boundaries->stripe_boundary_size = buf_size;
+ }
+ boundaries->stripe_boundary_stride = stride;
}
-#endif // CONFIG_STRIPED_LOOP_RESTORATION
}
void av1_free_restoration_buffers(AV1_COMMON *cm) {
int p;
for (p = 0; p < MAX_MB_PLANE; ++p)
av1_free_restoration_struct(&cm->rst_info[p]);
- aom_free(cm->rst_internal.tmpbuf);
- cm->rst_internal.tmpbuf = NULL;
+ aom_free(cm->rst_tmpbuf);
+ cm->rst_tmpbuf = NULL;
+ aom_free(cm->rlbs);
+ cm->rlbs = NULL;
+ for (p = 0; p < MAX_MB_PLANE; ++p) {
+ RestorationStripeBoundaries *boundaries = &cm->rst_info[p].boundaries;
+ aom_free(boundaries->stripe_boundary_above);
+ aom_free(boundaries->stripe_boundary_below);
+ boundaries->stripe_boundary_above = NULL;
+ boundaries->stripe_boundary_below = NULL;
+ }
+
+ aom_free_frame_buffer(&cm->rst_frame);
}
-#endif // CONFIG_LOOP_RESTORATION
-void av1_free_context_buffers(AV1_COMMON *cm) {
+void av1_free_above_context_buffers(AV1_COMMON *cm,
+ int num_free_above_contexts) {
int i;
- cm->free_mi(cm);
- free_seg_map(cm);
- free_scratch_buffers(cm);
- for (i = 0; i < MAX_MB_PLANE; i++) {
+ const int num_planes = cm->num_allocated_above_context_planes;
+
+ for (int tile_row = 0; tile_row < num_free_above_contexts; tile_row++) {
+ for (i = 0; i < num_planes; i++) {
+ aom_free(cm->above_context[i][tile_row]);
+ cm->above_context[i][tile_row] = NULL;
+ }
+ aom_free(cm->above_seg_context[tile_row]);
+ cm->above_seg_context[tile_row] = NULL;
+
+ aom_free(cm->above_txfm_context[tile_row]);
+ cm->above_txfm_context[tile_row] = NULL;
+ }
+ for (i = 0; i < num_planes; i++) {
aom_free(cm->above_context[i]);
cm->above_context[i] = NULL;
}
aom_free(cm->above_seg_context);
cm->above_seg_context = NULL;
- cm->above_context_alloc_cols = 0;
-#if CONFIG_VAR_TX
+
aom_free(cm->above_txfm_context);
cm->above_txfm_context = NULL;
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- aom_free(cm->top_txfm_context[i]);
- cm->top_txfm_context[i] = NULL;
- }
+ cm->num_allocated_above_contexts = 0;
+ cm->num_allocated_above_context_mi_col = 0;
+ cm->num_allocated_above_context_planes = 0;
+}
+
+void av1_free_context_buffers(AV1_COMMON *cm) {
+ cm->free_mi(cm);
+
+ av1_free_above_context_buffers(cm, cm->num_allocated_above_contexts);
+
+#if LOOP_FILTER_BITMASK
+ free_loop_filter_mask(cm);
#endif
}
+int av1_alloc_above_context_buffers(AV1_COMMON *cm,
+ int num_alloc_above_contexts) {
+ const int num_planes = av1_num_planes(cm);
+ int plane_idx;
+ const int aligned_mi_cols =
+ ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
+
+ // Allocate above context buffers
+ cm->num_allocated_above_contexts = num_alloc_above_contexts;
+ cm->num_allocated_above_context_mi_col = aligned_mi_cols;
+ cm->num_allocated_above_context_planes = num_planes;
+ for (plane_idx = 0; plane_idx < num_planes; plane_idx++) {
+ cm->above_context[plane_idx] = (ENTROPY_CONTEXT **)aom_calloc(
+ num_alloc_above_contexts, sizeof(cm->above_context[0]));
+ if (!cm->above_context[plane_idx]) return 1;
+ }
+
+ cm->above_seg_context = (PARTITION_CONTEXT **)aom_calloc(
+ num_alloc_above_contexts, sizeof(cm->above_seg_context));
+ if (!cm->above_seg_context) return 1;
+
+ cm->above_txfm_context = (TXFM_CONTEXT **)aom_calloc(
+ num_alloc_above_contexts, sizeof(cm->above_txfm_context));
+ if (!cm->above_txfm_context) return 1;
+
+ for (int tile_row = 0; tile_row < num_alloc_above_contexts; tile_row++) {
+ for (plane_idx = 0; plane_idx < num_planes; plane_idx++) {
+ cm->above_context[plane_idx][tile_row] = (ENTROPY_CONTEXT *)aom_calloc(
+ aligned_mi_cols, sizeof(*cm->above_context[0][tile_row]));
+ if (!cm->above_context[plane_idx][tile_row]) return 1;
+ }
+
+ cm->above_seg_context[tile_row] = (PARTITION_CONTEXT *)aom_calloc(
+ aligned_mi_cols, sizeof(*cm->above_seg_context[tile_row]));
+ if (!cm->above_seg_context[tile_row]) return 1;
+
+ cm->above_txfm_context[tile_row] = (TXFM_CONTEXT *)aom_calloc(
+ aligned_mi_cols, sizeof(*cm->above_txfm_context[tile_row]));
+ if (!cm->above_txfm_context[tile_row]) return 1;
+ }
+
+ return 0;
+}
+
int av1_alloc_context_buffers(AV1_COMMON *cm, int width, int height) {
int new_mi_size;
@@ -235,52 +279,6 @@ int av1_alloc_context_buffers(AV1_COMMON *cm, int width, int height) {
if (cm->alloc_mi(cm, new_mi_size)) goto fail;
}
- if (cm->seg_map_alloc_size < cm->mi_rows * cm->mi_cols) {
- // Create the segmentation map structure and set to 0.
- free_seg_map(cm);
- if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) goto fail;
- }
- if (alloc_scratch_buffers(cm)) goto fail;
-
- if (cm->above_context_alloc_cols < cm->mi_cols) {
- // TODO(geza.lore): These are bigger than they need to be.
- // cm->tile_width would be enough but it complicates indexing a
- // little elsewhere.
- const int aligned_mi_cols =
- ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
- int i;
-
- for (i = 0; i < MAX_MB_PLANE; i++) {
- aom_free(cm->above_context[i]);
- cm->above_context[i] = (ENTROPY_CONTEXT *)aom_calloc(
- aligned_mi_cols << (MI_SIZE_LOG2 - tx_size_wide_log2[0]),
- sizeof(*cm->above_context[0]));
- if (!cm->above_context[i]) goto fail;
- }
-
- aom_free(cm->above_seg_context);
- cm->above_seg_context = (PARTITION_CONTEXT *)aom_calloc(
- aligned_mi_cols, sizeof(*cm->above_seg_context));
- if (!cm->above_seg_context) goto fail;
-
-#if CONFIG_VAR_TX
- aom_free(cm->above_txfm_context);
- cm->above_txfm_context = (TXFM_CONTEXT *)aom_calloc(
- aligned_mi_cols << TX_UNIT_WIDE_LOG2, sizeof(*cm->above_txfm_context));
- if (!cm->above_txfm_context) goto fail;
-
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- aom_free(cm->top_txfm_context[i]);
- cm->top_txfm_context[i] =
- (TXFM_CONTEXT *)aom_calloc(aligned_mi_cols << TX_UNIT_WIDE_LOG2,
- sizeof(*cm->top_txfm_context[0]));
- if (!cm->top_txfm_context[i]) goto fail;
- }
-#endif
-
- cm->above_context_alloc_cols = aligned_mi_cols;
- }
-
return 0;
fail:
@@ -299,18 +297,4 @@ void av1_remove_common(AV1_COMMON *cm) {
cm->frame_contexts = NULL;
}
-void av1_init_context_buffers(AV1_COMMON *cm) {
- cm->setup_mi(cm);
- if (cm->last_frame_seg_map && !cm->frame_parallel_decode)
- memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols);
-}
-
-void av1_swap_current_and_last_seg_map(AV1_COMMON *cm) {
- // Swap indices.
- const int tmp = cm->seg_map_idx;
- cm->seg_map_idx = cm->prev_seg_map_idx;
- cm->prev_seg_map_idx = tmp;
-
- cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
- cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
-}
+void av1_init_context_buffers(AV1_COMMON *cm) { cm->setup_mi(cm); }
diff --git a/third_party/aom/av1/common/alloccommon.h b/third_party/aom/av1/common/alloccommon.h
index 0d420f825..dbcb5b947 100644
--- a/third_party/aom/av1/common/alloccommon.h
+++ b/third_party/aom/av1/common/alloccommon.h
@@ -23,15 +23,17 @@ struct BufferPool;
void av1_remove_common(struct AV1Common *cm);
+int av1_alloc_above_context_buffers(struct AV1Common *cm,
+ int num_alloc_above_contexts);
+void av1_free_above_context_buffers(struct AV1Common *cm,
+ int num_free_above_contexts);
int av1_alloc_context_buffers(struct AV1Common *cm, int width, int height);
void av1_init_context_buffers(struct AV1Common *cm);
void av1_free_context_buffers(struct AV1Common *cm);
void av1_free_ref_frame_buffers(struct BufferPool *pool);
-#if CONFIG_LOOP_RESTORATION
void av1_alloc_restoration_buffers(struct AV1Common *cm);
void av1_free_restoration_buffers(struct AV1Common *cm);
-#endif // CONFIG_LOOP_RESTORATION
int av1_alloc_state_buffers(struct AV1Common *cm, int width, int height);
void av1_free_state_buffers(struct AV1Common *cm);
@@ -39,8 +41,6 @@ void av1_free_state_buffers(struct AV1Common *cm);
void av1_set_mb_mi(struct AV1Common *cm, int width, int height);
int av1_get_MBs(int width, int height);
-void av1_swap_current_and_last_seg_map(struct AV1Common *cm);
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/arm/av1_txfm_neon.c b/third_party/aom/av1/common/arm/av1_txfm_neon.c
new file mode 100644
index 000000000..de3c54724
--- /dev/null
+++ b/third_party/aom/av1/common/arm/av1_txfm_neon.c
@@ -0,0 +1,28 @@
+/*
+ *
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "aom_ports/mem.h"
+#include "av1/common/arm/mem_neon.h"
+
+void av1_round_shift_array_neon(int32_t *arr, int size, int bit) {
+ assert(!(size % 4));
+ if (!bit) return;
+ const int32x4_t dup_bits_n_32x4 = vdupq_n_s32((int32_t)(-bit));
+ for (int i = 0; i < size; i += 4) {
+ int32x4_t tmp_q_s32 = vld1q_s32(arr);
+ tmp_q_s32 = vrshlq_s32(tmp_q_s32, dup_bits_n_32x4);
+ vst1q_s32(arr, tmp_q_s32);
+ arr += 4;
+ }
+}
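
Note on the file above: vrshlq_s32 with a negative per-lane shift count performs a rounding right shift, so av1_round_shift_array_neon rounds each 32-bit coefficient while shifting. As a reference only, a minimal scalar sketch of the same per-element operation (illustrative, not part of the patch):

#include <stdint.h>

/* Illustrative scalar equivalent of the NEON rounding shift above.
 * A negative NEON shift count is a rounding right shift by |bit|;
 * a positive count (bit < 0 here) is a plain left shift. */
static void round_shift_array_scalar_sketch(int32_t *arr, int size, int bit) {
  if (!bit) return;
  for (int i = 0; i < size; ++i) {
    if (bit > 0)
      arr[i] = (arr[i] + (1 << (bit - 1))) >> bit; /* add half, then shift */
    else
      arr[i] = arr[i] * (1 << (-bit));             /* shift left by -bit */
  }
}
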
diff --git a/third_party/aom/av1/common/arm/blend_a64_hmask_neon.c b/third_party/aom/av1/common/arm/blend_a64_hmask_neon.c
new file mode 100644
index 000000000..0d8233744
--- /dev/null
+++ b/third_party/aom/av1/common/arm/blend_a64_hmask_neon.c
@@ -0,0 +1,134 @@
+/*
+ *
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "aom/aom_integer.h"
+#include "aom_dsp/blend.h"
+#include "aom_ports/mem.h"
+#include "av1/common/arm/mem_neon.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "config/aom_dsp_rtcd.h"
+
+void aom_blend_a64_hmask_neon(uint8_t *dst, uint32_t dst_stride,
+ const uint8_t *src0, uint32_t src0_stride,
+ const uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, int w, int h) {
+ assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
+ assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
+
+ assert(h >= 2);
+ assert(w >= 2);
+ assert(IS_POWER_OF_TWO(h));
+ assert(IS_POWER_OF_TWO(w));
+ uint8x8_t tmp0, tmp1;
+ uint8x16_t res_q;
+ uint16x8_t res, res_low, res_high;
+ uint32x2_t tmp0_32, tmp1_32;
+ uint16x4_t tmp0_16, tmp1_16;
+ const uint8x8_t vdup_64 = vdup_n_u8((uint8_t)64);
+
+ if (w >= 16) {
+ const uint8x16_t vdup_64_q = vdupq_n_u8((uint8_t)64);
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; j += 16) {
+ __builtin_prefetch(src0);
+ __builtin_prefetch(src1);
+ const uint8x16_t tmp0_q = vld1q_u8(src0);
+ const uint8x16_t tmp1_q = vld1q_u8(src1);
+ const uint8x16_t m_q = vld1q_u8(mask);
+ const uint8x16_t max_minus_m_q = vsubq_u8(vdup_64_q, m_q);
+ res_low = vmull_u8(vget_low_u8(m_q), vget_low_u8(tmp0_q));
+ res_low =
+ vmlal_u8(res_low, vget_low_u8(max_minus_m_q), vget_low_u8(tmp1_q));
+ res_high = vmull_u8(vget_high_u8(m_q), vget_high_u8(tmp0_q));
+ res_high = vmlal_u8(res_high, vget_high_u8(max_minus_m_q),
+ vget_high_u8(tmp1_q));
+ res_q = vcombine_u8(vrshrn_n_u16(res_low, AOM_BLEND_A64_ROUND_BITS),
+ vrshrn_n_u16(res_high, AOM_BLEND_A64_ROUND_BITS));
+ vst1q_u8(dst, res_q);
+ src0 += 16;
+ src1 += 16;
+ dst += 16;
+ mask += 16;
+ }
+ src0 += src0_stride - w;
+ src1 += src1_stride - w;
+ dst += dst_stride - w;
+ mask -= w;
+ }
+ } else if (w == 8) {
+ const uint8x8_t m = vld1_u8(mask);
+ const uint8x8_t max_minus_m = vsub_u8(vdup_64, m);
+ for (int i = 0; i < h; ++i) {
+ __builtin_prefetch(src0);
+ __builtin_prefetch(src1);
+ tmp0 = vld1_u8(src0);
+ tmp1 = vld1_u8(src1);
+ res = vmull_u8(m, tmp0);
+ res = vmlal_u8(res, max_minus_m, tmp1);
+ vst1_u8(dst, vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS));
+ src0 += src0_stride;
+ src1 += src1_stride;
+ dst += dst_stride;
+ }
+ } else if (w == 4) {
+ const uint8x8_t m = vreinterpret_u8_u32(vld1_dup_u32((uint32_t *)mask));
+ const uint8x8_t max_minus_m = vsub_u8(vdup_64, m);
+ for (int i = 0; i < h; i += 2) {
+ __builtin_prefetch(src0 + 0 * src0_stride);
+ __builtin_prefetch(src0 + 1 * src0_stride);
+ __builtin_prefetch(src1 + 0 * src1_stride);
+ __builtin_prefetch(src1 + 1 * src1_stride);
+ load_unaligned_u8_4x2(src0, src0_stride, &tmp0_32);
+ tmp0 = vreinterpret_u8_u32(tmp0_32);
+ load_unaligned_u8_4x2(src1, src1_stride, &tmp1_32);
+ tmp1 = vreinterpret_u8_u32(tmp1_32);
+ res = vmull_u8(m, tmp0);
+ res = vmlal_u8(res, max_minus_m, tmp1);
+ vst1_lane_u32(
+ (uint32_t *)(dst + (0 * dst_stride)),
+ vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0);
+ vst1_lane_u32(
+ (uint32_t *)(dst + (1 * dst_stride)),
+ vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1);
+ src0 += (2 * src0_stride);
+ src1 += (2 * src1_stride);
+ dst += (2 * dst_stride);
+ }
+ } else if (w == 2) {
+ const uint8x8_t m = vreinterpret_u8_u16(vld1_dup_u16((uint16_t *)mask));
+ const uint8x8_t max_minus_m = vsub_u8(vdup_64, m);
+ for (int i = 0; i < h; i += 2) {
+ __builtin_prefetch(src0 + 0 * src0_stride);
+ __builtin_prefetch(src0 + 1 * src0_stride);
+ __builtin_prefetch(src1 + 0 * src1_stride);
+ __builtin_prefetch(src1 + 1 * src1_stride);
+ load_unaligned_u8_2x2(src0, src0_stride, &tmp0_16);
+ tmp0 = vreinterpret_u8_u16(tmp0_16);
+ load_unaligned_u8_2x2(src1, src1_stride, &tmp1_16);
+ tmp1 = vreinterpret_u8_u16(tmp1_16);
+ res = vmull_u8(m, tmp0);
+ res = vmlal_u8(res, max_minus_m, tmp1);
+ vst1_lane_u16(
+ (uint16_t *)(dst + (0 * dst_stride)),
+ vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0);
+ vst1_lane_u16(
+ (uint16_t *)(dst + (1 * dst_stride)),
+ vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1);
+ src0 += (2 * src0_stride);
+ src1 += (2 * src1_stride);
+ dst += (2 * dst_stride);
+ }
+ }
+}
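
For reference, each output pixel in the file above is the 64-level alpha blend used by the aom_blend_a64_* family: a widening multiply-accumulate (vmull_u8/vmlal_u8) followed by a rounding narrowing shift by AOM_BLEND_A64_ROUND_BITS. A minimal scalar sketch of that per-pixel formula (illustrative only, not part of the patch), with m the mask value in [0, 64]:

#include <stdint.h>

/* Illustrative scalar form of the per-pixel blend computed above:
 * (m * src0 + (64 - m) * src1), rounded and shifted down by
 * AOM_BLEND_A64_ROUND_BITS (6). */
static uint8_t blend_a64_pixel_sketch(uint8_t src0, uint8_t src1, uint8_t m) {
  return (uint8_t)((m * src0 + (64 - m) * src1 + 32) >> 6);
}
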
diff --git a/third_party/aom/av1/common/arm/blend_a64_vmask_neon.c b/third_party/aom/av1/common/arm/blend_a64_vmask_neon.c
new file mode 100644
index 000000000..33b06b767
--- /dev/null
+++ b/third_party/aom/av1/common/arm/blend_a64_vmask_neon.c
@@ -0,0 +1,141 @@
+/*
+ *
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "aom/aom_integer.h"
+#include "aom_dsp/blend.h"
+#include "aom_ports/mem.h"
+#include "av1/common/arm/mem_neon.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "config/aom_dsp_rtcd.h"
+
+void aom_blend_a64_vmask_neon(uint8_t *dst, uint32_t dst_stride,
+ const uint8_t *src0, uint32_t src0_stride,
+ const uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, int w, int h) {
+ uint8x8_t tmp0, tmp1;
+ uint8x16_t tmp0_q, tmp1_q, res_q;
+ uint16x8_t res, res_low, res_high;
+ uint32x2_t tmp0_32, tmp1_32;
+ uint16x4_t tmp0_16, tmp1_16;
+ assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
+ assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
+
+ assert(h >= 2);
+ assert(w >= 2);
+ assert(IS_POWER_OF_TWO(h));
+ assert(IS_POWER_OF_TWO(w));
+
+ if (w >= 16) {
+ for (int i = 0; i < h; ++i) {
+ const uint8x8_t m = vdup_n_u8((uint8_t)mask[i]);
+ const uint8x8_t max_minus_m = vdup_n_u8(64 - (uint8_t)mask[i]);
+ for (int j = 0; j < w; j += 16) {
+ __builtin_prefetch(src0);
+ __builtin_prefetch(src1);
+ tmp0_q = vld1q_u8(src0);
+ tmp1_q = vld1q_u8(src1);
+ res_low = vmull_u8(m, vget_low_u8(tmp0_q));
+ res_low = vmlal_u8(res_low, max_minus_m, vget_low_u8(tmp1_q));
+ res_high = vmull_u8(m, vget_high_u8(tmp0_q));
+ res_high = vmlal_u8(res_high, max_minus_m, vget_high_u8(tmp1_q));
+ res_q = vcombine_u8(vrshrn_n_u16(res_low, AOM_BLEND_A64_ROUND_BITS),
+ vrshrn_n_u16(res_high, AOM_BLEND_A64_ROUND_BITS));
+ vst1q_u8(dst, res_q);
+ src0 += 16;
+ src1 += 16;
+ dst += 16;
+ }
+ src0 += src0_stride - w;
+ src1 += src1_stride - w;
+ dst += dst_stride - w;
+ }
+ } else if (w == 8) {
+ for (int i = 0; i < h; ++i) {
+ __builtin_prefetch(src0);
+ __builtin_prefetch(src1);
+ const uint8x8_t m = vdup_n_u8((uint8_t)mask[i]);
+ const uint8x8_t max_minus_m = vdup_n_u8(64 - (uint8_t)mask[i]);
+ tmp0 = vld1_u8(src0);
+ tmp1 = vld1_u8(src1);
+ res = vmull_u8(m, tmp0);
+ res = vmlal_u8(res, max_minus_m, tmp1);
+ vst1_u8(dst, vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS));
+ src0 += src0_stride;
+ src1 += src1_stride;
+ dst += dst_stride;
+ }
+ } else if (w == 4) {
+ for (int i = 0; i < h; i += 2) {
+ __builtin_prefetch(src0 + 0 * src0_stride);
+ __builtin_prefetch(src0 + 1 * src0_stride);
+ __builtin_prefetch(src1 + 0 * src1_stride);
+ __builtin_prefetch(src1 + 1 * src1_stride);
+ const uint16x4_t m1 = vdup_n_u16((uint16_t)mask[i]);
+ const uint16x4_t m2 = vdup_n_u16((uint16_t)mask[i + 1]);
+ const uint8x8_t m = vmovn_u16(vcombine_u16(m1, m2));
+ const uint16x4_t max_minus_m1 = vdup_n_u16(64 - (uint16_t)mask[i]);
+ const uint16x4_t max_minus_m2 = vdup_n_u16(64 - (uint16_t)mask[i + 1]);
+ const uint8x8_t max_minus_m =
+ vmovn_u16(vcombine_u16(max_minus_m1, max_minus_m2));
+ load_unaligned_u8_4x2(src0, src0_stride, &tmp0_32);
+ tmp0 = vreinterpret_u8_u32(tmp0_32);
+ load_unaligned_u8_4x2(src1, src1_stride, &tmp1_32);
+ tmp1 = vreinterpret_u8_u32(tmp1_32);
+ res = vmull_u8(m, tmp0);
+ res = vmlal_u8(res, max_minus_m, tmp1);
+ vst1_lane_u32(
+ (uint32_t *)(dst + (0 * dst_stride)),
+ vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0);
+ vst1_lane_u32(
+ (uint32_t *)(dst + (1 * dst_stride)),
+ vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1);
+ src0 += (2 * src0_stride);
+ src1 += (2 * src1_stride);
+ dst += (2 * dst_stride);
+ }
+ } else if (w == 2) {
+ for (int i = 0; i < h; i += 2) {
+ __builtin_prefetch(src0 + 0 * src0_stride);
+ __builtin_prefetch(src0 + 1 * src0_stride);
+ __builtin_prefetch(src1 + 0 * src1_stride);
+ __builtin_prefetch(src1 + 1 * src1_stride);
+ const uint8x8_t m1 = vdup_n_u8(mask[i]);
+ const uint8x8_t m2 = vdup_n_u8(mask[i + 1]);
+ const uint16x4x2_t m_trn =
+ vtrn_u16(vreinterpret_u16_u8(m1), vreinterpret_u16_u8(m2));
+ const uint8x8_t m = vreinterpret_u8_u16(m_trn.val[0]);
+ const uint8x8_t max_minus_m1 = vdup_n_u8(64 - mask[i]);
+ const uint8x8_t max_minus_m2 = vdup_n_u8(64 - mask[i + 1]);
+ const uint16x4x2_t max_minus_m_trn = vtrn_u16(
+ vreinterpret_u16_u8(max_minus_m1), vreinterpret_u16_u8(max_minus_m2));
+ const uint8x8_t max_minus_m = vreinterpret_u8_u16(max_minus_m_trn.val[0]);
+ load_unaligned_u8_2x2(src0, src0_stride, &tmp0_16);
+ tmp0 = vreinterpret_u8_u16(tmp0_16);
+ load_unaligned_u8_2x2(src1, src1_stride, &tmp1_16);
+ tmp1 = vreinterpret_u8_u16(tmp1_16);
+ res = vmull_u8(m, tmp0);
+ res = vmlal_u8(res, max_minus_m, tmp1);
+ vst1_lane_u16(
+ (uint16_t *)(dst + (0 * dst_stride)),
+ vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0);
+ vst1_lane_u16(
+ (uint16_t *)(dst + (1 * dst_stride)),
+ vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1);
+ src0 += (2 * src0_stride);
+ src1 += (2 * src1_stride);
+ dst += (2 * dst_stride);
+ }
+ }
+}
diff --git a/third_party/aom/av1/common/arm/cfl_neon.c b/third_party/aom/av1/common/arm/cfl_neon.c
new file mode 100644
index 000000000..d731b6a66
--- /dev/null
+++ b/third_party/aom/av1/common/arm/cfl_neon.c
@@ -0,0 +1,584 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include <arm_neon.h>
+
+#include "config/av1_rtcd.h"
+
+#include "av1/common/cfl.h"
+
+static INLINE void vldsubstq_s16(int16_t *dst, const uint16_t *src, int offset,
+ int16x8_t sub) {
+ vst1q_s16(dst + offset,
+ vsubq_s16(vreinterpretq_s16_u16(vld1q_u16(src + offset)), sub));
+}
+
+static INLINE uint16x8_t vldaddq_u16(const uint16_t *buf, size_t offset) {
+ return vaddq_u16(vld1q_u16(buf), vld1q_u16(buf + offset));
+}
+
+// Load half of a vector and duplicated in other half
+static INLINE uint8x8_t vldh_dup_u8(const uint8_t *ptr) {
+ return vreinterpret_u8_u32(vld1_dup_u32((const uint32_t *)ptr));
+}
+
+// Store half of a vector.
+static INLINE void vsth_u16(uint16_t *ptr, uint16x4_t val) {
+ *((uint32_t *)ptr) = vreinterpret_u32_u16(val)[0];
+}
+
+// Store half of a vector.
+static INLINE void vsth_u8(uint8_t *ptr, uint8x8_t val) {
+ *((uint32_t *)ptr) = vreinterpret_u32_u8(val)[0];
+}
+
+static void cfl_luma_subsampling_420_lbd_neon(const uint8_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3, int width,
+ int height) {
+ const uint16_t *end = pred_buf_q3 + (height >> 1) * CFL_BUF_LINE;
+ const int luma_stride = input_stride << 1;
+ do {
+ if (width == 4) {
+ const uint16x4_t top = vpaddl_u8(vldh_dup_u8(input));
+ const uint16x4_t sum = vpadal_u8(top, vldh_dup_u8(input + input_stride));
+ vsth_u16(pred_buf_q3, vshl_n_u16(sum, 1));
+ } else if (width == 8) {
+ const uint16x4_t top = vpaddl_u8(vld1_u8(input));
+ const uint16x4_t sum = vpadal_u8(top, vld1_u8(input + input_stride));
+ vst1_u16(pred_buf_q3, vshl_n_u16(sum, 1));
+ } else if (width == 16) {
+ const uint16x8_t top = vpaddlq_u8(vld1q_u8(input));
+ const uint16x8_t sum = vpadalq_u8(top, vld1q_u8(input + input_stride));
+ vst1q_u16(pred_buf_q3, vshlq_n_u16(sum, 1));
+ } else {
+ const uint8x8x4_t top = vld4_u8(input);
+ const uint8x8x4_t bot = vld4_u8(input + input_stride);
+ // equivalent to a vpaddlq_u8 (because vld4q interleaves)
+ const uint16x8_t top_0 = vaddl_u8(top.val[0], top.val[1]);
+ // equivalent to a vpaddlq_u8 (because vld4q interleaves)
+ const uint16x8_t bot_0 = vaddl_u8(bot.val[0], bot.val[1]);
+ // equivalent to a vpaddlq_u8 (because vld4q interleaves)
+ const uint16x8_t top_1 = vaddl_u8(top.val[2], top.val[3]);
+ // equivalent to a vpaddlq_u8 (because vld4q interleaves)
+ const uint16x8_t bot_1 = vaddl_u8(bot.val[2], bot.val[3]);
+ uint16x8x2_t sum;
+ sum.val[0] = vshlq_n_u16(vaddq_u16(top_0, bot_0), 1);
+ sum.val[1] = vshlq_n_u16(vaddq_u16(top_1, bot_1), 1);
+ vst2q_u16(pred_buf_q3, sum);
+ }
+ input += luma_stride;
+ } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
+}
+
+static void cfl_luma_subsampling_422_lbd_neon(const uint8_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3, int width,
+ int height) {
+ const uint16_t *end = pred_buf_q3 + height * CFL_BUF_LINE;
+ do {
+ if (width == 4) {
+ const uint16x4_t top = vpaddl_u8(vldh_dup_u8(input));
+ vsth_u16(pred_buf_q3, vshl_n_u16(top, 2));
+ } else if (width == 8) {
+ const uint16x4_t top = vpaddl_u8(vld1_u8(input));
+ vst1_u16(pred_buf_q3, vshl_n_u16(top, 2));
+ } else if (width == 16) {
+ const uint16x8_t top = vpaddlq_u8(vld1q_u8(input));
+ vst1q_u16(pred_buf_q3, vshlq_n_u16(top, 2));
+ } else {
+ const uint8x8x4_t top = vld4_u8(input);
+ uint16x8x2_t sum;
+ // vaddl_u8 is equivalent to a vpaddlq_u8 (because vld4q interleaves)
+ sum.val[0] = vshlq_n_u16(vaddl_u8(top.val[0], top.val[1]), 2);
+ sum.val[1] = vshlq_n_u16(vaddl_u8(top.val[2], top.val[3]), 2);
+ vst2q_u16(pred_buf_q3, sum);
+ }
+ input += input_stride;
+ } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
+}
+
+static void cfl_luma_subsampling_444_lbd_neon(const uint8_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3, int width,
+ int height) {
+ const uint16_t *end = pred_buf_q3 + height * CFL_BUF_LINE;
+ do {
+ if (width == 4) {
+ const uint16x8_t top = vshll_n_u8(vldh_dup_u8(input), 3);
+ vst1_u16(pred_buf_q3, vget_low_u16(top));
+ } else if (width == 8) {
+ const uint16x8_t top = vshll_n_u8(vld1_u8(input), 3);
+ vst1q_u16(pred_buf_q3, top);
+ } else {
+ const uint8x16_t top = vld1q_u8(input);
+ vst1q_u16(pred_buf_q3, vshll_n_u8(vget_low_u8(top), 3));
+ vst1q_u16(pred_buf_q3 + 8, vshll_n_u8(vget_high_u8(top), 3));
+ if (width == 32) {
+ const uint8x16_t next_top = vld1q_u8(input + 16);
+ vst1q_u16(pred_buf_q3 + 16, vshll_n_u8(vget_low_u8(next_top), 3));
+ vst1q_u16(pred_buf_q3 + 24, vshll_n_u8(vget_high_u8(next_top), 3));
+ }
+ }
+ input += input_stride;
+ } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
+}
+
+#if __ARM_ARCH <= 7
+uint16x8_t vpaddq_u16(uint16x8_t a, uint16x8_t b) {
+ return vcombine_u16(vpadd_u16(vget_low_u16(a), vget_high_u16(a)),
+ vpadd_u16(vget_low_u16(b), vget_high_u16(b)));
+}
+#endif
+
+static void cfl_luma_subsampling_420_hbd_neon(const uint16_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3, int width,
+ int height) {
+ const uint16_t *end = pred_buf_q3 + (height >> 1) * CFL_BUF_LINE;
+ const int luma_stride = input_stride << 1;
+ do {
+ if (width == 4) {
+ const uint16x4_t top = vld1_u16(input);
+ const uint16x4_t bot = vld1_u16(input + input_stride);
+ const uint16x4_t sum = vadd_u16(top, bot);
+ const uint16x4_t hsum = vpadd_u16(sum, sum);
+ vsth_u16(pred_buf_q3, vshl_n_u16(hsum, 1));
+ } else if (width < 32) {
+ const uint16x8_t top = vld1q_u16(input);
+ const uint16x8_t bot = vld1q_u16(input + input_stride);
+ const uint16x8_t sum = vaddq_u16(top, bot);
+ if (width == 8) {
+ const uint16x4_t hsum = vget_low_u16(vpaddq_u16(sum, sum));
+ vst1_u16(pred_buf_q3, vshl_n_u16(hsum, 1));
+ } else {
+ const uint16x8_t top_1 = vld1q_u16(input + 8);
+ const uint16x8_t bot_1 = vld1q_u16(input + 8 + input_stride);
+ const uint16x8_t sum_1 = vaddq_u16(top_1, bot_1);
+ const uint16x8_t hsum = vpaddq_u16(sum, sum_1);
+ vst1q_u16(pred_buf_q3, vshlq_n_u16(hsum, 1));
+ }
+ } else {
+ const uint16x8x4_t top = vld4q_u16(input);
+ const uint16x8x4_t bot = vld4q_u16(input + input_stride);
+ // equivalent to a vpaddq_u16 (because vld4q interleaves)
+ const uint16x8_t top_0 = vaddq_u16(top.val[0], top.val[1]);
+ // equivalent to a vpaddq_u16 (because vld4q interleaves)
+ const uint16x8_t bot_0 = vaddq_u16(bot.val[0], bot.val[1]);
+ // equivalent to a vpaddq_u16 (because vld4q interleaves)
+ const uint16x8_t top_1 = vaddq_u16(top.val[2], top.val[3]);
+ // equivalent to a vpaddq_u16 (because vld4q interleaves)
+ const uint16x8_t bot_1 = vaddq_u16(bot.val[2], bot.val[3]);
+ uint16x8x2_t sum;
+ sum.val[0] = vshlq_n_u16(vaddq_u16(top_0, bot_0), 1);
+ sum.val[1] = vshlq_n_u16(vaddq_u16(top_1, bot_1), 1);
+ vst2q_u16(pred_buf_q3, sum);
+ }
+ input += luma_stride;
+ } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
+}
+
+static void cfl_luma_subsampling_422_hbd_neon(const uint16_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3, int width,
+ int height) {
+ const uint16_t *end = pred_buf_q3 + height * CFL_BUF_LINE;
+ do {
+ if (width == 4) {
+ const uint16x4_t top = vld1_u16(input);
+ const uint16x4_t hsum = vpadd_u16(top, top);
+ vsth_u16(pred_buf_q3, vshl_n_u16(hsum, 2));
+ } else if (width == 8) {
+ const uint16x4x2_t top = vld2_u16(input);
+ // equivalent to a vpadd_u16 (because vld2 interleaves)
+ const uint16x4_t hsum = vadd_u16(top.val[0], top.val[1]);
+ vst1_u16(pred_buf_q3, vshl_n_u16(hsum, 2));
+ } else if (width == 16) {
+ const uint16x8x2_t top = vld2q_u16(input);
+ // equivalent to a vpaddq_u16 (because vld2q interleaves)
+ const uint16x8_t hsum = vaddq_u16(top.val[0], top.val[1]);
+ vst1q_u16(pred_buf_q3, vshlq_n_u16(hsum, 2));
+ } else {
+ const uint16x8x4_t top = vld4q_u16(input);
+ // equivalent to a vpaddq_u16 (because vld4q interleaves)
+ const uint16x8_t hsum_0 = vaddq_u16(top.val[0], top.val[1]);
+ // equivalent to a vpaddq_u16 (because vld4q interleaves)
+ const uint16x8_t hsum_1 = vaddq_u16(top.val[2], top.val[3]);
+ uint16x8x2_t result = { { vshlq_n_u16(hsum_0, 2),
+ vshlq_n_u16(hsum_1, 2) } };
+ vst2q_u16(pred_buf_q3, result);
+ }
+ input += input_stride;
+ } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
+}
+
+static void cfl_luma_subsampling_444_hbd_neon(const uint16_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3, int width,
+ int height) {
+ const uint16_t *end = pred_buf_q3 + height * CFL_BUF_LINE;
+ do {
+ if (width == 4) {
+ const uint16x4_t top = vld1_u16(input);
+ vst1_u16(pred_buf_q3, vshl_n_u16(top, 3));
+ } else if (width == 8) {
+ const uint16x8_t top = vld1q_u16(input);
+ vst1q_u16(pred_buf_q3, vshlq_n_u16(top, 3));
+ } else if (width == 16) {
+ uint16x8x2_t top = vld2q_u16(input);
+ top.val[0] = vshlq_n_u16(top.val[0], 3);
+ top.val[1] = vshlq_n_u16(top.val[1], 3);
+ vst2q_u16(pred_buf_q3, top);
+ } else {
+ uint16x8x4_t top = vld4q_u16(input);
+ top.val[0] = vshlq_n_u16(top.val[0], 3);
+ top.val[1] = vshlq_n_u16(top.val[1], 3);
+ top.val[2] = vshlq_n_u16(top.val[2], 3);
+ top.val[3] = vshlq_n_u16(top.val[3], 3);
+ vst4q_u16(pred_buf_q3, top);
+ }
+ input += input_stride;
+ } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
+}
+
+CFL_GET_SUBSAMPLE_FUNCTION(neon)
+
+static INLINE void subtract_average_neon(const uint16_t *src, int16_t *dst,
+ int width, int height,
+ int round_offset,
+ const int num_pel_log2) {
+ const uint16_t *const end = src + height * CFL_BUF_LINE;
+
+ // Round offset is not needed, because NEON will handle the rounding.
+ (void)round_offset;
+
+ // To optimize the use of the CPU pipeline, we process 4 rows per iteration
+ const int step = 4 * CFL_BUF_LINE;
+
+ // At this stage, the prediction buffer contains scaled reconstructed luma
+ // pixels, which are positive integer and only require 15 bits. By using
+ // unsigned integer for the sum, we can do one addition operation inside 16
+ // bits (8 lanes) before having to convert to 32 bits (4 lanes).
+ const uint16_t *sum_buf = src;
+ uint32x4_t sum_32x4 = { 0, 0, 0, 0 };
+ do {
+ // For all widths, we load, add and combine the data so it fits in 4 lanes.
+ if (width == 4) {
+ const uint16x4_t a0 =
+ vadd_u16(vld1_u16(sum_buf), vld1_u16(sum_buf + CFL_BUF_LINE));
+ const uint16x4_t a1 = vadd_u16(vld1_u16(sum_buf + 2 * CFL_BUF_LINE),
+ vld1_u16(sum_buf + 3 * CFL_BUF_LINE));
+ sum_32x4 = vaddq_u32(sum_32x4, vaddl_u16(a0, a1));
+ } else if (width == 8) {
+ const uint16x8_t a0 = vldaddq_u16(sum_buf, CFL_BUF_LINE);
+ const uint16x8_t a1 =
+ vldaddq_u16(sum_buf + 2 * CFL_BUF_LINE, CFL_BUF_LINE);
+ sum_32x4 = vpadalq_u16(sum_32x4, a0);
+ sum_32x4 = vpadalq_u16(sum_32x4, a1);
+ } else {
+ const uint16x8_t row0 = vldaddq_u16(sum_buf, 8);
+ const uint16x8_t row1 = vldaddq_u16(sum_buf + CFL_BUF_LINE, 8);
+ const uint16x8_t row2 = vldaddq_u16(sum_buf + 2 * CFL_BUF_LINE, 8);
+ const uint16x8_t row3 = vldaddq_u16(sum_buf + 3 * CFL_BUF_LINE, 8);
+ sum_32x4 = vpadalq_u16(sum_32x4, row0);
+ sum_32x4 = vpadalq_u16(sum_32x4, row1);
+ sum_32x4 = vpadalq_u16(sum_32x4, row2);
+ sum_32x4 = vpadalq_u16(sum_32x4, row3);
+
+ if (width == 32) {
+ const uint16x8_t row0_1 = vldaddq_u16(sum_buf + 16, 8);
+ const uint16x8_t row1_1 = vldaddq_u16(sum_buf + CFL_BUF_LINE + 16, 8);
+ const uint16x8_t row2_1 =
+ vldaddq_u16(sum_buf + 2 * CFL_BUF_LINE + 16, 8);
+ const uint16x8_t row3_1 =
+ vldaddq_u16(sum_buf + 3 * CFL_BUF_LINE + 16, 8);
+
+ sum_32x4 = vpadalq_u16(sum_32x4, row0_1);
+ sum_32x4 = vpadalq_u16(sum_32x4, row1_1);
+ sum_32x4 = vpadalq_u16(sum_32x4, row2_1);
+ sum_32x4 = vpadalq_u16(sum_32x4, row3_1);
+ }
+ }
+ sum_buf += step;
+ } while (sum_buf < end);
+
+ // Permute and add in such a way that each lane contains the block sum.
+ // [A+C+B+D, B+D+A+C, C+A+D+B, D+B+C+A]
+#if __ARM_ARCH >= 8
+ sum_32x4 = vpaddq_u32(sum_32x4, sum_32x4);
+ sum_32x4 = vpaddq_u32(sum_32x4, sum_32x4);
+#else
+ uint32x4_t flip =
+ vcombine_u32(vget_high_u32(sum_32x4), vget_low_u32(sum_32x4));
+ sum_32x4 = vaddq_u32(sum_32x4, flip);
+ sum_32x4 = vaddq_u32(sum_32x4, vrev64q_u32(sum_32x4));
+#endif
+
+ // Computing the average could be done using scalars, but getting off the NEON
+ // engine introduces latency, so we use vqrshrn.
+ int16x4_t avg_16x4;
+ // Constant propagation makes for some ugly code.
+ switch (num_pel_log2) {
+ case 4: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 4)); break;
+ case 5: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 5)); break;
+ case 6: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 6)); break;
+ case 7: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 7)); break;
+ case 8: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 8)); break;
+ case 9: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 9)); break;
+ case 10:
+ avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 10));
+ break;
+ default: assert(0);
+ }
+
+ if (width == 4) {
+ do {
+ vst1_s16(dst, vsub_s16(vreinterpret_s16_u16(vld1_u16(src)), avg_16x4));
+ src += CFL_BUF_LINE;
+ dst += CFL_BUF_LINE;
+ } while (src < end);
+ } else {
+ const int16x8_t avg_16x8 = vcombine_s16(avg_16x4, avg_16x4);
+ do {
+ vldsubstq_s16(dst, src, 0, avg_16x8);
+ vldsubstq_s16(dst, src, CFL_BUF_LINE, avg_16x8);
+ vldsubstq_s16(dst, src, 2 * CFL_BUF_LINE, avg_16x8);
+ vldsubstq_s16(dst, src, 3 * CFL_BUF_LINE, avg_16x8);
+
+ if (width > 8) {
+ vldsubstq_s16(dst, src, 8, avg_16x8);
+ vldsubstq_s16(dst, src, 8 + CFL_BUF_LINE, avg_16x8);
+ vldsubstq_s16(dst, src, 8 + 2 * CFL_BUF_LINE, avg_16x8);
+ vldsubstq_s16(dst, src, 8 + 3 * CFL_BUF_LINE, avg_16x8);
+ }
+ if (width == 32) {
+ vldsubstq_s16(dst, src, 16, avg_16x8);
+ vldsubstq_s16(dst, src, 16 + CFL_BUF_LINE, avg_16x8);
+ vldsubstq_s16(dst, src, 16 + 2 * CFL_BUF_LINE, avg_16x8);
+ vldsubstq_s16(dst, src, 16 + 3 * CFL_BUF_LINE, avg_16x8);
+ vldsubstq_s16(dst, src, 24, avg_16x8);
+ vldsubstq_s16(dst, src, 24 + CFL_BUF_LINE, avg_16x8);
+ vldsubstq_s16(dst, src, 24 + 2 * CFL_BUF_LINE, avg_16x8);
+ vldsubstq_s16(dst, src, 24 + 3 * CFL_BUF_LINE, avg_16x8);
+ }
+ src += step;
+ dst += step;
+ } while (src < end);
+ }
+}
+
+CFL_SUB_AVG_FN(neon)
+
+// Saturating negate 16-bit integers in a when the corresponding signed 16-bit
+// integer in b is negative.
+// Notes:
+// * Negating INT16_MIN results in INT16_MIN. However, this cannot occur in
+// practice, as scaled_luma is the multiplication of two absolute values.
+// * In the Intel equivalent, elements in a are zeroed out when the
+// corresponding elements in b are zero. Because vsign is used twice in a
+// row, with b in the first call becoming a in the second call, there's no
+// impact from not zeroing out.
+static int16x4_t vsign_s16(int16x4_t a, int16x4_t b) {
+ const int16x4_t mask = vshr_n_s16(b, 15);
+ return veor_s16(vadd_s16(a, mask), mask);
+}
+
+// Saturating negate 16-bit integers in a when the corresponding signed 16-bit
+// integer in b is negative.
+// Notes:
+// * Negating INT16_MIN results in INT16_MIN. However, this cannot occur in
+// practice, as scaled_luma is the multiplication of two absolute values.
+// * In the Intel equivalent, elements in a are zeroed out when the
+// corresponding elements in b are zero. Because vsignq is used twice in a
+// row, with b in the first call becoming a in the second call, there's no
+// impact from not zeroing out.
+static int16x8_t vsignq_s16(int16x8_t a, int16x8_t b) {
+ const int16x8_t mask = vshrq_n_s16(b, 15);
+ return veorq_s16(vaddq_s16(a, mask), mask);
+}
+
+static INLINE int16x4_t predict_w4(const int16_t *pred_buf_q3,
+ int16x4_t alpha_sign, int abs_alpha_q12,
+ int16x4_t dc) {
+ const int16x4_t ac_q3 = vld1_s16(pred_buf_q3);
+ const int16x4_t ac_sign = veor_s16(alpha_sign, ac_q3);
+ int16x4_t scaled_luma = vqrdmulh_n_s16(vabs_s16(ac_q3), abs_alpha_q12);
+ return vadd_s16(vsign_s16(scaled_luma, ac_sign), dc);
+}
+
+static INLINE int16x8_t predict_w8(const int16_t *pred_buf_q3,
+ int16x8_t alpha_sign, int abs_alpha_q12,
+ int16x8_t dc) {
+ const int16x8_t ac_q3 = vld1q_s16(pred_buf_q3);
+ const int16x8_t ac_sign = veorq_s16(alpha_sign, ac_q3);
+ int16x8_t scaled_luma = vqrdmulhq_n_s16(vabsq_s16(ac_q3), abs_alpha_q12);
+ return vaddq_s16(vsignq_s16(scaled_luma, ac_sign), dc);
+}
+
+static INLINE int16x8x2_t predict_w16(const int16_t *pred_buf_q3,
+ int16x8_t alpha_sign, int abs_alpha_q12,
+ int16x8_t dc) {
+ // vld2q_s16 interleaves, which is not useful for prediction. vst1q_s16_x2
+ // does not interleave, but is not currently available in the compiler used
+ // by the AOM build system.
+ const int16x8x2_t ac_q3 = vld2q_s16(pred_buf_q3);
+ const int16x8_t ac_sign_0 = veorq_s16(alpha_sign, ac_q3.val[0]);
+ const int16x8_t ac_sign_1 = veorq_s16(alpha_sign, ac_q3.val[1]);
+ const int16x8_t scaled_luma_0 =
+ vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[0]), abs_alpha_q12);
+ const int16x8_t scaled_luma_1 =
+ vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[1]), abs_alpha_q12);
+ int16x8x2_t result;
+ result.val[0] = vaddq_s16(vsignq_s16(scaled_luma_0, ac_sign_0), dc);
+ result.val[1] = vaddq_s16(vsignq_s16(scaled_luma_1, ac_sign_1), dc);
+ return result;
+}
+
+static INLINE int16x8x4_t predict_w32(const int16_t *pred_buf_q3,
+ int16x8_t alpha_sign, int abs_alpha_q12,
+ int16x8_t dc) {
+  // vld4q_s16 interleaves, which is not useful for prediction. vst1q_s16_x4
+  // does not interleave, but is not currently available in the compiler used
+  // by the AOM build system.
+ const int16x8x4_t ac_q3 = vld4q_s16(pred_buf_q3);
+ const int16x8_t ac_sign_0 = veorq_s16(alpha_sign, ac_q3.val[0]);
+ const int16x8_t ac_sign_1 = veorq_s16(alpha_sign, ac_q3.val[1]);
+ const int16x8_t ac_sign_2 = veorq_s16(alpha_sign, ac_q3.val[2]);
+ const int16x8_t ac_sign_3 = veorq_s16(alpha_sign, ac_q3.val[3]);
+ const int16x8_t scaled_luma_0 =
+ vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[0]), abs_alpha_q12);
+ const int16x8_t scaled_luma_1 =
+ vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[1]), abs_alpha_q12);
+ const int16x8_t scaled_luma_2 =
+ vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[2]), abs_alpha_q12);
+ const int16x8_t scaled_luma_3 =
+ vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[3]), abs_alpha_q12);
+ int16x8x4_t result;
+ result.val[0] = vaddq_s16(vsignq_s16(scaled_luma_0, ac_sign_0), dc);
+ result.val[1] = vaddq_s16(vsignq_s16(scaled_luma_1, ac_sign_1), dc);
+ result.val[2] = vaddq_s16(vsignq_s16(scaled_luma_2, ac_sign_2), dc);
+ result.val[3] = vaddq_s16(vsignq_s16(scaled_luma_3, ac_sign_3), dc);
+ return result;
+}
+
+static INLINE void cfl_predict_lbd_neon(const int16_t *pred_buf_q3,
+ uint8_t *dst, int dst_stride,
+ int alpha_q3, int width, int height) {
+ const int16_t abs_alpha_q12 = abs(alpha_q3) << 9;
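+  // vqrdmulh computes (2 * a * b + (1 << 15)) >> 16, so multiplying the Q3 AC
+  // values by this Q12 constant is equivalent to
+  // (ac_q3 * abs(alpha_q3) + (1 << 5)) >> 6, i.e. a rounded Q3 * Q3 -> Q0
+  // scale.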
+ const int16_t *const end = pred_buf_q3 + height * CFL_BUF_LINE;
+ if (width == 4) {
+ const int16x4_t alpha_sign = vdup_n_s16(alpha_q3);
+ const int16x4_t dc = vdup_n_s16(*dst);
+ do {
+ const int16x4_t pred =
+ predict_w4(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
+ vsth_u8(dst, vqmovun_s16(vcombine_s16(pred, pred)));
+ dst += dst_stride;
+ } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
+ } else {
+ const int16x8_t alpha_sign = vdupq_n_s16(alpha_q3);
+ const int16x8_t dc = vdupq_n_s16(*dst);
+ do {
+ if (width == 8) {
+ vst1_u8(dst, vqmovun_s16(predict_w8(pred_buf_q3, alpha_sign,
+ abs_alpha_q12, dc)));
+ } else if (width == 16) {
+ const int16x8x2_t pred =
+ predict_w16(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
+ const uint8x8x2_t predun = { { vqmovun_s16(pred.val[0]),
+ vqmovun_s16(pred.val[1]) } };
+ vst2_u8(dst, predun);
+ } else {
+ const int16x8x4_t pred =
+ predict_w32(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
+ const uint8x8x4_t predun = {
+ { vqmovun_s16(pred.val[0]), vqmovun_s16(pred.val[1]),
+ vqmovun_s16(pred.val[2]), vqmovun_s16(pred.val[3]) }
+ };
+ vst4_u8(dst, predun);
+ }
+ dst += dst_stride;
+ } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
+ }
+}
+
+CFL_PREDICT_FN(neon, lbd)
+
+static INLINE uint16x4_t clamp_s16(int16x4_t a, int16x4_t max) {
+ return vreinterpret_u16_s16(vmax_s16(vmin_s16(a, max), vdup_n_s16(0)));
+}
+
+static INLINE uint16x8_t clampq_s16(int16x8_t a, int16x8_t max) {
+ return vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(a, max), vdupq_n_s16(0)));
+}
+
+static INLINE uint16x8x2_t clamp2q_s16(int16x8x2_t a, int16x8_t max) {
+ uint16x8x2_t result;
+ result.val[0] = vreinterpretq_u16_s16(
+ vmaxq_s16(vminq_s16(a.val[0], max), vdupq_n_s16(0)));
+ result.val[1] = vreinterpretq_u16_s16(
+ vmaxq_s16(vminq_s16(a.val[1], max), vdupq_n_s16(0)));
+ return result;
+}
+
+static INLINE uint16x8x4_t clamp4q_s16(int16x8x4_t a, int16x8_t max) {
+ uint16x8x4_t result;
+ result.val[0] = vreinterpretq_u16_s16(
+ vmaxq_s16(vminq_s16(a.val[0], max), vdupq_n_s16(0)));
+ result.val[1] = vreinterpretq_u16_s16(
+ vmaxq_s16(vminq_s16(a.val[1], max), vdupq_n_s16(0)));
+ result.val[2] = vreinterpretq_u16_s16(
+ vmaxq_s16(vminq_s16(a.val[2], max), vdupq_n_s16(0)));
+ result.val[3] = vreinterpretq_u16_s16(
+ vmaxq_s16(vminq_s16(a.val[3], max), vdupq_n_s16(0)));
+ return result;
+}
+
+static INLINE void cfl_predict_hbd_neon(const int16_t *pred_buf_q3,
+ uint16_t *dst, int dst_stride,
+ int alpha_q3, int bd, int width,
+ int height) {
+ const int max = (1 << bd) - 1;
+ const int16_t abs_alpha_q12 = abs(alpha_q3) << 9;
+ const int16_t *const end = pred_buf_q3 + height * CFL_BUF_LINE;
+ if (width == 4) {
+ const int16x4_t alpha_sign = vdup_n_s16(alpha_q3);
+ const int16x4_t dc = vdup_n_s16(*dst);
+ const int16x4_t max_16x4 = vdup_n_s16(max);
+ do {
+ const int16x4_t scaled_luma =
+ predict_w4(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
+ vst1_u16(dst, clamp_s16(scaled_luma, max_16x4));
+ dst += dst_stride;
+ } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
+ } else {
+ const int16x8_t alpha_sign = vdupq_n_s16(alpha_q3);
+ const int16x8_t dc = vdupq_n_s16(*dst);
+ const int16x8_t max_16x8 = vdupq_n_s16(max);
+ do {
+ if (width == 8) {
+ const int16x8_t pred =
+ predict_w8(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
+ vst1q_u16(dst, clampq_s16(pred, max_16x8));
+ } else if (width == 16) {
+ const int16x8x2_t pred =
+ predict_w16(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
+ vst2q_u16(dst, clamp2q_s16(pred, max_16x8));
+ } else {
+ const int16x8x4_t pred =
+ predict_w32(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
+ vst4q_u16(dst, clamp4q_s16(pred, max_16x8));
+ }
+ dst += dst_stride;
+ } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
+ }
+}
+
+CFL_PREDICT_FN(neon, hbd)
diff --git a/third_party/aom/av1/common/arm/convolve_neon.c b/third_party/aom/av1/common/arm/convolve_neon.c
new file mode 100644
index 000000000..86a25e109
--- /dev/null
+++ b/third_party/aom/av1/common/arm/convolve_neon.c
@@ -0,0 +1,1134 @@
+/*
+ *
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+#include <arm_neon.h>
+
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_ports/mem.h"
+#include "av1/common/convolve.h"
+#include "av1/common/filter.h"
+#include "av1/common/arm/convolve_neon.h"
+#include "av1/common/arm/mem_neon.h"
+#include "av1/common/arm/transpose_neon.h"
+
+static INLINE int16x4_t convolve8_4x4(const int16x4_t s0, const int16x4_t s1,
+ const int16x4_t s2, const int16x4_t s3,
+ const int16x4_t s4, const int16x4_t s5,
+ const int16x4_t s6, const int16x4_t s7,
+ const int16_t *filter) {
+ int16x4_t sum;
+
+ sum = vmul_n_s16(s0, filter[0]);
+ sum = vmla_n_s16(sum, s1, filter[1]);
+ sum = vmla_n_s16(sum, s2, filter[2]);
+ sum = vmla_n_s16(sum, s5, filter[5]);
+ sum = vmla_n_s16(sum, s6, filter[6]);
+ sum = vmla_n_s16(sum, s7, filter[7]);
+  /* filter[3] can take a maximum value of 128, so adding 128 * 255 to sum can
+   * exceed 16 bits; use saturating adds for the two centre taps.
+   */
+ sum = vqadd_s16(sum, vmul_n_s16(s3, filter[3]));
+ sum = vqadd_s16(sum, vmul_n_s16(s4, filter[4]));
+
+ return sum;
+}
+
+static INLINE uint8x8_t convolve8_horiz_8x8(
+ const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
+ const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
+ const int16x8_t s6, const int16x8_t s7, const int16_t *filter,
+ const int16x8_t shift_round_0, const int16x8_t shift_by_bits) {
+ int16x8_t sum;
+
+ sum = vmulq_n_s16(s0, filter[0]);
+ sum = vmlaq_n_s16(sum, s1, filter[1]);
+ sum = vmlaq_n_s16(sum, s2, filter[2]);
+ sum = vmlaq_n_s16(sum, s5, filter[5]);
+ sum = vmlaq_n_s16(sum, s6, filter[6]);
+ sum = vmlaq_n_s16(sum, s7, filter[7]);
+  /* filter[3] can take a maximum value of 128, so adding 128 * 255 to sum can
+   * exceed 16 bits; use saturating adds for the two centre taps.
+   */
+ sum = vqaddq_s16(sum, vmulq_n_s16(s3, filter[3]));
+ sum = vqaddq_s16(sum, vmulq_n_s16(s4, filter[4]));
+
+ sum = vqrshlq_s16(sum, shift_round_0);
+ sum = vqrshlq_s16(sum, shift_by_bits);
+
+ return vqmovun_s16(sum);
+}
+
+static INLINE uint8x8_t convolve8_vert_8x4(
+ const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
+ const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
+ const int16x8_t s6, const int16x8_t s7, const int16_t *filter) {
+ int16x8_t sum;
+
+ sum = vmulq_n_s16(s0, filter[0]);
+ sum = vmlaq_n_s16(sum, s1, filter[1]);
+ sum = vmlaq_n_s16(sum, s2, filter[2]);
+ sum = vmlaq_n_s16(sum, s5, filter[5]);
+ sum = vmlaq_n_s16(sum, s6, filter[6]);
+ sum = vmlaq_n_s16(sum, s7, filter[7]);
+  /* filter[3] can take a maximum value of 128, so adding 128 * 255 to sum can
+   * exceed 16 bits; use saturating adds for the two centre taps.
+   */
+ sum = vqaddq_s16(sum, vmulq_n_s16(s3, filter[3]));
+ sum = vqaddq_s16(sum, vmulq_n_s16(s4, filter[4]));
+
+ return vqrshrun_n_s16(sum, FILTER_BITS);
+}
+
+static INLINE uint16x4_t convolve8_vert_4x4_s32(
+ const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
+ const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
+ const int16x4_t s6, const int16x4_t s7, const int16_t *y_filter,
+ const int32x4_t round_shift_vec, const int32x4_t offset_const,
+ const int32x4_t sub_const_vec) {
+ int32x4_t sum0;
+ uint16x4_t res;
+ const int32x4_t zero = vdupq_n_s32(0);
+
+ sum0 = vmull_n_s16(s0, y_filter[0]);
+ sum0 = vmlal_n_s16(sum0, s1, y_filter[1]);
+ sum0 = vmlal_n_s16(sum0, s2, y_filter[2]);
+ sum0 = vmlal_n_s16(sum0, s3, y_filter[3]);
+ sum0 = vmlal_n_s16(sum0, s4, y_filter[4]);
+ sum0 = vmlal_n_s16(sum0, s5, y_filter[5]);
+ sum0 = vmlal_n_s16(sum0, s6, y_filter[6]);
+ sum0 = vmlal_n_s16(sum0, s7, y_filter[7]);
+
+ sum0 = vaddq_s32(sum0, offset_const);
+ sum0 = vqrshlq_s32(sum0, round_shift_vec);
+ sum0 = vsubq_s32(sum0, sub_const_vec);
+ sum0 = vmaxq_s32(sum0, zero);
+
+ res = vmovn_u32(vreinterpretq_u32_s32(sum0));
+
+ return res;
+}
+
+static INLINE uint8x8_t convolve8_vert_8x4_s32(
+ const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
+ const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
+ const int16x8_t s6, const int16x8_t s7, const int16_t *y_filter,
+ const int32x4_t round_shift_vec, const int32x4_t offset_const,
+ const int32x4_t sub_const_vec, const int16x8_t vec_round_bits) {
+ int32x4_t sum0, sum1;
+ uint16x8_t res;
+ const int32x4_t zero = vdupq_n_s32(0);
+
+ sum0 = vmull_n_s16(vget_low_s16(s0), y_filter[0]);
+ sum0 = vmlal_n_s16(sum0, vget_low_s16(s1), y_filter[1]);
+ sum0 = vmlal_n_s16(sum0, vget_low_s16(s2), y_filter[2]);
+ sum0 = vmlal_n_s16(sum0, vget_low_s16(s3), y_filter[3]);
+ sum0 = vmlal_n_s16(sum0, vget_low_s16(s4), y_filter[4]);
+ sum0 = vmlal_n_s16(sum0, vget_low_s16(s5), y_filter[5]);
+ sum0 = vmlal_n_s16(sum0, vget_low_s16(s6), y_filter[6]);
+ sum0 = vmlal_n_s16(sum0, vget_low_s16(s7), y_filter[7]);
+
+ sum1 = vmull_n_s16(vget_high_s16(s0), y_filter[0]);
+ sum1 = vmlal_n_s16(sum1, vget_high_s16(s1), y_filter[1]);
+ sum1 = vmlal_n_s16(sum1, vget_high_s16(s2), y_filter[2]);
+ sum1 = vmlal_n_s16(sum1, vget_high_s16(s3), y_filter[3]);
+ sum1 = vmlal_n_s16(sum1, vget_high_s16(s4), y_filter[4]);
+ sum1 = vmlal_n_s16(sum1, vget_high_s16(s5), y_filter[5]);
+ sum1 = vmlal_n_s16(sum1, vget_high_s16(s6), y_filter[6]);
+ sum1 = vmlal_n_s16(sum1, vget_high_s16(s7), y_filter[7]);
+
+ sum0 = vaddq_s32(sum0, offset_const);
+ sum1 = vaddq_s32(sum1, offset_const);
+ sum0 = vqrshlq_s32(sum0, round_shift_vec);
+ sum1 = vqrshlq_s32(sum1, round_shift_vec);
+ sum0 = vsubq_s32(sum0, sub_const_vec);
+ sum1 = vsubq_s32(sum1, sub_const_vec);
+ sum0 = vmaxq_s32(sum0, zero);
+ sum1 = vmaxq_s32(sum1, zero);
+ res = vcombine_u16(vqmovn_u32(vreinterpretq_u32_s32(sum0)),
+ vqmovn_u32(vreinterpretq_u32_s32(sum1)));
+
+ res = vqrshlq_u16(res, vec_round_bits);
+
+ return vqmovn_u16(res);
+}
+
+void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ const uint8_t horiz_offset = filter_params_x->taps / 2 - 1;
+ const int8_t bits = FILTER_BITS - conv_params->round_0;
+
+ (void)subpel_y_q4;
+ (void)conv_params;
+ (void)filter_params_y;
+
+ uint8x8_t t0, t1, t2, t3;
+
+ assert(bits >= 0);
+ assert((FILTER_BITS - conv_params->round_1) >= 0 ||
+ ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
+
+ const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
+
+ const int16x8_t shift_round_0 = vdupq_n_s16(-conv_params->round_0);
+ const int16x8_t shift_by_bits = vdupq_n_s16(-bits);
+
+ src -= horiz_offset;
+
+ if (h == 4) {
+ uint8x8_t d01, d23;
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3;
+ int16x8_t d01_temp, d23_temp;
+
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+
+ load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3);
+ transpose_u8_8x4(&t0, &t1, &t2, &t3);
+
+ s0 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
+ s1 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
+ s2 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
+ s3 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
+ s4 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
+ s5 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
+ s6 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
+ __builtin_prefetch(dst + 0 * dst_stride);
+ __builtin_prefetch(dst + 1 * dst_stride);
+ __builtin_prefetch(dst + 2 * dst_stride);
+ __builtin_prefetch(dst + 3 * dst_stride);
+ src += 7;
+
+ do {
+ load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3);
+ transpose_u8_8x4(&t0, &t1, &t2, &t3);
+
+ s7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
+ s8 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
+ s9 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
+ s10 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
+
+ d0 = convolve8_4x4(s0, s1, s2, s3, s4, s5, s6, s7, x_filter);
+
+ d1 = convolve8_4x4(s1, s2, s3, s4, s5, s6, s7, s8, x_filter);
+
+ d2 = convolve8_4x4(s2, s3, s4, s5, s6, s7, s8, s9, x_filter);
+
+ d3 = convolve8_4x4(s3, s4, s5, s6, s7, s8, s9, s10, x_filter);
+
+ d01_temp = vqrshlq_s16(vcombine_s16(d0, d1), shift_round_0);
+ d23_temp = vqrshlq_s16(vcombine_s16(d2, d3), shift_round_0);
+
+ d01_temp = vqrshlq_s16(d01_temp, shift_by_bits);
+ d23_temp = vqrshlq_s16(d23_temp, shift_by_bits);
+
+ d01 = vqmovun_s16(d01_temp);
+ d23 = vqmovun_s16(d23_temp);
+
+ transpose_u8_4x4(&d01, &d23);
+
+ if (w != 2) {
+ vst1_lane_u32((uint32_t *)(dst + 0 * dst_stride), // 00 01 02 03
+ vreinterpret_u32_u8(d01), 0);
+ vst1_lane_u32((uint32_t *)(dst + 1 * dst_stride), // 10 11 12 13
+ vreinterpret_u32_u8(d23), 0);
+ vst1_lane_u32((uint32_t *)(dst + 2 * dst_stride), // 20 21 22 23
+ vreinterpret_u32_u8(d01), 1);
+ vst1_lane_u32((uint32_t *)(dst + 3 * dst_stride), // 30 31 32 33
+ vreinterpret_u32_u8(d23), 1);
+ } else {
+ vst1_lane_u16((uint16_t *)(dst + 0 * dst_stride), // 00 01
+ vreinterpret_u16_u8(d01), 0);
+ vst1_lane_u16((uint16_t *)(dst + 1 * dst_stride), // 10 11
+ vreinterpret_u16_u8(d23), 0);
+ vst1_lane_u16((uint16_t *)(dst + 2 * dst_stride), // 20 21
+ vreinterpret_u16_u8(d01), 2);
+ vst1_lane_u16((uint16_t *)(dst + 3 * dst_stride), // 30 31
+ vreinterpret_u16_u8(d23), 2);
+ }
+
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ src += 4;
+ dst += 4;
+ w -= 4;
+ } while (w > 0);
+ } else {
+ int width;
+ const uint8_t *s;
+ uint8x8_t t4, t5, t6, t7;
+ int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+
+ if (w <= 4) {
+ do {
+ load_u8_8x8(src, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ s0 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ s1 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ s2 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ s3 = vreinterpretq_s16_u16(vmovl_u8(t3));
+ s4 = vreinterpretq_s16_u16(vmovl_u8(t4));
+ s5 = vreinterpretq_s16_u16(vmovl_u8(t5));
+ s6 = vreinterpretq_s16_u16(vmovl_u8(t6));
+
+ load_u8_8x8(src + 7, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6,
+ &t7);
+ src += 8 * src_stride;
+ __builtin_prefetch(dst + 0 * dst_stride);
+ __builtin_prefetch(dst + 1 * dst_stride);
+ __builtin_prefetch(dst + 2 * dst_stride);
+ __builtin_prefetch(dst + 3 * dst_stride);
+ __builtin_prefetch(dst + 4 * dst_stride);
+ __builtin_prefetch(dst + 5 * dst_stride);
+ __builtin_prefetch(dst + 6 * dst_stride);
+ __builtin_prefetch(dst + 7 * dst_stride);
+
+ transpose_u8_4x8(&t0, &t1, &t2, &t3, t4, t5, t6, t7);
+
+ s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ s8 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ s9 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ s10 = vreinterpretq_s16_u16(vmovl_u8(t3));
+
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+ __builtin_prefetch(src + 4 * src_stride);
+ __builtin_prefetch(src + 5 * src_stride);
+ __builtin_prefetch(src + 6 * src_stride);
+ __builtin_prefetch(src + 7 * src_stride);
+ t0 = convolve8_horiz_8x8(s0, s1, s2, s3, s4, s5, s6, s7, x_filter,
+ shift_round_0, shift_by_bits);
+ t1 = convolve8_horiz_8x8(s1, s2, s3, s4, s5, s6, s7, s8, x_filter,
+ shift_round_0, shift_by_bits);
+ t2 = convolve8_horiz_8x8(s2, s3, s4, s5, s6, s7, s8, s9, x_filter,
+ shift_round_0, shift_by_bits);
+ t3 = convolve8_horiz_8x8(s3, s4, s5, s6, s7, s8, s9, s10, x_filter,
+ shift_round_0, shift_by_bits);
+
+ transpose_u8_8x4(&t0, &t1, &t2, &t3);
+
+ if ((w == 4) && (h > 4)) {
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t0),
+ 0); // 00 01 02 03
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t1),
+ 0); // 10 11 12 13
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t2),
+ 0); // 20 21 22 23
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t3),
+ 0); // 30 31 32 33
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t0),
+ 1); // 40 41 42 43
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t1),
+ 1); // 50 51 52 53
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t2),
+ 1); // 60 61 62 63
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t3),
+ 1); // 70 71 72 73
+ dst += dst_stride;
+ } else if ((w == 4) && (h == 2)) {
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t0),
+ 0); // 00 01 02 03
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t1),
+ 0); // 10 11 12 13
+ dst += dst_stride;
+ } else if ((w == 2) && (h > 4)) {
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t0), 0); // 00 01
+ dst += dst_stride;
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t1), 0); // 10 11
+ dst += dst_stride;
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t2), 0); // 20 21
+ dst += dst_stride;
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t3), 0); // 30 31
+ dst += dst_stride;
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t0), 2); // 40 41
+ dst += dst_stride;
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t1), 2); // 50 51
+ dst += dst_stride;
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t2), 2); // 60 61
+ dst += dst_stride;
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t3), 2); // 70 71
+ dst += dst_stride;
+ } else if ((w == 2) && (h == 2)) {
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t0), 0); // 00 01
+ dst += dst_stride;
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t1), 0); // 10 11
+ dst += dst_stride;
+ }
+ h -= 8;
+ } while (h > 0);
+ } else {
+ uint8_t *d;
+ int16x8_t s11, s12, s13, s14;
+
+ do {
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+ __builtin_prefetch(src + 4 * src_stride);
+ __builtin_prefetch(src + 5 * src_stride);
+ __builtin_prefetch(src + 6 * src_stride);
+ __builtin_prefetch(src + 7 * src_stride);
+ load_u8_8x8(src, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ s0 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ s1 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ s2 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ s3 = vreinterpretq_s16_u16(vmovl_u8(t3));
+ s4 = vreinterpretq_s16_u16(vmovl_u8(t4));
+ s5 = vreinterpretq_s16_u16(vmovl_u8(t5));
+ s6 = vreinterpretq_s16_u16(vmovl_u8(t6));
+
+ width = w;
+ s = src + 7;
+ d = dst;
+ __builtin_prefetch(dst + 0 * dst_stride);
+ __builtin_prefetch(dst + 1 * dst_stride);
+ __builtin_prefetch(dst + 2 * dst_stride);
+ __builtin_prefetch(dst + 3 * dst_stride);
+ __builtin_prefetch(dst + 4 * dst_stride);
+ __builtin_prefetch(dst + 5 * dst_stride);
+ __builtin_prefetch(dst + 6 * dst_stride);
+ __builtin_prefetch(dst + 7 * dst_stride);
+
+ do {
+ load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ s8 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ s9 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ s10 = vreinterpretq_s16_u16(vmovl_u8(t3));
+ s11 = vreinterpretq_s16_u16(vmovl_u8(t4));
+ s12 = vreinterpretq_s16_u16(vmovl_u8(t5));
+ s13 = vreinterpretq_s16_u16(vmovl_u8(t6));
+ s14 = vreinterpretq_s16_u16(vmovl_u8(t7));
+
+ t0 = convolve8_horiz_8x8(s0, s1, s2, s3, s4, s5, s6, s7, x_filter,
+ shift_round_0, shift_by_bits);
+
+ t1 = convolve8_horiz_8x8(s1, s2, s3, s4, s5, s6, s7, s8, x_filter,
+ shift_round_0, shift_by_bits);
+
+ t2 = convolve8_horiz_8x8(s2, s3, s4, s5, s6, s7, s8, s9, x_filter,
+ shift_round_0, shift_by_bits);
+
+ t3 = convolve8_horiz_8x8(s3, s4, s5, s6, s7, s8, s9, s10, x_filter,
+ shift_round_0, shift_by_bits);
+
+ t4 = convolve8_horiz_8x8(s4, s5, s6, s7, s8, s9, s10, s11, x_filter,
+ shift_round_0, shift_by_bits);
+
+ t5 = convolve8_horiz_8x8(s5, s6, s7, s8, s9, s10, s11, s12, x_filter,
+ shift_round_0, shift_by_bits);
+
+ t6 = convolve8_horiz_8x8(s6, s7, s8, s9, s10, s11, s12, s13, x_filter,
+ shift_round_0, shift_by_bits);
+
+ t7 = convolve8_horiz_8x8(s7, s8, s9, s10, s11, s12, s13, s14,
+ x_filter, shift_round_0, shift_by_bits);
+
+ transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ if (h != 2) {
+ store_u8_8x8(d, dst_stride, t0, t1, t2, t3, t4, t5, t6, t7);
+ } else {
+ store_row2_u8_8x8(d, dst_stride, t0, t1);
+ }
+ s0 = s8;
+ s1 = s9;
+ s2 = s10;
+ s3 = s11;
+ s4 = s12;
+ s5 = s13;
+ s6 = s14;
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+ src += 8 * src_stride;
+ dst += 8 * dst_stride;
+ h -= 8;
+ } while (h > 0);
+ }
+ }
+}
+
+void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ const int vert_offset = filter_params_y->taps / 2 - 1;
+
+ src -= vert_offset * src_stride;
+
+ (void)filter_params_x;
+ (void)subpel_x_q4;
+ (void)conv_params;
+
+ assert(conv_params->round_0 <= FILTER_BITS);
+ assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) ||
+ ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
+
+ const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
+
+ if (w <= 4) {
+ uint8x8_t d01, d23;
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3;
+
+ s0 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
+ src += src_stride;
+ s1 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
+ src += src_stride;
+ s2 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
+ src += src_stride;
+ s3 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
+ src += src_stride;
+ s4 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
+ src += src_stride;
+ s5 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
+ src += src_stride;
+ s6 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
+ src += src_stride;
+
+ do {
+ s7 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
+ src += src_stride;
+ s8 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
+ src += src_stride;
+ s9 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
+ src += src_stride;
+ s10 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
+ src += src_stride;
+
+ __builtin_prefetch(dst + 0 * dst_stride);
+ __builtin_prefetch(dst + 1 * dst_stride);
+ __builtin_prefetch(dst + 2 * dst_stride);
+ __builtin_prefetch(dst + 3 * dst_stride);
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+ d0 = convolve8_4x4(s0, s1, s2, s3, s4, s5, s6, s7, y_filter);
+ d1 = convolve8_4x4(s1, s2, s3, s4, s5, s6, s7, s8, y_filter);
+ d2 = convolve8_4x4(s2, s3, s4, s5, s6, s7, s8, s9, y_filter);
+ d3 = convolve8_4x4(s3, s4, s5, s6, s7, s8, s9, s10, y_filter);
+
+ d01 = vqrshrun_n_s16(vcombine_s16(d0, d1), FILTER_BITS);
+ d23 = vqrshrun_n_s16(vcombine_s16(d2, d3), FILTER_BITS);
+ if ((w == 4) && (h != 2)) {
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01),
+ 0); // 00 01 02 03
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01),
+ 1); // 10 11 12 13
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23),
+ 0); // 20 21 22 23
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23),
+ 1); // 30 31 32 33
+ dst += dst_stride;
+ } else if ((w == 4) && (h == 2)) {
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01),
+ 0); // 00 01 02 03
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01),
+ 1); // 10 11 12 13
+ dst += dst_stride;
+ } else if ((w == 2) && (h != 2)) {
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d01), 0); // 00 01
+ dst += dst_stride;
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d01), 2); // 10 11
+ dst += dst_stride;
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d23), 0); // 20 21
+ dst += dst_stride;
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d23), 2); // 30 31
+ dst += dst_stride;
+ } else if ((w == 2) && (h == 2)) {
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d01), 0); // 00 01
+ dst += dst_stride;
+ vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d01), 2); // 10 11
+ dst += dst_stride;
+ }
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ h -= 4;
+ } while (h > 0);
+ } else {
+ int height;
+ const uint8_t *s;
+ uint8_t *d;
+ uint8x8_t t0, t1, t2, t3;
+ int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+
+ do {
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+ __builtin_prefetch(src + 4 * src_stride);
+ __builtin_prefetch(src + 5 * src_stride);
+ __builtin_prefetch(src + 6 * src_stride);
+ s = src;
+ s0 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
+ s += src_stride;
+ s1 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
+ s += src_stride;
+ s2 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
+ s += src_stride;
+ s3 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
+ s += src_stride;
+ s4 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
+ s += src_stride;
+ s5 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
+ s += src_stride;
+ s6 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
+ s += src_stride;
+ d = dst;
+ height = h;
+
+ do {
+ s7 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
+ s += src_stride;
+ s8 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
+ s += src_stride;
+ s9 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
+ s += src_stride;
+ s10 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
+ s += src_stride;
+
+ __builtin_prefetch(d + 0 * dst_stride);
+ __builtin_prefetch(d + 1 * dst_stride);
+ __builtin_prefetch(d + 2 * dst_stride);
+ __builtin_prefetch(d + 3 * dst_stride);
+ __builtin_prefetch(s + 0 * src_stride);
+ __builtin_prefetch(s + 1 * src_stride);
+ __builtin_prefetch(s + 2 * src_stride);
+ __builtin_prefetch(s + 3 * src_stride);
+ t0 = convolve8_vert_8x4(s0, s1, s2, s3, s4, s5, s6, s7, y_filter);
+ t1 = convolve8_vert_8x4(s1, s2, s3, s4, s5, s6, s7, s8, y_filter);
+ t2 = convolve8_vert_8x4(s2, s3, s4, s5, s6, s7, s8, s9, y_filter);
+ t3 = convolve8_vert_8x4(s3, s4, s5, s6, s7, s8, s9, s10, y_filter);
+ if (h != 2) {
+ vst1_u8(d, t0);
+ d += dst_stride;
+ vst1_u8(d, t1);
+ d += dst_stride;
+ vst1_u8(d, t2);
+ d += dst_stride;
+ vst1_u8(d, t3);
+ d += dst_stride;
+ } else {
+ vst1_u8(d, t0);
+ d += dst_stride;
+ vst1_u8(d, t1);
+ d += dst_stride;
+ }
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ height -= 4;
+ } while (height > 0);
+ src += 8;
+ dst += 8;
+ w -= 8;
+ } while (w > 0);
+ }
+}
+
+void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ int im_dst_stride;
+ int width, height;
+ uint8x8_t t0, t1, t2, t3, t4, t5, t6, t7;
+
+ DECLARE_ALIGNED(16, int16_t,
+ im_block[(MAX_SB_SIZE + HORIZ_EXTRA_ROWS) * MAX_SB_SIZE]);
+
+ const int bd = 8;
+ const int im_h = h + filter_params_y->taps - 1;
+ const int im_stride = MAX_SB_SIZE;
+ const int vert_offset = filter_params_y->taps / 2 - 1;
+ const int horiz_offset = filter_params_x->taps / 2 - 1;
+
+ const uint8_t *src_ptr = src - vert_offset * src_stride - horiz_offset;
+ const uint8_t *s;
+ int16_t *dst_ptr;
+
+ dst_ptr = im_block;
+ im_dst_stride = im_stride;
+ height = im_h;
+ width = w;
+
+ const int16_t round_bits =
+ FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
+ const int16x8_t vec_round_bits = vdupq_n_s16(-round_bits);
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
+
+ int16_t x_filter_tmp[8];
+ int16x8_t filter_x_coef = vld1q_s16(x_filter);
+
+  // The filter coefficients are all even, so they are downshifted by 1 to
+  // reduce the intermediate precision requirements.
+ filter_x_coef = vshrq_n_s16(filter_x_coef, 1);
+ vst1q_s16(&x_filter_tmp[0], filter_x_coef);
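+  // Because every tap has been halved, the horizontal stage below uses the
+  // correspondingly halved offset (1 << (bd + FILTER_BITS - 2)) and rounds by
+  // (conv_params->round_0 - 1), so the overall scaling is unchanged.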
+
+ assert(conv_params->round_0 > 0);
+
+ if (w <= 4) {
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3;
+
+ const int16x4_t horiz_const = vdup_n_s16((1 << (bd + FILTER_BITS - 2)));
+ const int16x4_t shift_round_0 = vdup_n_s16(-(conv_params->round_0 - 1));
+
+ do {
+ s = src_ptr;
+ __builtin_prefetch(s + 0 * src_stride);
+ __builtin_prefetch(s + 1 * src_stride);
+ __builtin_prefetch(s + 2 * src_stride);
+ __builtin_prefetch(s + 3 * src_stride);
+
+ load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
+ transpose_u8_8x4(&t0, &t1, &t2, &t3);
+
+ s0 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
+ s1 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
+ s2 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
+ s3 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
+ s4 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
+ s5 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
+ s6 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
+
+ __builtin_prefetch(dst_ptr + 0 * im_dst_stride);
+ __builtin_prefetch(dst_ptr + 1 * im_dst_stride);
+ __builtin_prefetch(dst_ptr + 2 * im_dst_stride);
+ __builtin_prefetch(dst_ptr + 3 * im_dst_stride);
+ s += 7;
+
+ load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
+ transpose_u8_8x4(&t0, &t1, &t2, &t3);
+
+ s7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
+ s8 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
+ s9 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
+ s10 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
+
+ d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
+ horiz_const, shift_round_0);
+ d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
+ horiz_const, shift_round_0);
+ d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
+ horiz_const, shift_round_0);
+ d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
+ horiz_const, shift_round_0);
+
+ transpose_s16_4x4d(&d0, &d1, &d2, &d3);
+ if (w == 4) {
+ vst1_s16((dst_ptr + 0 * im_dst_stride), d0);
+ vst1_s16((dst_ptr + 1 * im_dst_stride), d1);
+ vst1_s16((dst_ptr + 2 * im_dst_stride), d2);
+ vst1_s16((dst_ptr + 3 * im_dst_stride), d3);
+ } else if (w == 2) {
+ vst1_lane_u32((uint32_t *)(dst_ptr + 0 * im_dst_stride),
+ vreinterpret_u32_s16(d0), 0);
+ vst1_lane_u32((uint32_t *)(dst_ptr + 1 * im_dst_stride),
+ vreinterpret_u32_s16(d1), 0);
+ vst1_lane_u32((uint32_t *)(dst_ptr + 2 * im_dst_stride),
+ vreinterpret_u32_s16(d2), 0);
+ vst1_lane_u32((uint32_t *)(dst_ptr + 3 * im_dst_stride),
+ vreinterpret_u32_s16(d3), 0);
+ }
+ src_ptr += 4 * src_stride;
+ dst_ptr += 4 * im_dst_stride;
+ height -= 4;
+ } while (height > 0);
+ } else {
+ int16_t *d_tmp;
+ int16x8_t s11, s12, s13, s14;
+ int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ int16x8_t res0, res1, res2, res3, res4, res5, res6, res7;
+
+ const int16x8_t horiz_const = vdupq_n_s16((1 << (bd + FILTER_BITS - 2)));
+ const int16x8_t shift_round_0 = vdupq_n_s16(-(conv_params->round_0 - 1));
+
+ do {
+ __builtin_prefetch(src_ptr + 0 * src_stride);
+ __builtin_prefetch(src_ptr + 1 * src_stride);
+ __builtin_prefetch(src_ptr + 2 * src_stride);
+ __builtin_prefetch(src_ptr + 3 * src_stride);
+ __builtin_prefetch(src_ptr + 4 * src_stride);
+ __builtin_prefetch(src_ptr + 5 * src_stride);
+ __builtin_prefetch(src_ptr + 6 * src_stride);
+ __builtin_prefetch(src_ptr + 7 * src_stride);
+
+ load_u8_8x8(src_ptr, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+
+ transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+
+ s0 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ s1 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ s2 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ s3 = vreinterpretq_s16_u16(vmovl_u8(t3));
+ s4 = vreinterpretq_s16_u16(vmovl_u8(t4));
+ s5 = vreinterpretq_s16_u16(vmovl_u8(t5));
+ s6 = vreinterpretq_s16_u16(vmovl_u8(t6));
+
+ width = w;
+ s = src_ptr + 7;
+ d_tmp = dst_ptr;
+
+ __builtin_prefetch(dst_ptr + 0 * im_dst_stride);
+ __builtin_prefetch(dst_ptr + 1 * im_dst_stride);
+ __builtin_prefetch(dst_ptr + 2 * im_dst_stride);
+ __builtin_prefetch(dst_ptr + 3 * im_dst_stride);
+ __builtin_prefetch(dst_ptr + 4 * im_dst_stride);
+ __builtin_prefetch(dst_ptr + 5 * im_dst_stride);
+ __builtin_prefetch(dst_ptr + 6 * im_dst_stride);
+ __builtin_prefetch(dst_ptr + 7 * im_dst_stride);
+
+ do {
+ load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+
+ transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+
+ s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ s8 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ s9 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ s10 = vreinterpretq_s16_u16(vmovl_u8(t3));
+ s11 = vreinterpretq_s16_u16(vmovl_u8(t4));
+ s12 = vreinterpretq_s16_u16(vmovl_u8(t5));
+ s13 = vreinterpretq_s16_u16(vmovl_u8(t6));
+ s14 = vreinterpretq_s16_u16(vmovl_u8(t7));
+
+ res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
+ horiz_const, shift_round_0);
+ res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
+ horiz_const, shift_round_0);
+ res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
+ horiz_const, shift_round_0);
+ res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
+ horiz_const, shift_round_0);
+ res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, x_filter_tmp,
+ horiz_const, shift_round_0);
+ res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12,
+ x_filter_tmp, horiz_const, shift_round_0);
+ res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13,
+ x_filter_tmp, horiz_const, shift_round_0);
+ res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14,
+ x_filter_tmp, horiz_const, shift_round_0);
+
+ transpose_s16_8x8(&res0, &res1, &res2, &res3, &res4, &res5, &res6,
+ &res7);
+
+ store_s16_8x8(d_tmp, im_dst_stride, res0, res1, res2, res3, res4, res5,
+ res6, res7);
+
+ s0 = s8;
+ s1 = s9;
+ s2 = s10;
+ s3 = s11;
+ s4 = s12;
+ s5 = s13;
+ s6 = s14;
+ s += 8;
+ d_tmp += 8;
+ width -= 8;
+ } while (width > 0);
+ src_ptr += 8 * src_stride;
+ dst_ptr += 8 * im_dst_stride;
+ height -= 8;
+ } while (height > 0);
+ }
+
+ // vertical
+ {
+ uint8_t *dst_u8_ptr, *d_u8;
+ int16_t *v_src_ptr, *v_s;
+
+ const int32_t sub_const = (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
+
+ const int32x4_t round_shift_vec = vdupq_n_s32(-(conv_params->round_1));
+ const int32x4_t offset_const = vdupq_n_s32(1 << offset_bits);
+ const int32x4_t sub_const_vec = vdupq_n_s32(sub_const);
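+    // offset_const keeps the vertical sums non-negative through the round_1
+    // shift; sub_const_vec then removes that offset together with the
+    // horizontal-stage offset before the final round_bits shift back to pixel
+    // range.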
+
+ src_stride = im_stride;
+ v_src_ptr = im_block;
+ dst_u8_ptr = dst;
+
+ height = h;
+ width = w;
+
+ if (width <= 4) {
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ uint16x4_t d0, d1, d2, d3;
+ uint16x8_t dd0, dd1;
+ uint8x8_t d01, d23;
+
+ d_u8 = dst_u8_ptr;
+ v_s = v_src_ptr;
+
+ __builtin_prefetch(v_s + 0 * im_stride);
+ __builtin_prefetch(v_s + 1 * im_stride);
+ __builtin_prefetch(v_s + 2 * im_stride);
+ __builtin_prefetch(v_s + 3 * im_stride);
+ __builtin_prefetch(v_s + 4 * im_stride);
+ __builtin_prefetch(v_s + 5 * im_stride);
+ __builtin_prefetch(v_s + 6 * im_stride);
+ __builtin_prefetch(v_s + 7 * im_stride);
+
+ load_s16_4x8(v_s, im_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
+ v_s += (7 * im_stride);
+
+ do {
+ load_s16_4x4(v_s, im_stride, &s7, &s8, &s9, &s10);
+ v_s += (im_stride << 2);
+
+ __builtin_prefetch(d_u8 + 0 * dst_stride);
+ __builtin_prefetch(d_u8 + 1 * dst_stride);
+ __builtin_prefetch(d_u8 + 2 * dst_stride);
+ __builtin_prefetch(d_u8 + 3 * dst_stride);
+
+ d0 = convolve8_vert_4x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
+ round_shift_vec, offset_const,
+ sub_const_vec);
+ d1 = convolve8_vert_4x4_s32(s1, s2, s3, s4, s5, s6, s7, s8, y_filter,
+ round_shift_vec, offset_const,
+ sub_const_vec);
+ d2 = convolve8_vert_4x4_s32(s2, s3, s4, s5, s6, s7, s8, s9, y_filter,
+ round_shift_vec, offset_const,
+ sub_const_vec);
+ d3 = convolve8_vert_4x4_s32(s3, s4, s5, s6, s7, s8, s9, s10, y_filter,
+ round_shift_vec, offset_const,
+ sub_const_vec);
+
+ dd0 = vqrshlq_u16(vcombine_u16(d0, d1), vec_round_bits);
+ dd1 = vqrshlq_u16(vcombine_u16(d2, d3), vec_round_bits);
+
+ d01 = vqmovn_u16(dd0);
+ d23 = vqmovn_u16(dd1);
+
+ if ((w == 4) && (h != 2)) {
+ vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01),
+ 0); // 00 01 02 03
+ d_u8 += dst_stride;
+ vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01),
+ 1); // 10 11 12 13
+ d_u8 += dst_stride;
+ vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d23),
+ 0); // 20 21 22 23
+ d_u8 += dst_stride;
+ vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d23),
+ 1); // 30 31 32 33
+ d_u8 += dst_stride;
+ } else if ((w == 2) && (h != 2)) {
+ vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01),
+ 0); // 00 01
+ d_u8 += dst_stride;
+ vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01),
+ 2); // 10 11
+ d_u8 += dst_stride;
+ vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d23),
+ 0); // 20 21
+ d_u8 += dst_stride;
+ vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d23),
+ 2); // 30 31
+ d_u8 += dst_stride;
+ } else if ((w == 4) && (h == 2)) {
+ vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01),
+ 0); // 00 01 02 03
+ d_u8 += dst_stride;
+ vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01),
+ 1); // 10 11 12 13
+ d_u8 += dst_stride;
+ } else if ((w == 2) && (h == 2)) {
+ vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01),
+ 0); // 00 01
+ d_u8 += dst_stride;
+ vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01),
+ 2); // 10 11
+ d_u8 += dst_stride;
+ }
+
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ height -= 4;
+ } while (height > 0);
+ } else {
+ // if width is a multiple of 8 & height is a multiple of 4
+ int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ uint8x8_t res0, res1, res2, res3;
+
+ do {
+ __builtin_prefetch(v_src_ptr + 0 * im_stride);
+ __builtin_prefetch(v_src_ptr + 1 * im_stride);
+ __builtin_prefetch(v_src_ptr + 2 * im_stride);
+ __builtin_prefetch(v_src_ptr + 3 * im_stride);
+ __builtin_prefetch(v_src_ptr + 4 * im_stride);
+ __builtin_prefetch(v_src_ptr + 5 * im_stride);
+ __builtin_prefetch(v_src_ptr + 6 * im_stride);
+ __builtin_prefetch(v_src_ptr + 7 * im_stride);
+
+ v_s = v_src_ptr;
+ load_s16_8x8(v_s, im_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
+ v_s += (7 * im_stride);
+
+ d_u8 = dst_u8_ptr;
+ height = h;
+
+ do {
+ load_s16_8x4(v_s, im_stride, &s7, &s8, &s9, &s10);
+ v_s += (im_stride << 2);
+
+ __builtin_prefetch(d_u8 + 4 * dst_stride);
+ __builtin_prefetch(d_u8 + 5 * dst_stride);
+ __builtin_prefetch(d_u8 + 6 * dst_stride);
+ __builtin_prefetch(d_u8 + 7 * dst_stride);
+
+ res0 = convolve8_vert_8x4_s32(s0, s1, s2, s3, s4, s5, s6, s7,
+ y_filter, round_shift_vec, offset_const,
+ sub_const_vec, vec_round_bits);
+ res1 = convolve8_vert_8x4_s32(s1, s2, s3, s4, s5, s6, s7, s8,
+ y_filter, round_shift_vec, offset_const,
+ sub_const_vec, vec_round_bits);
+ res2 = convolve8_vert_8x4_s32(s2, s3, s4, s5, s6, s7, s8, s9,
+ y_filter, round_shift_vec, offset_const,
+ sub_const_vec, vec_round_bits);
+ res3 = convolve8_vert_8x4_s32(s3, s4, s5, s6, s7, s8, s9, s10,
+ y_filter, round_shift_vec, offset_const,
+ sub_const_vec, vec_round_bits);
+
+ if (h != 2) {
+ vst1_u8(d_u8, res0);
+ d_u8 += dst_stride;
+ vst1_u8(d_u8, res1);
+ d_u8 += dst_stride;
+ vst1_u8(d_u8, res2);
+ d_u8 += dst_stride;
+ vst1_u8(d_u8, res3);
+ d_u8 += dst_stride;
+ } else {
+ vst1_u8(d_u8, res0);
+ d_u8 += dst_stride;
+ vst1_u8(d_u8, res1);
+ d_u8 += dst_stride;
+ }
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ height -= 4;
+ } while (height > 0);
+ v_src_ptr += 8;
+ dst_u8_ptr += 8;
+ w -= 8;
+ } while (w > 0);
+ }
+ }
+}
+
+void av1_convolve_2d_copy_sr_neon(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ (void)filter_params_x;
+ (void)filter_params_y;
+ (void)subpel_x_q4;
+ (void)subpel_y_q4;
+ (void)conv_params;
+
+ const uint8_t *src1;
+ uint8_t *dst1;
+ int y;
+
+ if (!(w & 0x0F)) {
+ for (y = 0; y < h; ++y) {
+ src1 = src;
+ dst1 = dst;
+ for (int x = 0; x < (w >> 4); ++x) {
+ vst1q_u8(dst1, vld1q_u8(src1));
+ src1 += 16;
+ dst1 += 16;
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+ } else if (!(w & 0x07)) {
+ for (y = 0; y < h; ++y) {
+ vst1_u8(dst, vld1_u8(src));
+ src += src_stride;
+ dst += dst_stride;
+ }
+ } else if (!(w & 0x03)) {
+ for (y = 0; y < h; ++y) {
+ vst1_lane_u32((uint32_t *)(dst), vreinterpret_u32_u8(vld1_u8(src)), 0);
+ src += src_stride;
+ dst += dst_stride;
+ }
+ } else if (!(w & 0x01)) {
+ for (y = 0; y < h; ++y) {
+ vst1_lane_u16((uint16_t *)(dst), vreinterpret_u16_u8(vld1_u8(src)), 0);
+ src += src_stride;
+ dst += dst_stride;
+ }
+ }
+}
diff --git a/third_party/aom/av1/common/arm/convolve_neon.h b/third_party/aom/av1/common/arm/convolve_neon.h
new file mode 100644
index 000000000..47c93d645
--- /dev/null
+++ b/third_party/aom/av1/common/arm/convolve_neon.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef AV1_COMMON_ARM_CONVOLVE_NEON_H_
+#define AV1_COMMON_ARM_CONVOLVE_NEON_H_
+
+#include <arm_neon.h>
+
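+// HORIZ_EXTRA_ROWS rounds SUBPEL_TAPS up to a multiple of 8: the number of
+// extra rows the intermediate buffer needs for the vertical filter taps
+// (8 when SUBPEL_TAPS is 8).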
+#define HORIZ_EXTRA_ROWS ((SUBPEL_TAPS + 7) & ~0x07)
+
+static INLINE uint8x8_t wiener_convolve8_vert_4x8(
+ const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
+ const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
+ const int16x8_t s6, int16_t *filter_y, const int bd,
+ const int round1_bits) {
+ int16x8_t ss0, ss1, ss2;
+ int32x4_t sum0, sum1;
+ uint16x4_t tmp0, tmp1;
+ uint16x8_t tmp;
+ uint8x8_t res;
+
+ const int32_t round_const = (1 << (bd + round1_bits - 1));
+ const int32x4_t round_bits = vdupq_n_s32(-round1_bits);
+ const int32x4_t zero = vdupq_n_s32(0);
+ const int32x4_t round_vec = vdupq_n_s32(round_const);
+
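+  // The 7-tap Wiener filter is symmetric, so samples that share a coefficient
+  // (s0/s6, s1/s5, s2/s4) are summed before multiplying, leaving only four
+  // multiplies per 32-bit half.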
+ ss0 = vaddq_s16(s0, s6);
+ ss1 = vaddq_s16(s1, s5);
+ ss2 = vaddq_s16(s2, s4);
+
+ sum0 = vmull_n_s16(vget_low_s16(ss0), filter_y[0]);
+ sum0 = vmlal_n_s16(sum0, vget_low_s16(ss1), filter_y[1]);
+ sum0 = vmlal_n_s16(sum0, vget_low_s16(ss2), filter_y[2]);
+ sum0 = vmlal_n_s16(sum0, vget_low_s16(s3), filter_y[3]);
+
+ sum1 = vmull_n_s16(vget_high_s16(ss0), filter_y[0]);
+ sum1 = vmlal_n_s16(sum1, vget_high_s16(ss1), filter_y[1]);
+ sum1 = vmlal_n_s16(sum1, vget_high_s16(ss2), filter_y[2]);
+ sum1 = vmlal_n_s16(sum1, vget_high_s16(s3), filter_y[3]);
+
+ sum0 = vsubq_s32(sum0, round_vec);
+ sum1 = vsubq_s32(sum1, round_vec);
+
+ /* right shift & rounding */
+ sum0 = vrshlq_s32(sum0, round_bits);
+ sum1 = vrshlq_s32(sum1, round_bits);
+
+ sum0 = vmaxq_s32(sum0, zero);
+ sum1 = vmaxq_s32(sum1, zero);
+
+ /* from int32x4_t to uint8x8_t */
+ tmp0 = vqmovn_u32(vreinterpretq_u32_s32(sum0));
+ tmp1 = vqmovn_u32(vreinterpretq_u32_s32(sum1));
+ tmp = vcombine_u16(tmp0, tmp1);
+ res = vqmovn_u16(tmp);
+
+ return res;
+}
+
+static INLINE uint16x8_t wiener_convolve8_horiz_8x8(
+ const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
+ const int16x8_t s3, int16_t *filter_x, const int bd,
+ const int round0_bits) {
+ int16x8_t sum;
+ uint16x8_t res;
+ int32x4_t sum_0, sum_1;
+ int32x4_t s3_0, s3_1;
+ const int32_t round_const_0 = (1 << (bd + FILTER_BITS - 1));
+ const int32_t round_const_1 = (1 << ((bd) + 1 + FILTER_BITS - round0_bits));
+
+  /* Used for the right shift by conv_params->round_0. */
+ const int32x4_t round_bits = vdupq_n_s32(-round0_bits);
+
+ const int32x4_t round_vec_0 = vdupq_n_s32(round_const_0);
+ const int32x4_t round_vec_1 = vdupq_n_s32(round_const_1);
+
+ sum = vmulq_n_s16(s0, filter_x[0]);
+ sum = vmlaq_n_s16(sum, s1, filter_x[1]);
+ sum = vmlaq_n_s16(sum, s2, filter_x[2]);
+
+  /* Widen the 16x8 sum into two 32x4 registers. */
+ sum_0 = vmovl_s16(vget_low_s16(sum));
+ sum_1 = vmovl_s16(vget_high_s16(sum));
+
+  /* s3 is multiplied by filter_x[3], whose maximum value is 128, so the
+   * largest possible value (128 * 128 * 255) exceeds 16 bits and is
+   * accumulated in 32 bits.
+   */
+
+ s3_0 = vmull_n_s16(vget_low_s16(s3), filter_x[3]);
+ s3_1 = vmull_n_s16(vget_high_s16(s3), filter_x[3]);
+ sum_0 = vaddq_s32(sum_0, s3_0);
+ sum_1 = vaddq_s32(sum_1, s3_1);
+
+ /* Add the constant value */
+ sum_0 = vaddq_s32(sum_0, round_vec_0);
+ sum_1 = vaddq_s32(sum_1, round_vec_0);
+
+ /* right shift & rounding & saturating */
+ sum_0 = vqrshlq_s32(sum_0, round_bits);
+ sum_1 = vqrshlq_s32(sum_1, round_bits);
+
+ /* Clipping to max value */
+ sum_0 = vminq_s32(sum_0, round_vec_1);
+ sum_1 = vminq_s32(sum_1, round_vec_1);
+
+ res = vcombine_u16(vqmovun_s32(sum_0), vqmovun_s32(sum_1));
+ return res;
+}
+
+static INLINE uint16x4_t wiener_convolve8_horiz_4x8(
+ const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
+ const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
+ const int16x4_t s6, int16_t *filter_x, const int bd,
+ const int round0_bits) {
+ uint16x4_t res;
+ int32x4_t sum_0, s3_0;
+ int16x4_t sum, temp0, temp1, temp2;
+
+ const int32_t round_const_0 = (1 << (bd + FILTER_BITS - 1));
+ const int32_t round_const_1 = (1 << ((bd) + 1 + FILTER_BITS - round0_bits));
+ const int32x4_t round_bits = vdupq_n_s32(-round0_bits);
+ const int32x4_t zero = vdupq_n_s32(0);
+ const int32x4_t round_vec_0 = vdupq_n_s32(round_const_0);
+ const int32x4_t round_vec_1 = vdupq_n_s32(round_const_1);
+
+ temp0 = vadd_s16(s0, s6);
+ temp1 = vadd_s16(s1, s5);
+ temp2 = vadd_s16(s2, s4);
+
+ sum = vmul_n_s16(temp0, filter_x[0]);
+ sum = vmla_n_s16(sum, temp1, filter_x[1]);
+ sum = vmla_n_s16(sum, temp2, filter_x[2]);
+ sum_0 = vmovl_s16(sum);
+
+  /* s3 is multiplied by filter_x[3], whose maximum value is 128, so the
+   * largest possible value is 128 * 128 * 255; therefore, 32 bits are
+   * required to hold the result.
+   */
+ s3_0 = vmull_n_s16(s3, filter_x[3]);
+ sum_0 = vaddq_s32(sum_0, s3_0);
+
+ sum_0 = vaddq_s32(sum_0, round_vec_0);
+ sum_0 = vrshlq_s32(sum_0, round_bits);
+
+ sum_0 = vmaxq_s32(sum_0, zero);
+ sum_0 = vminq_s32(sum_0, round_vec_1);
+ res = vqmovun_s32(sum_0);
+ return res;
+}
+
+static INLINE int16x8_t
+convolve8_8x8_s16(const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
+ const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
+ const int16x8_t s6, const int16x8_t s7, const int16_t *filter,
+ const int16x8_t horiz_const, const int16x8_t shift_round_0) {
+ int16x8_t sum;
+ int16x8_t res;
+
+ sum = horiz_const;
+ sum = vmlaq_n_s16(sum, s0, filter[0]);
+ sum = vmlaq_n_s16(sum, s1, filter[1]);
+ sum = vmlaq_n_s16(sum, s2, filter[2]);
+ sum = vmlaq_n_s16(sum, s3, filter[3]);
+ sum = vmlaq_n_s16(sum, s4, filter[4]);
+ sum = vmlaq_n_s16(sum, s5, filter[5]);
+ sum = vmlaq_n_s16(sum, s6, filter[6]);
+ sum = vmlaq_n_s16(sum, s7, filter[7]);
+
+ res = vqrshlq_s16(sum, shift_round_0);
+
+ return res;
+}
+
+static INLINE int16x4_t
+convolve8_4x4_s16(const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
+ const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
+ const int16x4_t s6, const int16x4_t s7, const int16_t *filter,
+ const int16x4_t horiz_const, const int16x4_t shift_round_0) {
+ int16x4_t sum;
+ sum = horiz_const;
+ sum = vmla_n_s16(sum, s0, filter[0]);
+ sum = vmla_n_s16(sum, s1, filter[1]);
+ sum = vmla_n_s16(sum, s2, filter[2]);
+ sum = vmla_n_s16(sum, s3, filter[3]);
+ sum = vmla_n_s16(sum, s4, filter[4]);
+ sum = vmla_n_s16(sum, s5, filter[5]);
+ sum = vmla_n_s16(sum, s6, filter[6]);
+ sum = vmla_n_s16(sum, s7, filter[7]);
+
+ sum = vqrshl_s16(sum, shift_round_0);
+
+ return sum;
+}
+
+static INLINE uint16x4_t convolve8_4x4_s32(
+ const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
+ const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
+ const int16x4_t s6, const int16x4_t s7, const int16_t *y_filter,
+ const int32x4_t round_shift_vec, const int32x4_t offset_const) {
+ int32x4_t sum0;
+ uint16x4_t res;
+ const int32x4_t zero = vdupq_n_s32(0);
+
+ sum0 = vmull_n_s16(s0, y_filter[0]);
+ sum0 = vmlal_n_s16(sum0, s1, y_filter[1]);
+ sum0 = vmlal_n_s16(sum0, s2, y_filter[2]);
+ sum0 = vmlal_n_s16(sum0, s3, y_filter[3]);
+ sum0 = vmlal_n_s16(sum0, s4, y_filter[4]);
+ sum0 = vmlal_n_s16(sum0, s5, y_filter[5]);
+ sum0 = vmlal_n_s16(sum0, s6, y_filter[6]);
+ sum0 = vmlal_n_s16(sum0, s7, y_filter[7]);
+
+ sum0 = vaddq_s32(sum0, offset_const);
+ sum0 = vqrshlq_s32(sum0, round_shift_vec);
+ sum0 = vmaxq_s32(sum0, zero);
+ res = vmovn_u32(vreinterpretq_u32_s32(sum0));
+
+ return res;
+}
+
+#endif // AV1_COMMON_ARM_CONVOLVE_NEON_H_
diff --git a/third_party/aom/av1/common/arm/intrapred_neon.c b/third_party/aom/av1/common/arm/intrapred_neon.c
new file mode 100644
index 000000000..799355553
--- /dev/null
+++ b/third_party/aom/av1/common/arm/intrapred_neon.c
@@ -0,0 +1,79 @@
+/*
+ *
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "aom_mem/aom_mem.h"
+#include "aom_ports/mem.h"
+#include "av1/common/arm/mem_neon.h"
+#include "config/aom_dsp_rtcd.h"
+
+static INLINE void highbd_dc_predictor_neon(uint16_t *dst, ptrdiff_t stride,
+ int bw, const uint16_t *above,
+ const uint16_t *left) {
+ assert(bw >= 4);
+ assert(IS_POWER_OF_TWO(bw));
+ int expected_dc, sum = 0;
+ const int count = bw * 2;
+ uint32x4_t sum_q = vdupq_n_u32(0);
+ uint32x2_t sum_d;
+ uint16_t *dst_1;
+ if (bw >= 8) {
+ for (int i = 0; i < bw; i += 8) {
+ sum_q = vpadalq_u16(sum_q, vld1q_u16(above));
+ sum_q = vpadalq_u16(sum_q, vld1q_u16(left));
+ above += 8;
+ left += 8;
+ }
+ sum_d = vadd_u32(vget_low_u32(sum_q), vget_high_u32(sum_q));
+ sum = vget_lane_s32(vreinterpret_s32_u64(vpaddl_u32(sum_d)), 0);
+ expected_dc = (sum + (count >> 1)) / count;
+ const uint16x8_t dc = vdupq_n_u16((uint16_t)expected_dc);
+ for (int r = 0; r < bw; r++) {
+ dst_1 = dst;
+ for (int i = 0; i < bw; i += 8) {
+ vst1q_u16(dst_1, dc);
+ dst_1 += 8;
+ }
+ dst += stride;
+ }
+ } else { // 4x4
+ sum_q = vaddl_u16(vld1_u16(above), vld1_u16(left));
+ sum_d = vadd_u32(vget_low_u32(sum_q), vget_high_u32(sum_q));
+ sum = vget_lane_s32(vreinterpret_s32_u64(vpaddl_u32(sum_d)), 0);
+ expected_dc = (sum + (count >> 1)) / count;
+ const uint16x4_t dc = vdup_n_u16((uint16_t)expected_dc);
+ for (int r = 0; r < bw; r++) {
+ vst1_u16(dst, dc);
+ dst += stride;
+ }
+ }
+}
+
+#define intra_pred_highbd_sized(type, width) \
+ void aom_highbd_##type##_predictor_##width##x##width##_neon( \
+ uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \
+ const uint16_t *left, int bd) { \
+ (void)bd; \
+ highbd_##type##_predictor_neon(dst, stride, width, above, left); \
+ }
+
+#define intra_pred_square(type) \
+ intra_pred_highbd_sized(type, 4); \
+ intra_pred_highbd_sized(type, 8); \
+ intra_pred_highbd_sized(type, 16); \
+ intra_pred_highbd_sized(type, 32); \
+ intra_pred_highbd_sized(type, 64);
+
+intra_pred_square(dc);
+
+#undef intra_pred_square
diff --git a/third_party/aom/av1/common/arm/jnt_convolve_neon.c b/third_party/aom/av1/common/arm/jnt_convolve_neon.c
new file mode 100644
index 000000000..992be4a9e
--- /dev/null
+++ b/third_party/aom/av1/common/arm/jnt_convolve_neon.c
@@ -0,0 +1,1326 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_dsp/txfm_common.h"
+#include "aom_ports/mem.h"
+#include "av1/common/common.h"
+#include "av1/common/arm/convolve_neon.h"
+#include "av1/common/arm/mem_neon.h"
+#include "av1/common/arm/transpose_neon.h"
+
+static INLINE void compute_avg_4x4(
+ uint16x4_t res0, uint16x4_t res1, uint16x4_t res2, uint16x4_t res3,
+ uint16x4_t d0, uint16x4_t d1, uint16x4_t d2, uint16x4_t d3,
+ const uint16_t fwd_offset, const uint16_t bck_offset,
+ const int16x4_t sub_const_vec, const int16_t round_bits,
+ const int32_t use_jnt_comp_avg, uint8x8_t *t0, uint8x8_t *t1) {
+ int16x4_t tmp0, tmp1, tmp2, tmp3;
+ uint16x4_t tmp_u0, tmp_u1, tmp_u2, tmp_u3;
+ uint32x4_t sum0, sum1, sum2, sum3;
+
+ int32x4_t dst0, dst1, dst2, dst3;
+ int16x8_t tmp4, tmp5;
+ const int16x8_t zero = vdupq_n_s16(0);
+
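+  // With use_jnt_comp_avg the two predictions are blended using the
+  // fwd_offset/bck_offset distance weights and shifted down by
+  // DIST_PRECISION_BITS; otherwise a simple halving add (vhadd) averages them.
+  // Both paths then subtract the compound offset (sub_const_vec) and round
+  // back to pixel range.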
+ if (use_jnt_comp_avg) {
+ const int32x4_t round_bits_vec = vdupq_n_s32((int32_t)(-round_bits));
+ const int32x4_t const_vec = vmovl_s16(sub_const_vec);
+
+ sum0 = vmull_n_u16(res0, fwd_offset);
+ sum0 = vmlal_n_u16(sum0, d0, bck_offset);
+ sum1 = vmull_n_u16(res1, fwd_offset);
+ sum1 = vmlal_n_u16(sum1, d1, bck_offset);
+ sum2 = vmull_n_u16(res2, fwd_offset);
+ sum2 = vmlal_n_u16(sum2, d2, bck_offset);
+ sum3 = vmull_n_u16(res3, fwd_offset);
+ sum3 = vmlal_n_u16(sum3, d3, bck_offset);
+
+ sum0 = vshrq_n_u32(sum0, DIST_PRECISION_BITS);
+ sum1 = vshrq_n_u32(sum1, DIST_PRECISION_BITS);
+ sum2 = vshrq_n_u32(sum2, DIST_PRECISION_BITS);
+ sum3 = vshrq_n_u32(sum3, DIST_PRECISION_BITS);
+
+ dst0 = vsubq_s32(vreinterpretq_s32_u32(sum0), const_vec);
+ dst1 = vsubq_s32(vreinterpretq_s32_u32(sum1), const_vec);
+ dst2 = vsubq_s32(vreinterpretq_s32_u32(sum2), const_vec);
+ dst3 = vsubq_s32(vreinterpretq_s32_u32(sum3), const_vec);
+
+ dst0 = vqrshlq_s32(dst0, round_bits_vec);
+ dst1 = vqrshlq_s32(dst1, round_bits_vec);
+ dst2 = vqrshlq_s32(dst2, round_bits_vec);
+ dst3 = vqrshlq_s32(dst3, round_bits_vec);
+
+ tmp0 = vqmovn_s32(dst0);
+ tmp1 = vqmovn_s32(dst1);
+ tmp2 = vqmovn_s32(dst2);
+ tmp3 = vqmovn_s32(dst3);
+ tmp4 = vcombine_s16(tmp0, tmp1);
+ tmp5 = vcombine_s16(tmp2, tmp3);
+ tmp4 = vmaxq_s16(tmp4, zero);
+ tmp5 = vmaxq_s16(tmp5, zero);
+
+ *t0 = vqmovn_u16(vreinterpretq_u16_s16(tmp4));
+ *t1 = vqmovn_u16(vreinterpretq_u16_s16(tmp5));
+ } else {
+ const int16x4_t round_bits_vec = vdup_n_s16(-round_bits);
+ tmp_u0 = vhadd_u16(res0, d0);
+ tmp_u1 = vhadd_u16(res1, d1);
+ tmp_u2 = vhadd_u16(res2, d2);
+ tmp_u3 = vhadd_u16(res3, d3);
+
+ tmp0 = vsub_s16(vreinterpret_s16_u16(tmp_u0), sub_const_vec);
+ tmp1 = vsub_s16(vreinterpret_s16_u16(tmp_u1), sub_const_vec);
+ tmp2 = vsub_s16(vreinterpret_s16_u16(tmp_u2), sub_const_vec);
+ tmp3 = vsub_s16(vreinterpret_s16_u16(tmp_u3), sub_const_vec);
+
+ tmp0 = vqrshl_s16(tmp0, round_bits_vec);
+ tmp1 = vqrshl_s16(tmp1, round_bits_vec);
+ tmp2 = vqrshl_s16(tmp2, round_bits_vec);
+ tmp3 = vqrshl_s16(tmp3, round_bits_vec);
+
+ tmp4 = vcombine_s16(tmp0, tmp1);
+ tmp5 = vcombine_s16(tmp2, tmp3);
+ tmp4 = vmaxq_s16(tmp4, zero);
+ tmp5 = vmaxq_s16(tmp5, zero);
+
+ *t0 = vqmovn_u16(vreinterpretq_u16_s16(tmp4));
+ *t1 = vqmovn_u16(vreinterpretq_u16_s16(tmp5));
+ }
+}
+
+static INLINE void compute_avg_8x4(
+ uint16x8_t res0, uint16x8_t res1, uint16x8_t res2, uint16x8_t res3,
+ uint16x8_t d0, uint16x8_t d1, uint16x8_t d2, uint16x8_t d3,
+ const uint16_t fwd_offset, const uint16_t bck_offset,
+ const int16x4_t sub_const, const int16_t round_bits,
+ const int32_t use_jnt_comp_avg, uint8x8_t *t0, uint8x8_t *t1, uint8x8_t *t2,
+ uint8x8_t *t3) {
+ int16x4_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ int16x8_t f0, f1, f2, f3;
+ uint32x4_t sum0, sum1, sum2, sum3;
+ uint32x4_t sum4, sum5, sum6, sum7;
+ int32x4_t dst0, dst1, dst2, dst3;
+ int32x4_t dst4, dst5, dst6, dst7;
+ uint16x8_t tmp_u0, tmp_u1, tmp_u2, tmp_u3;
+ const int16x8_t zero = vdupq_n_s16(0);
+
+ if (use_jnt_comp_avg) {
+ const int32x4_t sub_const_vec = vmovl_s16(sub_const);
+ const int32x4_t round_bits_vec = vdupq_n_s32(-(int32_t)round_bits);
+
+ sum0 = vmull_n_u16(vget_low_u16(res0), fwd_offset);
+ sum0 = vmlal_n_u16(sum0, vget_low_u16(d0), bck_offset);
+ sum1 = vmull_n_u16(vget_low_u16(res1), fwd_offset);
+ sum1 = vmlal_n_u16(sum1, vget_low_u16(d1), bck_offset);
+ sum0 = vshrq_n_u32(sum0, DIST_PRECISION_BITS);
+ sum1 = vshrq_n_u32(sum1, DIST_PRECISION_BITS);
+
+ sum2 = vmull_n_u16(vget_high_u16(res0), fwd_offset);
+ sum2 = vmlal_n_u16(sum2, vget_high_u16(d0), bck_offset);
+ sum3 = vmull_n_u16(vget_high_u16(res1), fwd_offset);
+ sum3 = vmlal_n_u16(sum3, vget_high_u16(d1), bck_offset);
+ sum2 = vshrq_n_u32(sum2, DIST_PRECISION_BITS);
+ sum3 = vshrq_n_u32(sum3, DIST_PRECISION_BITS);
+
+ sum4 = vmull_n_u16(vget_low_u16(res2), fwd_offset);
+ sum4 = vmlal_n_u16(sum4, vget_low_u16(d2), bck_offset);
+ sum5 = vmull_n_u16(vget_low_u16(res3), fwd_offset);
+ sum5 = vmlal_n_u16(sum5, vget_low_u16(d3), bck_offset);
+ sum4 = vshrq_n_u32(sum4, DIST_PRECISION_BITS);
+ sum5 = vshrq_n_u32(sum5, DIST_PRECISION_BITS);
+
+ sum6 = vmull_n_u16(vget_high_u16(res2), fwd_offset);
+ sum6 = vmlal_n_u16(sum6, vget_high_u16(d2), bck_offset);
+ sum7 = vmull_n_u16(vget_high_u16(res3), fwd_offset);
+ sum7 = vmlal_n_u16(sum7, vget_high_u16(d3), bck_offset);
+ sum6 = vshrq_n_u32(sum6, DIST_PRECISION_BITS);
+ sum7 = vshrq_n_u32(sum7, DIST_PRECISION_BITS);
+
+ dst0 = vsubq_s32(vreinterpretq_s32_u32(sum0), sub_const_vec);
+ dst1 = vsubq_s32(vreinterpretq_s32_u32(sum1), sub_const_vec);
+ dst2 = vsubq_s32(vreinterpretq_s32_u32(sum2), sub_const_vec);
+ dst3 = vsubq_s32(vreinterpretq_s32_u32(sum3), sub_const_vec);
+ dst4 = vsubq_s32(vreinterpretq_s32_u32(sum4), sub_const_vec);
+ dst5 = vsubq_s32(vreinterpretq_s32_u32(sum5), sub_const_vec);
+ dst6 = vsubq_s32(vreinterpretq_s32_u32(sum6), sub_const_vec);
+ dst7 = vsubq_s32(vreinterpretq_s32_u32(sum7), sub_const_vec);
+
+ dst0 = vqrshlq_s32(dst0, round_bits_vec);
+ dst1 = vqrshlq_s32(dst1, round_bits_vec);
+ dst2 = vqrshlq_s32(dst2, round_bits_vec);
+ dst3 = vqrshlq_s32(dst3, round_bits_vec);
+ dst4 = vqrshlq_s32(dst4, round_bits_vec);
+ dst5 = vqrshlq_s32(dst5, round_bits_vec);
+ dst6 = vqrshlq_s32(dst6, round_bits_vec);
+ dst7 = vqrshlq_s32(dst7, round_bits_vec);
+
+ tmp0 = vqmovn_s32(dst0);
+ tmp1 = vqmovn_s32(dst1);
+ tmp2 = vqmovn_s32(dst2);
+ tmp3 = vqmovn_s32(dst3);
+ tmp4 = vqmovn_s32(dst4);
+ tmp5 = vqmovn_s32(dst5);
+ tmp6 = vqmovn_s32(dst6);
+ tmp7 = vqmovn_s32(dst7);
+
+ f0 = vcombine_s16(tmp0, tmp2);
+ f1 = vcombine_s16(tmp1, tmp3);
+ f2 = vcombine_s16(tmp4, tmp6);
+ f3 = vcombine_s16(tmp5, tmp7);
+
+ f0 = vmaxq_s16(f0, zero);
+ f1 = vmaxq_s16(f1, zero);
+ f2 = vmaxq_s16(f2, zero);
+ f3 = vmaxq_s16(f3, zero);
+
+ *t0 = vqmovn_u16(vreinterpretq_u16_s16(f0));
+ *t1 = vqmovn_u16(vreinterpretq_u16_s16(f1));
+ *t2 = vqmovn_u16(vreinterpretq_u16_s16(f2));
+ *t3 = vqmovn_u16(vreinterpretq_u16_s16(f3));
+
+ } else {
+ const int16x8_t sub_const_vec = vcombine_s16(sub_const, sub_const);
+ const int16x8_t round_bits_vec = vdupq_n_s16(-round_bits);
+
+ tmp_u0 = vhaddq_u16(res0, d0);
+ tmp_u1 = vhaddq_u16(res1, d1);
+ tmp_u2 = vhaddq_u16(res2, d2);
+ tmp_u3 = vhaddq_u16(res3, d3);
+
+ f0 = vsubq_s16(vreinterpretq_s16_u16(tmp_u0), sub_const_vec);
+ f1 = vsubq_s16(vreinterpretq_s16_u16(tmp_u1), sub_const_vec);
+ f2 = vsubq_s16(vreinterpretq_s16_u16(tmp_u2), sub_const_vec);
+ f3 = vsubq_s16(vreinterpretq_s16_u16(tmp_u3), sub_const_vec);
+
+ f0 = vqrshlq_s16(f0, round_bits_vec);
+ f1 = vqrshlq_s16(f1, round_bits_vec);
+ f2 = vqrshlq_s16(f2, round_bits_vec);
+ f3 = vqrshlq_s16(f3, round_bits_vec);
+
+ f0 = vmaxq_s16(f0, zero);
+ f1 = vmaxq_s16(f1, zero);
+ f2 = vmaxq_s16(f2, zero);
+ f3 = vmaxq_s16(f3, zero);
+
+ *t0 = vqmovn_u16(vreinterpretq_u16_s16(f0));
+ *t1 = vqmovn_u16(vreinterpretq_u16_s16(f1));
+ *t2 = vqmovn_u16(vreinterpretq_u16_s16(f2));
+ *t3 = vqmovn_u16(vreinterpretq_u16_s16(f3));
+ }
+}
+
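+// Horizontal pass of the 2-D compound convolution: 8-bit source rows are
+// filtered with the (halved) x kernel and written as 16-bit intermediate
+// rows into im_block, biased by horiz_const so the values stay non-negative.
+// Rows are handled four at a time for w == 4 and eight at a time otherwise,
+// using transposes so the filter runs along the vectors.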
+static INLINE void jnt_convolve_2d_horiz_neon(
+ const uint8_t *src, int src_stride, int16_t *im_block, const int im_stride,
+ int16_t *x_filter_tmp, const int im_h, int w, const int round_0) {
+ const int bd = 8;
+ const uint8_t *s;
+ int16_t *dst_ptr;
+ int dst_stride;
+ int width, height;
+ uint8x8_t t0, t1, t2, t3, t4, t5, t6, t7;
+
+ dst_ptr = im_block;
+ dst_stride = im_stride;
+ height = im_h;
+ width = w;
+
+ if (w == 4) {
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3;
+ int16x8_t tt0, tt1, tt2, tt3;
+
+ const int16x4_t horiz_const = vdup_n_s16((1 << (bd + FILTER_BITS - 2)));
+ const int16x4_t shift_round_0 = vdup_n_s16(-(round_0));
+
+ do {
+ s = src;
+ __builtin_prefetch(s + 0 * src_stride);
+ __builtin_prefetch(s + 1 * src_stride);
+ __builtin_prefetch(s + 2 * src_stride);
+ __builtin_prefetch(s + 3 * src_stride);
+
+ load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
+ transpose_u8_8x4(&t0, &t1, &t2, &t3);
+ tt0 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ tt1 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ tt2 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ tt3 = vreinterpretq_s16_u16(vmovl_u8(t3));
+ s0 = vget_low_s16(tt0);
+ s1 = vget_low_s16(tt1);
+ s2 = vget_low_s16(tt2);
+ s3 = vget_low_s16(tt3);
+ s4 = vget_high_s16(tt0);
+ s5 = vget_high_s16(tt1);
+ s6 = vget_high_s16(tt2);
+ __builtin_prefetch(dst_ptr + 0 * dst_stride);
+ __builtin_prefetch(dst_ptr + 1 * dst_stride);
+ __builtin_prefetch(dst_ptr + 2 * dst_stride);
+ __builtin_prefetch(dst_ptr + 3 * dst_stride);
+ s += 7;
+
+ load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
+ transpose_u8_8x4(&t0, &t1, &t2, &t3);
+ tt0 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ tt1 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ tt2 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ tt3 = vreinterpretq_s16_u16(vmovl_u8(t3));
+ s7 = vget_low_s16(tt0);
+ s8 = vget_low_s16(tt1);
+ s9 = vget_low_s16(tt2);
+ s10 = vget_low_s16(tt3);
+
+ d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
+ horiz_const, shift_round_0);
+ d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
+ horiz_const, shift_round_0);
+ d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
+ horiz_const, shift_round_0);
+ d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
+ horiz_const, shift_round_0);
+
+ transpose_s16_4x4d(&d0, &d1, &d2, &d3);
+
+ vst1_s16((dst_ptr + 0 * dst_stride), d0);
+ vst1_s16((dst_ptr + 1 * dst_stride), d1);
+ vst1_s16((dst_ptr + 2 * dst_stride), d2);
+ vst1_s16((dst_ptr + 3 * dst_stride), d3);
+
+ src += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ height -= 4;
+ } while (height > 0);
+ } else {
+ int16_t *d_tmp;
+ int16x8_t s11, s12, s13, s14;
+ int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ int16x8_t res0, res1, res2, res3, res4, res5, res6, res7;
+
+ const int16x8_t horiz_const = vdupq_n_s16((1 << (bd + FILTER_BITS - 2)));
+ const int16x8_t shift_round_0 = vdupq_n_s16(-(round_0));
+
+ do {
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+ __builtin_prefetch(src + 4 * src_stride);
+ __builtin_prefetch(src + 5 * src_stride);
+ __builtin_prefetch(src + 6 * src_stride);
+ __builtin_prefetch(src + 7 * src_stride);
+ load_u8_8x8(src, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ s0 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ s1 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ s2 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ s3 = vreinterpretq_s16_u16(vmovl_u8(t3));
+ s4 = vreinterpretq_s16_u16(vmovl_u8(t4));
+ s5 = vreinterpretq_s16_u16(vmovl_u8(t5));
+ s6 = vreinterpretq_s16_u16(vmovl_u8(t6));
+
+ width = w;
+ s = src + 7;
+ d_tmp = dst_ptr;
+ __builtin_prefetch(dst_ptr + 0 * dst_stride);
+ __builtin_prefetch(dst_ptr + 1 * dst_stride);
+ __builtin_prefetch(dst_ptr + 2 * dst_stride);
+ __builtin_prefetch(dst_ptr + 3 * dst_stride);
+ __builtin_prefetch(dst_ptr + 4 * dst_stride);
+ __builtin_prefetch(dst_ptr + 5 * dst_stride);
+ __builtin_prefetch(dst_ptr + 6 * dst_stride);
+ __builtin_prefetch(dst_ptr + 7 * dst_stride);
+
+ do {
+ load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ s8 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ s9 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ s10 = vreinterpretq_s16_u16(vmovl_u8(t3));
+ s11 = vreinterpretq_s16_u16(vmovl_u8(t4));
+ s12 = vreinterpretq_s16_u16(vmovl_u8(t5));
+ s13 = vreinterpretq_s16_u16(vmovl_u8(t6));
+ s14 = vreinterpretq_s16_u16(vmovl_u8(t7));
+
+ res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
+ horiz_const, shift_round_0);
+ res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
+ horiz_const, shift_round_0);
+ res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
+ horiz_const, shift_round_0);
+ res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
+ horiz_const, shift_round_0);
+ res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, x_filter_tmp,
+ horiz_const, shift_round_0);
+ res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12,
+ x_filter_tmp, horiz_const, shift_round_0);
+ res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13,
+ x_filter_tmp, horiz_const, shift_round_0);
+ res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14,
+ x_filter_tmp, horiz_const, shift_round_0);
+
+ transpose_s16_8x8(&res0, &res1, &res2, &res3, &res4, &res5, &res6,
+ &res7);
+
+ store_s16_8x8(d_tmp, dst_stride, res0, res1, res2, res3, res4, res5,
+ res6, res7);
+ s0 = s8;
+ s1 = s9;
+ s2 = s10;
+ s3 = s11;
+ s4 = s12;
+ s5 = s13;
+ s6 = s14;
+ s += 8;
+ d_tmp += 8;
+ width -= 8;
+ } while (width > 0);
+ src += 8 * src_stride;
+ dst_ptr += 8 * dst_stride;
+ height -= 8;
+ } while (height > 0);
+ }
+}
+
+static INLINE void jnt_convolve_2d_vert_neon(
+ int16_t *im_block, const int im_stride, uint8_t *dst8, int dst8_stride,
+ ConvolveParams *conv_params, const int16_t *y_filter, int h, int w) {
+ uint8_t *dst_u8_ptr, *d_u8;
+ CONV_BUF_TYPE *dst_ptr, *dst;
+ int16_t *src_ptr, *s;
+ uint16_t *d;
+
+ const int bd = 8;
+ int height;
+ int dst_stride = conv_params->dst_stride;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int16_t sub_const = (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+
+ const int16_t round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int32x4_t round_shift_vec = vdupq_n_s32(-(conv_params->round_1));
+ const int32x4_t offset_const = vdupq_n_s32(1 << offset);
+ const int16x4_t sub_const_vec = vdup_n_s16(sub_const);
+ const uint16_t fwd_offset = conv_params->fwd_offset;
+ const uint16_t bck_offset = conv_params->bck_offset;
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ uint16x4_t res4, res5, res6, res7;
+ uint16x4_t d0, d1, d2, d3;
+ uint8x8_t t0, t1;
+
+ dst = conv_params->dst;
+ src_ptr = im_block;
+ dst_u8_ptr = dst8;
+ dst_ptr = dst;
+ height = h;
+
+ do {
+ d = dst_ptr;
+ d_u8 = dst_u8_ptr;
+ s = src_ptr;
+ height = h;
+
+ __builtin_prefetch(s + 0 * im_stride);
+ __builtin_prefetch(s + 1 * im_stride);
+ __builtin_prefetch(s + 2 * im_stride);
+ __builtin_prefetch(s + 3 * im_stride);
+ __builtin_prefetch(s + 4 * im_stride);
+ __builtin_prefetch(s + 5 * im_stride);
+ __builtin_prefetch(s + 6 * im_stride);
+ __builtin_prefetch(s + 7 * im_stride);
+
+ load_s16_4x8(s, im_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
+ s += (7 * im_stride);
+
+ do {
+ load_s16_4x4(s, im_stride, &s7, &s8, &s9, &s10);
+ s += (im_stride << 2);
+
+ __builtin_prefetch(d + 0 * dst_stride);
+ __builtin_prefetch(d + 1 * dst_stride);
+ __builtin_prefetch(d + 2 * dst_stride);
+ __builtin_prefetch(d + 3 * dst_stride);
+
+ __builtin_prefetch(d_u8 + 4 * dst8_stride);
+ __builtin_prefetch(d_u8 + 5 * dst8_stride);
+ __builtin_prefetch(d_u8 + 6 * dst8_stride);
+ __builtin_prefetch(d_u8 + 7 * dst8_stride);
+
+ d0 = convolve8_4x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
+ round_shift_vec, offset_const);
+ d1 = convolve8_4x4_s32(s1, s2, s3, s4, s5, s6, s7, s8, y_filter,
+ round_shift_vec, offset_const);
+ d2 = convolve8_4x4_s32(s2, s3, s4, s5, s6, s7, s8, s9, y_filter,
+ round_shift_vec, offset_const);
+ d3 = convolve8_4x4_s32(s3, s4, s5, s6, s7, s8, s9, s10, y_filter,
+ round_shift_vec, offset_const);
+
+ if (do_average) {
+ load_u16_4x4(d, dst_stride, &res4, &res5, &res6, &res7);
+ d += (dst_stride << 2);
+
+ compute_avg_4x4(res4, res5, res6, res7, d0, d1, d2, d3, fwd_offset,
+ bck_offset, sub_const_vec, round_bits, use_jnt_comp_avg,
+ &t0, &t1);
+
+ vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0),
+ 0); // 00 01 02 03
+ d_u8 += dst8_stride;
+ vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0),
+ 1); // 10 11 12 13
+ d_u8 += dst8_stride;
+ vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t1),
+ 0); // 20 21 22 23
+ d_u8 += dst8_stride;
+ vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t1),
+ 1); // 30 31 32 33
+ d_u8 += dst8_stride;
+
+ } else {
+ store_u16_4x4(d, dst_stride, d0, d1, d2, d3);
+ d += (dst_stride << 2);
+ }
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ height -= 4;
+ } while (height > 0);
+ src_ptr += 4;
+ dst_ptr += 4;
+ dst_u8_ptr += 4;
+ w -= 4;
+ } while (w > 0);
+}
+
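+// 2-D entry point: the horizontal pass fills im_block, then the vertical pass
+// either writes the CONV_BUF (first prediction) or, when do_average is set,
+// blends with it and produces the final 8-bit pixels. im_h = h + taps - 1
+// because the vertical filter consumes (taps - 1) extra intermediate rows,
+// and src is stepped back by vert_offset rows and horiz_offset columns so the
+// taps are centred on the output position.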
+void av1_jnt_convolve_2d_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
+ int dst8_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ assert(!(w % 4));
+ assert(!(h % 4));
+
+ DECLARE_ALIGNED(16, int16_t,
+ im_block[(MAX_SB_SIZE + HORIZ_EXTRA_ROWS) * MAX_SB_SIZE]);
+
+ const int im_h = h + filter_params_y->taps - 1;
+ const int im_stride = MAX_SB_SIZE;
+ const int vert_offset = filter_params_y->taps / 2 - 1;
+ const int horiz_offset = filter_params_x->taps / 2 - 1;
+ const int round_0 = conv_params->round_0 - 1;
+ const uint8_t *src_ptr = src - vert_offset * src_stride - horiz_offset;
+ const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
+ const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
+
+ int16_t x_filter_tmp[8];
+ int16x8_t filter_x_coef = vld1q_s16(x_filter);
+
+  // The filter coefficients are all even, so downshift them by 1 to reduce
+  // the intermediate precision requirements.
+ filter_x_coef = vshrq_n_s16(filter_x_coef, 1);
+ vst1q_s16(&x_filter_tmp[0], filter_x_coef);
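+  // The regular sub-pel kernels sum to 1 << FILTER_BITS; after the halving
+  // above they sum to 1 << (FILTER_BITS - 1). The reduced shift (round_0 - 1)
+  // and the halved horiz_const used in the horizontal pass compensate for
+  // this, and the halving is exact because the coefficients are even.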
+
+ jnt_convolve_2d_horiz_neon(src_ptr, src_stride, im_block, im_stride,
+ x_filter_tmp, im_h, w, round_0);
+
+ jnt_convolve_2d_vert_neon(im_block, im_stride, dst8, dst8_stride, conv_params,
+ y_filter, h, w);
+}
+
+void av1_jnt_convolve_2d_copy_neon(const uint8_t *src, int src_stride,
+ uint8_t *dst8, int dst8_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ uint8x8_t res0_8, res1_8, res2_8, res3_8, tmp_shift0, tmp_shift1, tmp_shift2,
+ tmp_shift3;
+ uint16x8_t res_q0, res_q1, res_q2, res_q3, tmp_q0, tmp_q1, tmp_q2, tmp_q3;
+ uint16x4_t tmp4, tmp5, tmp6, tmp7, res4, res5, res6, res7;
+ const uint8_t *src1, *src2;
+ uint8_t *dst8_1;
+ CONV_BUF_TYPE *dst = conv_params->dst, *dst_1, *dst_2;
+ const int dst_stride = conv_params->dst_stride;
+ int x, y;
+ const int16_t bits =
+ FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
+ const int bd = 8;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ const int16x4_t sub_const_vec = vdup_n_s16((int16_t)round_offset);
+ const uint16x8_t dup_round_offset16x8 = vdupq_n_u16((uint16_t)round_offset);
+ const int16x4_t dup_bits16x4 = vdup_n_s16(bits);
+ const int16x8_t dup_bits16x8 = vdupq_n_s16(bits);
+
+ (void)filter_params_x;
+ (void)filter_params_y;
+ (void)subpel_x_q4;
+ (void)subpel_y_q4;
+
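+  // No filtering happens in the copy kernel: each source pixel is scaled into
+  // the compound domain as (src << bits) + round_offset and stored to the
+  // CONV_BUF, or, when do_average is set, blended with the value already in
+  // the buffer via compute_avg_* and written out as 8-bit pixels.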
+ if (!(w & 0x07)) {
+ for (y = 0; y < (h >> 2); ++y) {
+ src1 = src;
+ dst8_1 = dst8;
+ dst_1 = dst;
+ for (x = 0; x < (w >> 3); ++x) {
+ src2 = src1;
+ load_u8_8x4(src2, src_stride, &res0_8, &res1_8, &res2_8, &res3_8);
+
+ res_q0 = vaddq_u16(vshlq_u16(vmovl_u8(res0_8), dup_bits16x8),
+ dup_round_offset16x8);
+ res_q1 = vaddq_u16(vshlq_u16(vmovl_u8(res1_8), dup_bits16x8),
+ dup_round_offset16x8);
+ res_q2 = vaddq_u16(vshlq_u16(vmovl_u8(res2_8), dup_bits16x8),
+ dup_round_offset16x8);
+ res_q3 = vaddq_u16(vshlq_u16(vmovl_u8(res3_8), dup_bits16x8),
+ dup_round_offset16x8);
+
+ if (conv_params->do_average) {
+ dst_2 = dst_1;
+ load_u16_8x4(dst_2, dst_stride, &tmp_q0, &tmp_q1, &tmp_q2, &tmp_q3);
+
+ compute_avg_8x4(tmp_q0, tmp_q1, tmp_q2, tmp_q3, res_q0, res_q1,
+ res_q2, res_q3, conv_params->fwd_offset,
+ conv_params->bck_offset, sub_const_vec, bits,
+ conv_params->use_jnt_comp_avg, &tmp_shift0,
+ &tmp_shift1, &tmp_shift2, &tmp_shift3);
+
+ vst1_u8(dst8_1 + (0 * dst8_stride), tmp_shift0);
+ vst1_u8(dst8_1 + (1 * dst8_stride), tmp_shift1);
+ vst1_u8(dst8_1 + (2 * dst8_stride), tmp_shift2);
+ vst1_u8(dst8_1 + (3 * dst8_stride), tmp_shift3);
+
+ } else {
+ vst1q_u16(dst_1 + (0 * dst_stride), res_q0);
+ vst1q_u16(dst_1 + (1 * dst_stride), res_q1);
+ vst1q_u16(dst_1 + (2 * dst_stride), res_q2);
+ vst1q_u16(dst_1 + (3 * dst_stride), res_q3);
+ }
+ src1 = src1 + 8;
+ dst_1 = dst_1 + 8;
+ dst8_1 = dst8_1 + 8;
+ }
+ src += src_stride * 4;
+ dst8 += dst8_stride * 4;
+ dst += dst_stride * 4;
+ }
+ } else if (!(w & 0x03)) {
+ for (y = 0; y < (h >> 2); ++y) {
+ src1 = src;
+ dst8_1 = dst8;
+ dst_1 = dst;
+
+ load_u8_8x4(src1, src_stride, &res0_8, &res1_8, &res2_8, &res3_8);
+
+ res4 = vadd_u16(vshl_u16(vget_low_u16(vmovl_u8(res0_8)), dup_bits16x4),
+ vreinterpret_u16_s16(sub_const_vec));
+ res5 = vadd_u16(vshl_u16(vget_low_u16(vmovl_u8(res1_8)), dup_bits16x4),
+ vreinterpret_u16_s16(sub_const_vec));
+ res6 = vadd_u16(vshl_u16(vget_low_u16(vmovl_u8(res2_8)), dup_bits16x4),
+ vreinterpret_u16_s16(sub_const_vec));
+ res7 = vadd_u16(vshl_u16(vget_low_u16(vmovl_u8(res3_8)), dup_bits16x4),
+ vreinterpret_u16_s16(sub_const_vec));
+ if (conv_params->do_average) {
+ load_u16_4x4(dst_1, dst_stride, &tmp4, &tmp5, &tmp6, &tmp7);
+
+ compute_avg_4x4(tmp4, tmp5, tmp6, tmp7, res4, res5, res6, res7,
+ conv_params->fwd_offset, conv_params->bck_offset,
+ sub_const_vec, bits, conv_params->use_jnt_comp_avg,
+ &tmp_shift0, &tmp_shift1);
+
+ vst1_lane_u32((uint32_t *)(dst8_1), vreinterpret_u32_u8(tmp_shift0), 0);
+ dst8_1 += dst8_stride;
+ vst1_lane_u32((uint32_t *)(dst8_1), vreinterpret_u32_u8(tmp_shift0), 1);
+ dst8_1 += dst8_stride;
+ vst1_lane_u32((uint32_t *)(dst8_1), vreinterpret_u32_u8(tmp_shift1), 0);
+ dst8_1 += dst8_stride;
+ vst1_lane_u32((uint32_t *)(dst8_1), vreinterpret_u32_u8(tmp_shift1), 1);
+
+ } else {
+ vst1_u16(dst_1, res4);
+ dst_1 += dst_stride;
+ vst1_u16(dst_1, res5);
+ dst_1 += dst_stride;
+ vst1_u16(dst_1, res6);
+ dst_1 += dst_stride;
+ vst1_u16(dst_1, res7);
+ }
+ src += src_stride * 4;
+ dst += dst_stride * 4;
+ dst8 += dst8_stride * 4;
+ }
+ }
+}
+
+void av1_jnt_convolve_x_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
+ int dst8_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ assert(!(w % 4));
+ assert(!(h % 4));
+
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ const int horiz_offset = filter_params_x->taps / 2 - 1;
+ const int bits = FILTER_BITS - conv_params->round_1;
+ const int bd = 8;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ const int round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const uint16_t fwd_offset = conv_params->fwd_offset;
+ const uint16_t bck_offset = conv_params->bck_offset;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+
+ (void)filter_params_y;
+ (void)subpel_y_q4;
+
+ // horizontal filter
+ const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
+
+ const uint8_t *src_ptr = src - horiz_offset;
+
+ int16_t x_filter_tmp[8];
+ int16x8_t filter_x_coef = vld1q_s16(x_filter);
+
+  // The filter coefficients are all even, so downshift them by 1 to reduce
+  // the intermediate precision requirements.
+ filter_x_coef = vshrq_n_s16(filter_x_coef, 1);
+ vst1q_s16(&x_filter_tmp[0], filter_x_coef);
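+  // In this horizontal-only path the convolve helpers shift by (round_0 - 1)
+  // to match the halved kernel; the result is then scaled by (1 << bits)
+  // (vrshl with a positive count is a plain left shift) and biased by
+  // round_offset to reach compound (CONV_BUF_TYPE) precision.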
+
+ const uint8_t *s;
+ uint8_t *d_u8;
+ uint8_t *dst_u8_ptr;
+ CONV_BUF_TYPE *d, *dst_ptr;
+ int width, height;
+ uint8x8_t t0, t1, t2, t3, t4, t5, t6, t7;
+
+ s = src_ptr;
+ dst_ptr = dst;
+ dst_u8_ptr = dst8;
+ width = w;
+ height = h;
+
+ if ((w == 4) || (h == 4)) {
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3;
+ int16x8_t tt0, tt1, tt2, tt3;
+ uint16x4_t res4, res5, res6, res7;
+ uint32x2_t tu0, tu1;
+ int16x8_t u0, u1;
+ const int16x4_t zero = vdup_n_s16(0);
+ const int16x4_t round_offset_vec = vdup_n_s16(round_offset);
+ const int16x4_t shift_round_0 = vdup_n_s16(-conv_params->round_0 + 1);
+ const int16x4_t horiz_const = vdup_n_s16(bits);
+ do {
+ s = src_ptr;
+ d = dst_ptr;
+ d_u8 = dst_u8_ptr;
+ width = w;
+ __builtin_prefetch(s + 0 * src_stride);
+ __builtin_prefetch(s + 1 * src_stride);
+ __builtin_prefetch(s + 2 * src_stride);
+ __builtin_prefetch(s + 3 * src_stride);
+
+ load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
+ transpose_u8_8x4(&t0, &t1, &t2, &t3);
+ tt0 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ tt1 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ tt2 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ tt3 = vreinterpretq_s16_u16(vmovl_u8(t3));
+ s0 = vget_low_s16(tt0);
+ s1 = vget_low_s16(tt1);
+ s2 = vget_low_s16(tt2);
+ s3 = vget_low_s16(tt3);
+ s4 = vget_high_s16(tt0);
+ s5 = vget_high_s16(tt1);
+ s6 = vget_high_s16(tt2);
+ __builtin_prefetch(d + 0 * dst_stride);
+ __builtin_prefetch(d + 1 * dst_stride);
+ __builtin_prefetch(d + 2 * dst_stride);
+ __builtin_prefetch(d + 3 * dst_stride);
+ s += 7;
+ do {
+ load_unaligned_u8_4x4(s, src_stride, &tu0, &tu1);
+ t0 = vreinterpret_u8_u32(tu0);
+ t1 = vreinterpret_u8_u32(tu1);
+
+ transpose_u8_4x4(&t0, &t1);
+ u0 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ u1 = vreinterpretq_s16_u16(vmovl_u8(t1));
+
+ s7 = vget_low_s16(u0);
+ s8 = vget_low_s16(u1);
+ s9 = vget_high_s16(u0);
+ s10 = vget_high_s16(u1);
+
+ d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
+ zero, shift_round_0);
+ d0 = vrshl_s16(d0, horiz_const);
+ d0 = vadd_s16(d0, round_offset_vec);
+ d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
+ zero, shift_round_0);
+ d1 = vrshl_s16(d1, horiz_const);
+ d1 = vadd_s16(d1, round_offset_vec);
+ d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
+ zero, shift_round_0);
+ d2 = vrshl_s16(d2, horiz_const);
+ d2 = vadd_s16(d2, round_offset_vec);
+ d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
+ zero, shift_round_0);
+ d3 = vrshl_s16(d3, horiz_const);
+ d3 = vadd_s16(d3, round_offset_vec);
+
+ transpose_s16_4x4d(&d0, &d1, &d2, &d3);
+
+ if (conv_params->do_average) {
+ __builtin_prefetch(d + 0 * dst_stride);
+ __builtin_prefetch(d + 1 * dst_stride);
+ __builtin_prefetch(d + 2 * dst_stride);
+ __builtin_prefetch(d + 3 * dst_stride);
+
+ __builtin_prefetch(d_u8 + 0 * dst8_stride);
+ __builtin_prefetch(d_u8 + 1 * dst8_stride);
+ __builtin_prefetch(d_u8 + 2 * dst8_stride);
+ __builtin_prefetch(d_u8 + 3 * dst8_stride);
+
+ load_u16_4x4(d, dst_stride, &res4, &res5, &res6, &res7);
+
+ compute_avg_4x4(res4, res5, res6, res7, vreinterpret_u16_s16(d0),
+ vreinterpret_u16_s16(d1), vreinterpret_u16_s16(d2),
+ vreinterpret_u16_s16(d3), fwd_offset, bck_offset,
+ round_offset_vec, round_bits, use_jnt_comp_avg, &t0,
+ &t1);
+
+ vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0),
+ 0); // 00 01 02 03
+ vst1_lane_u32((uint32_t *)(d_u8 + dst8_stride),
+ vreinterpret_u32_u8(t0),
+ 1); // 10 11 12 13
+ vst1_lane_u32((uint32_t *)(d_u8 + 2 * dst8_stride),
+ vreinterpret_u32_u8(t1),
+ 0); // 20 21 22 23
+ vst1_lane_u32((uint32_t *)(d_u8 + 3 * dst8_stride),
+ vreinterpret_u32_u8(t1),
+ 1); // 30 31 32 33
+ } else {
+ store_u16_4x4(d, dst_stride, vreinterpret_u16_s16(d0),
+ vreinterpret_u16_s16(d1), vreinterpret_u16_s16(d2),
+ vreinterpret_u16_s16(d3));
+ }
+
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+
+ s += 4;
+ width -= 4;
+ d += 4;
+ d_u8 += 4;
+ } while (width > 0);
+ src_ptr += (src_stride << 2);
+ dst_ptr += (dst_stride << 2);
+ dst_u8_ptr += (dst8_stride << 2);
+ height -= 4;
+ } while (height > 0);
+ } else {
+ CONV_BUF_TYPE *d_tmp;
+ uint8_t *d_u8_tmp;
+ int16x8_t s11, s12, s13, s14;
+ int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ int16x8_t res0, res1, res2, res3, res4, res5, res6, res7;
+ uint16x8_t res8, res9, res10, res11;
+
+ const int16x8_t round_offset128 = vdupq_n_s16(round_offset);
+ const int16x4_t round_offset64 = vdup_n_s16(round_offset);
+ const int16x8_t shift_round_0 = vdupq_n_s16(-conv_params->round_0 + 1);
+ const int16x8_t horiz_const = vdupq_n_s16(bits);
+ const int16x8_t zero = vdupq_n_s16(0);
+
+ d = dst_ptr = dst;
+ d_u8 = dst_u8_ptr = dst8;
+ do {
+ __builtin_prefetch(src_ptr + 0 * src_stride);
+ __builtin_prefetch(src_ptr + 1 * src_stride);
+ __builtin_prefetch(src_ptr + 2 * src_stride);
+ __builtin_prefetch(src_ptr + 3 * src_stride);
+ __builtin_prefetch(src_ptr + 4 * src_stride);
+ __builtin_prefetch(src_ptr + 5 * src_stride);
+ __builtin_prefetch(src_ptr + 6 * src_stride);
+ __builtin_prefetch(src_ptr + 7 * src_stride);
+ load_u8_8x8(src_ptr, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ s0 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ s1 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ s2 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ s3 = vreinterpretq_s16_u16(vmovl_u8(t3));
+ s4 = vreinterpretq_s16_u16(vmovl_u8(t4));
+ s5 = vreinterpretq_s16_u16(vmovl_u8(t5));
+ s6 = vreinterpretq_s16_u16(vmovl_u8(t6));
+
+ width = w;
+ s = src_ptr + 7;
+ d = dst_ptr;
+ d_u8_tmp = dst_u8_ptr;
+
+ __builtin_prefetch(dst_ptr + 0 * dst_stride);
+ __builtin_prefetch(dst_ptr + 1 * dst_stride);
+ __builtin_prefetch(dst_ptr + 2 * dst_stride);
+ __builtin_prefetch(dst_ptr + 3 * dst_stride);
+ __builtin_prefetch(dst_ptr + 4 * dst_stride);
+ __builtin_prefetch(dst_ptr + 5 * dst_stride);
+ __builtin_prefetch(dst_ptr + 6 * dst_stride);
+ __builtin_prefetch(dst_ptr + 7 * dst_stride);
+
+ do {
+ d_u8 = d_u8_tmp;
+ d_tmp = d;
+
+ load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ s8 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ s9 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ s10 = vreinterpretq_s16_u16(vmovl_u8(t3));
+ s11 = vreinterpretq_s16_u16(vmovl_u8(t4));
+ s12 = vreinterpretq_s16_u16(vmovl_u8(t5));
+ s13 = vreinterpretq_s16_u16(vmovl_u8(t6));
+ s14 = vreinterpretq_s16_u16(vmovl_u8(t7));
+
+ res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
+ zero, shift_round_0);
+
+ res0 = vrshlq_s16(res0, horiz_const);
+ res0 = vaddq_s16(res0, round_offset128);
+
+ res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
+ zero, shift_round_0);
+ res1 = vrshlq_s16(res1, horiz_const);
+ res1 = vaddq_s16(res1, round_offset128);
+ res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
+ zero, shift_round_0);
+ res2 = vrshlq_s16(res2, horiz_const);
+ res2 = vaddq_s16(res2, round_offset128);
+ res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
+ zero, shift_round_0);
+ res3 = vrshlq_s16(res3, horiz_const);
+ res3 = vaddq_s16(res3, round_offset128);
+ res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, x_filter_tmp,
+ zero, shift_round_0);
+ res4 = vrshlq_s16(res4, horiz_const);
+ res4 = vaddq_s16(res4, round_offset128);
+ res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12,
+ x_filter_tmp, zero, shift_round_0);
+ res5 = vrshlq_s16(res5, horiz_const);
+ res5 = vaddq_s16(res5, round_offset128);
+ res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13,
+ x_filter_tmp, zero, shift_round_0);
+ res6 = vrshlq_s16(res6, horiz_const);
+ res6 = vaddq_s16(res6, round_offset128);
+ res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14,
+ x_filter_tmp, zero, shift_round_0);
+ res7 = vrshlq_s16(res7, horiz_const);
+ res7 = vaddq_s16(res7, round_offset128);
+
+ transpose_s16_8x8(&res0, &res1, &res2, &res3, &res4, &res5, &res6,
+ &res7);
+
+ if (conv_params->do_average) {
+ load_u16_8x4(d_tmp, dst_stride, &res8, &res9, &res10, &res11);
+ d_tmp += (dst_stride << 2);
+
+ compute_avg_8x4(
+ res8, res9, res10, res11, vreinterpretq_u16_s16(res0),
+ vreinterpretq_u16_s16(res1), vreinterpretq_u16_s16(res2),
+ vreinterpretq_u16_s16(res3), fwd_offset, bck_offset,
+ round_offset64, round_bits, use_jnt_comp_avg, &t0, &t1, &t2, &t3);
+
+ store_u8_8x4(d_u8, dst8_stride, t0, t1, t2, t3);
+ d_u8 += (dst8_stride << 2);
+
+ load_u16_8x4(d_tmp, dst_stride, &res8, &res9, &res10, &res11);
+ d_tmp += (dst_stride << 2);
+
+ compute_avg_8x4(
+ res8, res9, res10, res11, vreinterpretq_u16_s16(res4),
+ vreinterpretq_u16_s16(res5), vreinterpretq_u16_s16(res6),
+ vreinterpretq_u16_s16(res7), fwd_offset, bck_offset,
+ round_offset64, round_bits, use_jnt_comp_avg, &t0, &t1, &t2, &t3);
+
+ store_u8_8x4(d_u8, dst8_stride, t0, t1, t2, t3);
+ d_u8 += (dst8_stride << 2);
+ } else {
+ store_u16_8x8(
+ d_tmp, dst_stride, vreinterpretq_u16_s16(res0),
+ vreinterpretq_u16_s16(res1), vreinterpretq_u16_s16(res2),
+ vreinterpretq_u16_s16(res3), vreinterpretq_u16_s16(res4),
+ vreinterpretq_u16_s16(res5), vreinterpretq_u16_s16(res6),
+ vreinterpretq_u16_s16(res7));
+ d_tmp += (dst_stride << 3);
+ }
+
+ s0 = s8;
+ s1 = s9;
+ s2 = s10;
+ s3 = s11;
+ s4 = s12;
+ s5 = s13;
+ s6 = s14;
+ s += 8;
+ d += 8;
+ width -= 8;
+ d_u8_tmp += 8;
+ } while (width > 0);
+ src_ptr += 8 * src_stride;
+ dst_ptr += 8 * dst_stride;
+ dst_u8_ptr += 8 * dst8_stride;
+ height -= 8;
+ } while (height > 0);
+ }
+}
+
+void av1_jnt_convolve_y_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
+ int dst8_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ assert(!(w % 4));
+ assert(!(h % 4));
+
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ const int dst_stride = conv_params->dst_stride;
+ const int vert_offset = filter_params_y->taps / 2 - 1;
+ const int bits = FILTER_BITS - conv_params->round_0;
+ const int bd = 8;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ const int round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const uint16_t fwd_offset = conv_params->fwd_offset;
+ const uint16_t bck_offset = conv_params->bck_offset;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+ const int shift_value = (conv_params->round_1 - 1 - bits);
+
+ (void)filter_params_x;
+ (void)subpel_x_q4;
+
+ // vertical filter
+ const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
+
+ const uint8_t *src_ptr = src - (vert_offset * src_stride);
+
+ int16_t y_filter_tmp[8];
+ int16x8_t filter_y_coef = vld1q_s16(y_filter);
+
+  // The filter coefficients are all even, so downshift them by 1 to reduce
+  // the intermediate precision requirements.
+ filter_y_coef = vshrq_n_s16(filter_y_coef, 1);
+ vst1q_s16(&y_filter_tmp[0], filter_y_coef);
+
+ const uint8_t *s;
+ uint8_t *d_u8;
+ uint8_t *dst_u8_ptr;
+ CONV_BUF_TYPE *d, *dst_ptr;
+ int width, height;
+ uint8x8_t t0, t1, t2, t3, t4, t5, t6, t7;
+
+ s = src_ptr;
+ dst_ptr = dst;
+ dst_u8_ptr = dst8;
+ width = w;
+ height = h;
+
+  // shift_value folds the (1 << bits) scaling of the vertical filter output
+  // into its rounding shift, so no separate multiplication is needed.
+ assert((conv_params->round_1 - 2) >= bits);
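+  // For illustration: with the typical 8-bit compound rounding of round_0 = 3
+  // and round_1 = 7 (an assumption, not read from conv_params here), bits = 4
+  // and shift_value = 2, and the assert above holds as 5 >= 4.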
+
+ if ((w == 4) || (h == 4)) {
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3;
+ uint16x4_t res4, res5, res6, res7;
+ uint32x2_t tu0, tu1, tu2, tu3;
+ int16x8_t u0, u1, u2, u3;
+
+ const int16x4_t round_offset64 = vdup_n_s16(round_offset);
+ const int16x4_t shift_vec = vdup_n_s16(-shift_value);
+ const int16x4_t zero = vdup_n_s16(0);
+
+ do {
+ s = src_ptr;
+ d = dst_ptr;
+ d_u8 = dst_u8_ptr;
+ height = h;
+ __builtin_prefetch(s + 0 * src_stride);
+ __builtin_prefetch(s + 1 * src_stride);
+ __builtin_prefetch(s + 2 * src_stride);
+ __builtin_prefetch(s + 3 * src_stride);
+
+ load_unaligned_u8_4x8(s, src_stride, &tu0, &tu1, &tu2, &tu3);
+
+ u0 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu0)));
+ u1 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu1)));
+ u2 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu2)));
+ u3 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu3)));
+
+ s0 = vget_low_s16(u0);
+ s1 = vget_high_s16(u0);
+ s2 = vget_low_s16(u1);
+ s3 = vget_high_s16(u1);
+ s4 = vget_low_s16(u2);
+ s5 = vget_high_s16(u2);
+ s6 = vget_low_s16(u3);
+
+ __builtin_prefetch(d + 0 * dst_stride);
+ __builtin_prefetch(d + 1 * dst_stride);
+ __builtin_prefetch(d + 2 * dst_stride);
+ __builtin_prefetch(d + 3 * dst_stride);
+
+ s += (7 * src_stride);
+ do {
+ load_unaligned_u8_4x4(s, src_stride, &tu0, &tu1);
+
+ u0 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu0)));
+ u1 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu1)));
+
+ s7 = vget_low_s16(u0);
+ s8 = vget_high_s16(u0);
+ s9 = vget_low_s16(u1);
+ s10 = vget_high_s16(u1);
+
+ d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter_tmp,
+ zero, shift_vec);
+ d0 = vadd_s16(d0, round_offset64);
+ d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, y_filter_tmp,
+ zero, shift_vec);
+ d1 = vadd_s16(d1, round_offset64);
+ d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, y_filter_tmp,
+ zero, shift_vec);
+ d2 = vadd_s16(d2, round_offset64);
+ d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, y_filter_tmp,
+ zero, shift_vec);
+ d3 = vadd_s16(d3, round_offset64);
+
+ if (conv_params->do_average) {
+ __builtin_prefetch(d + 0 * dst_stride);
+ __builtin_prefetch(d + 1 * dst_stride);
+ __builtin_prefetch(d + 2 * dst_stride);
+ __builtin_prefetch(d + 3 * dst_stride);
+
+ __builtin_prefetch(d_u8 + 0 * dst8_stride);
+ __builtin_prefetch(d_u8 + 1 * dst8_stride);
+ __builtin_prefetch(d_u8 + 2 * dst8_stride);
+ __builtin_prefetch(d_u8 + 3 * dst8_stride);
+
+ load_u16_4x4(d, dst_stride, &res4, &res5, &res6, &res7);
+ d += (dst_stride << 2);
+
+ compute_avg_4x4(res4, res5, res6, res7, vreinterpret_u16_s16(d0),
+ vreinterpret_u16_s16(d1), vreinterpret_u16_s16(d2),
+ vreinterpret_u16_s16(d3), fwd_offset, bck_offset,
+ round_offset64, round_bits, use_jnt_comp_avg, &t0,
+ &t1);
+
+ vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0),
+ 0); // 00 01 02 03
+ d_u8 += dst8_stride;
+ vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0),
+ 1); // 10 11 12 13
+ d_u8 += dst8_stride;
+ vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t1),
+ 0); // 20 21 22 23
+ d_u8 += dst8_stride;
+ vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t1),
+ 1); // 30 31 32 33
+ d_u8 += dst8_stride;
+ } else {
+ store_u16_4x4(d, dst_stride, vreinterpret_u16_s16(d0),
+ vreinterpret_u16_s16(d1), vreinterpret_u16_s16(d2),
+ vreinterpret_u16_s16(d3));
+ d += (dst_stride << 2);
+ }
+
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+
+ s += (src_stride << 2);
+ height -= 4;
+ } while (height > 0);
+ src_ptr += 4;
+ dst_ptr += 4;
+ dst_u8_ptr += 4;
+ width -= 4;
+ } while (width > 0);
+ } else {
+ CONV_BUF_TYPE *d_tmp;
+ int16x8_t s11, s12, s13, s14;
+ int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ int16x8_t res0, res1, res2, res3, res4, res5, res6, res7;
+ uint16x8_t res8, res9, res10, res11;
+ const int16x8_t round_offset128 = vdupq_n_s16(round_offset);
+ const int16x8_t shift_vec = vdupq_n_s16(-shift_value);
+ const int16x4_t round_offset64 = vdup_n_s16(round_offset);
+ const int16x8_t zero = vdupq_n_s16(0);
+
+ dst_ptr = dst;
+ dst_u8_ptr = dst8;
+ do {
+ __builtin_prefetch(src_ptr + 0 * src_stride);
+ __builtin_prefetch(src_ptr + 1 * src_stride);
+ __builtin_prefetch(src_ptr + 2 * src_stride);
+ __builtin_prefetch(src_ptr + 3 * src_stride);
+ __builtin_prefetch(src_ptr + 4 * src_stride);
+ __builtin_prefetch(src_ptr + 5 * src_stride);
+ __builtin_prefetch(src_ptr + 6 * src_stride);
+ __builtin_prefetch(src_ptr + 7 * src_stride);
+ load_u8_8x8(src_ptr, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+
+ s0 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ s1 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ s2 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ s3 = vreinterpretq_s16_u16(vmovl_u8(t3));
+ s4 = vreinterpretq_s16_u16(vmovl_u8(t4));
+ s5 = vreinterpretq_s16_u16(vmovl_u8(t5));
+ s6 = vreinterpretq_s16_u16(vmovl_u8(t6));
+
+ height = h;
+ s = src_ptr + (7 * src_stride);
+ d_tmp = dst_ptr;
+ d_u8 = dst_u8_ptr;
+
+ do {
+ load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+
+ s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ s8 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ s9 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ s10 = vreinterpretq_s16_u16(vmovl_u8(t3));
+ s11 = vreinterpretq_s16_u16(vmovl_u8(t4));
+ s12 = vreinterpretq_s16_u16(vmovl_u8(t5));
+ s13 = vreinterpretq_s16_u16(vmovl_u8(t6));
+ s14 = vreinterpretq_s16_u16(vmovl_u8(t7));
+
+ __builtin_prefetch(dst_ptr + 0 * dst_stride);
+ __builtin_prefetch(dst_ptr + 1 * dst_stride);
+ __builtin_prefetch(dst_ptr + 2 * dst_stride);
+ __builtin_prefetch(dst_ptr + 3 * dst_stride);
+
+ res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter_tmp,
+ zero, shift_vec);
+ res0 = vaddq_s16(res0, round_offset128);
+ res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, y_filter_tmp,
+ zero, shift_vec);
+ res1 = vaddq_s16(res1, round_offset128);
+ res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, y_filter_tmp,
+ zero, shift_vec);
+ res2 = vaddq_s16(res2, round_offset128);
+ res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, y_filter_tmp,
+ zero, shift_vec);
+ res3 = vaddq_s16(res3, round_offset128);
+ res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, y_filter_tmp,
+ zero, shift_vec);
+ res4 = vaddq_s16(res4, round_offset128);
+ res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12,
+ y_filter_tmp, zero, shift_vec);
+ res5 = vaddq_s16(res5, round_offset128);
+ res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13,
+ y_filter_tmp, zero, shift_vec);
+ res6 = vaddq_s16(res6, round_offset128);
+ res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14,
+ y_filter_tmp, zero, shift_vec);
+ res7 = vaddq_s16(res7, round_offset128);
+
+ if (conv_params->do_average) {
+ __builtin_prefetch(d_tmp + 0 * dst8_stride);
+ __builtin_prefetch(d_tmp + 1 * dst8_stride);
+ __builtin_prefetch(d_tmp + 2 * dst8_stride);
+ __builtin_prefetch(d_tmp + 3 * dst8_stride);
+
+ load_u16_8x4(d_tmp, dst_stride, &res8, &res9, &res10, &res11);
+ d_tmp += (dst_stride << 2);
+
+ compute_avg_8x4(
+ res8, res9, res10, res11, vreinterpretq_u16_s16(res0),
+ vreinterpretq_u16_s16(res1), vreinterpretq_u16_s16(res2),
+ vreinterpretq_u16_s16(res3), fwd_offset, bck_offset,
+ round_offset64, round_bits, use_jnt_comp_avg, &t0, &t1, &t2, &t3);
+
+ store_u8_8x4(d_u8, dst8_stride, t0, t1, t2, t3);
+ d_u8 += (dst8_stride << 2);
+
+ load_u16_8x4(d_tmp, dst_stride, &res8, &res9, &res10, &res11);
+ d_tmp += (dst_stride << 2);
+
+ compute_avg_8x4(
+ res8, res9, res10, res11, vreinterpretq_u16_s16(res4),
+ vreinterpretq_u16_s16(res5), vreinterpretq_u16_s16(res6),
+ vreinterpretq_u16_s16(res7), fwd_offset, bck_offset,
+ round_offset64, round_bits, use_jnt_comp_avg, &t0, &t1, &t2, &t3);
+
+ store_u8_8x4(d_u8, dst8_stride, t0, t1, t2, t3);
+ d_u8 += (dst8_stride << 2);
+ } else {
+ store_u16_8x8(
+ d_tmp, dst_stride, vreinterpretq_u16_s16(res0),
+ vreinterpretq_u16_s16(res1), vreinterpretq_u16_s16(res2),
+ vreinterpretq_u16_s16(res3), vreinterpretq_u16_s16(res4),
+ vreinterpretq_u16_s16(res5), vreinterpretq_u16_s16(res6),
+ vreinterpretq_u16_s16(res7));
+ d_tmp += (dst_stride << 3);
+ }
+
+ s0 = s8;
+ s1 = s9;
+ s2 = s10;
+ s3 = s11;
+ s4 = s12;
+ s5 = s13;
+ s6 = s14;
+ s += (8 * src_stride);
+ height -= 8;
+ } while (height > 0);
+ src_ptr += 8;
+ dst_ptr += 8;
+ dst_u8_ptr += 8;
+ width -= 8;
+ } while (width > 0);
+ }
+}
diff --git a/third_party/aom/av1/common/arm/mem_neon.h b/third_party/aom/av1/common/arm/mem_neon.h
new file mode 100644
index 000000000..214b14bf7
--- /dev/null
+++ b/third_party/aom/av1/common/arm/mem_neon.h
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef AV1_COMMON_ARM_MEM_NEON_H_
+#define AV1_COMMON_ARM_MEM_NEON_H_
+
+#include <arm_neon.h>
+#include <string.h>
+
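+// Strided load/store helpers shared by the NEON convolution and filtering
+// kernels: each routine reads or writes a fixed number of consecutive rows
+// starting at s, stepping by the pitch p (or dst_stride) between rows.
+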
+static INLINE void store_row2_u8_8x8(uint8_t *s, int p, const uint8x8_t s0,
+ const uint8x8_t s1) {
+ vst1_u8(s, s0);
+ s += p;
+ vst1_u8(s, s1);
+ s += p;
+}
+
+static INLINE void load_u8_8x8(const uint8_t *s, ptrdiff_t p,
+ uint8x8_t *const s0, uint8x8_t *const s1,
+ uint8x8_t *const s2, uint8x8_t *const s3,
+ uint8x8_t *const s4, uint8x8_t *const s5,
+ uint8x8_t *const s6, uint8x8_t *const s7) {
+ *s0 = vld1_u8(s);
+ s += p;
+ *s1 = vld1_u8(s);
+ s += p;
+ *s2 = vld1_u8(s);
+ s += p;
+ *s3 = vld1_u8(s);
+ s += p;
+ *s4 = vld1_u8(s);
+ s += p;
+ *s5 = vld1_u8(s);
+ s += p;
+ *s6 = vld1_u8(s);
+ s += p;
+ *s7 = vld1_u8(s);
+}
+
+static INLINE void load_u8_8x16(const uint8_t *s, ptrdiff_t p,
+ uint8x16_t *const s0, uint8x16_t *const s1,
+ uint8x16_t *const s2, uint8x16_t *const s3) {
+ *s0 = vld1q_u8(s);
+ s += p;
+ *s1 = vld1q_u8(s);
+ s += p;
+ *s2 = vld1q_u8(s);
+ s += p;
+ *s3 = vld1q_u8(s);
+}
+
+static INLINE void load_u8_8x4(const uint8_t *s, const ptrdiff_t p,
+ uint8x8_t *const s0, uint8x8_t *const s1,
+ uint8x8_t *const s2, uint8x8_t *const s3) {
+ *s0 = vld1_u8(s);
+ s += p;
+ *s1 = vld1_u8(s);
+ s += p;
+ *s2 = vld1_u8(s);
+ s += p;
+ *s3 = vld1_u8(s);
+}
+
+static INLINE void load_u16_4x4(const uint16_t *s, const ptrdiff_t p,
+ uint16x4_t *const s0, uint16x4_t *const s1,
+ uint16x4_t *const s2, uint16x4_t *const s3) {
+ *s0 = vld1_u16(s);
+ s += p;
+ *s1 = vld1_u16(s);
+ s += p;
+ *s2 = vld1_u16(s);
+ s += p;
+ *s3 = vld1_u16(s);
+ s += p;
+}
+
+static INLINE void load_u16_8x4(const uint16_t *s, const ptrdiff_t p,
+ uint16x8_t *const s0, uint16x8_t *const s1,
+ uint16x8_t *const s2, uint16x8_t *const s3) {
+ *s0 = vld1q_u16(s);
+ s += p;
+ *s1 = vld1q_u16(s);
+ s += p;
+ *s2 = vld1q_u16(s);
+ s += p;
+ *s3 = vld1q_u16(s);
+ s += p;
+}
+
+static INLINE void load_s16_4x8(const int16_t *s, ptrdiff_t p,
+ int16x4_t *const s0, int16x4_t *const s1,
+ int16x4_t *const s2, int16x4_t *const s3,
+ int16x4_t *const s4, int16x4_t *const s5,
+ int16x4_t *const s6, int16x4_t *const s7) {
+ *s0 = vld1_s16(s);
+ s += p;
+ *s1 = vld1_s16(s);
+ s += p;
+ *s2 = vld1_s16(s);
+ s += p;
+ *s3 = vld1_s16(s);
+ s += p;
+ *s4 = vld1_s16(s);
+ s += p;
+ *s5 = vld1_s16(s);
+ s += p;
+ *s6 = vld1_s16(s);
+ s += p;
+ *s7 = vld1_s16(s);
+}
+
+static INLINE void load_s16_4x4(const int16_t *s, ptrdiff_t p,
+ int16x4_t *const s0, int16x4_t *const s1,
+ int16x4_t *const s2, int16x4_t *const s3) {
+ *s0 = vld1_s16(s);
+ s += p;
+ *s1 = vld1_s16(s);
+ s += p;
+ *s2 = vld1_s16(s);
+ s += p;
+ *s3 = vld1_s16(s);
+}
+
+static INLINE void store_u8_8x8(uint8_t *s, ptrdiff_t p, const uint8x8_t s0,
+ const uint8x8_t s1, const uint8x8_t s2,
+ const uint8x8_t s3, const uint8x8_t s4,
+ const uint8x8_t s5, const uint8x8_t s6,
+ const uint8x8_t s7) {
+ vst1_u8(s, s0);
+ s += p;
+ vst1_u8(s, s1);
+ s += p;
+ vst1_u8(s, s2);
+ s += p;
+ vst1_u8(s, s3);
+ s += p;
+ vst1_u8(s, s4);
+ s += p;
+ vst1_u8(s, s5);
+ s += p;
+ vst1_u8(s, s6);
+ s += p;
+ vst1_u8(s, s7);
+}
+
+static INLINE void store_u8_8x4(uint8_t *s, ptrdiff_t p, const uint8x8_t s0,
+ const uint8x8_t s1, const uint8x8_t s2,
+ const uint8x8_t s3) {
+ vst1_u8(s, s0);
+ s += p;
+ vst1_u8(s, s1);
+ s += p;
+ vst1_u8(s, s2);
+ s += p;
+ vst1_u8(s, s3);
+}
+
+static INLINE void store_u8_8x16(uint8_t *s, ptrdiff_t p, const uint8x16_t s0,
+ const uint8x16_t s1, const uint8x16_t s2,
+ const uint8x16_t s3) {
+ vst1q_u8(s, s0);
+ s += p;
+ vst1q_u8(s, s1);
+ s += p;
+ vst1q_u8(s, s2);
+ s += p;
+ vst1q_u8(s, s3);
+}
+
+static INLINE void store_u16_8x8(uint16_t *s, ptrdiff_t dst_stride,
+ const uint16x8_t s0, const uint16x8_t s1,
+ const uint16x8_t s2, const uint16x8_t s3,
+ const uint16x8_t s4, const uint16x8_t s5,
+ const uint16x8_t s6, const uint16x8_t s7) {
+ vst1q_u16(s, s0);
+ s += dst_stride;
+ vst1q_u16(s, s1);
+ s += dst_stride;
+ vst1q_u16(s, s2);
+ s += dst_stride;
+ vst1q_u16(s, s3);
+ s += dst_stride;
+ vst1q_u16(s, s4);
+ s += dst_stride;
+ vst1q_u16(s, s5);
+ s += dst_stride;
+ vst1q_u16(s, s6);
+ s += dst_stride;
+ vst1q_u16(s, s7);
+}
+
+static INLINE void store_u16_4x4(uint16_t *s, ptrdiff_t dst_stride,
+ const uint16x4_t s0, const uint16x4_t s1,
+ const uint16x4_t s2, const uint16x4_t s3) {
+ vst1_u16(s, s0);
+ s += dst_stride;
+ vst1_u16(s, s1);
+ s += dst_stride;
+ vst1_u16(s, s2);
+ s += dst_stride;
+ vst1_u16(s, s3);
+}
+
+static INLINE void store_u16_8x4(uint16_t *s, ptrdiff_t dst_stride,
+ const uint16x8_t s0, const uint16x8_t s1,
+ const uint16x8_t s2, const uint16x8_t s3) {
+ vst1q_u16(s, s0);
+ s += dst_stride;
+ vst1q_u16(s, s1);
+ s += dst_stride;
+ vst1q_u16(s, s2);
+ s += dst_stride;
+ vst1q_u16(s, s3);
+}
+
+static INLINE void store_s16_8x8(int16_t *s, ptrdiff_t dst_stride,
+ const int16x8_t s0, const int16x8_t s1,
+ const int16x8_t s2, const int16x8_t s3,
+ const int16x8_t s4, const int16x8_t s5,
+ const int16x8_t s6, const int16x8_t s7) {
+ vst1q_s16(s, s0);
+ s += dst_stride;
+ vst1q_s16(s, s1);
+ s += dst_stride;
+ vst1q_s16(s, s2);
+ s += dst_stride;
+ vst1q_s16(s, s3);
+ s += dst_stride;
+ vst1q_s16(s, s4);
+ s += dst_stride;
+ vst1q_s16(s, s5);
+ s += dst_stride;
+ vst1q_s16(s, s6);
+ s += dst_stride;
+ vst1q_s16(s, s7);
+}
+
+static INLINE void load_s16_8x8(const int16_t *s, ptrdiff_t p,
+ int16x8_t *const s0, int16x8_t *const s1,
+ int16x8_t *const s2, int16x8_t *const s3,
+ int16x8_t *const s4, int16x8_t *const s5,
+ int16x8_t *const s6, int16x8_t *const s7) {
+ *s0 = vld1q_s16(s);
+ s += p;
+ *s1 = vld1q_s16(s);
+ s += p;
+ *s2 = vld1q_s16(s);
+ s += p;
+ *s3 = vld1q_s16(s);
+ s += p;
+ *s4 = vld1q_s16(s);
+ s += p;
+ *s5 = vld1q_s16(s);
+ s += p;
+ *s6 = vld1q_s16(s);
+ s += p;
+ *s7 = vld1q_s16(s);
+}
+
+static INLINE void load_s16_8x4(const int16_t *s, ptrdiff_t p,
+ int16x8_t *const s0, int16x8_t *const s1,
+ int16x8_t *const s2, int16x8_t *const s3) {
+ *s0 = vld1q_s16(s);
+ s += p;
+ *s1 = vld1q_s16(s);
+ s += p;
+ *s2 = vld1q_s16(s);
+ s += p;
+ *s3 = vld1q_s16(s);
+}
+
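+// The load_unaligned_* helpers below go through memcpy so that rows with
+// arbitrary alignment can be gathered into a vector without invoking
+// undefined behaviour; compilers generally lower the memcpy to a single
+// scalar load. A minimal usage sketch (variable names illustrative):
+//   uint32x2_t rows = vdup_n_u32(0);
+//   load_unaligned_u8_4x2(src, stride, &rows);  // lane 0 = row 0, lane 1 = row 1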
+static INLINE void load_unaligned_u8_4x8(const uint8_t *buf, int stride,
+ uint32x2_t *tu0, uint32x2_t *tu1,
+ uint32x2_t *tu2, uint32x2_t *tu3) {
+ uint32_t a;
+
+ memcpy(&a, buf, 4);
+ buf += stride;
+ *tu0 = vset_lane_u32(a, *tu0, 0);
+ memcpy(&a, buf, 4);
+ buf += stride;
+ *tu0 = vset_lane_u32(a, *tu0, 1);
+ memcpy(&a, buf, 4);
+ buf += stride;
+ *tu1 = vset_lane_u32(a, *tu1, 0);
+ memcpy(&a, buf, 4);
+ buf += stride;
+ *tu1 = vset_lane_u32(a, *tu1, 1);
+ memcpy(&a, buf, 4);
+ buf += stride;
+ *tu2 = vset_lane_u32(a, *tu2, 0);
+ memcpy(&a, buf, 4);
+ buf += stride;
+ *tu2 = vset_lane_u32(a, *tu2, 1);
+ memcpy(&a, buf, 4);
+ buf += stride;
+ *tu3 = vset_lane_u32(a, *tu3, 0);
+ memcpy(&a, buf, 4);
+ *tu3 = vset_lane_u32(a, *tu3, 1);
+}
+
+static INLINE void load_unaligned_u8_4x4(const uint8_t *buf, int stride,
+ uint32x2_t *tu0, uint32x2_t *tu1) {
+ uint32_t a;
+
+ memcpy(&a, buf, 4);
+ buf += stride;
+ *tu0 = vset_lane_u32(a, *tu0, 0);
+ memcpy(&a, buf, 4);
+ buf += stride;
+ *tu0 = vset_lane_u32(a, *tu0, 1);
+ memcpy(&a, buf, 4);
+ buf += stride;
+ *tu1 = vset_lane_u32(a, *tu1, 0);
+ memcpy(&a, buf, 4);
+ *tu1 = vset_lane_u32(a, *tu1, 1);
+}
+
+static INLINE void load_unaligned_u8_4x2(const uint8_t *buf, int stride,
+ uint32x2_t *tu0) {
+ uint32_t a;
+
+ memcpy(&a, buf, 4);
+ buf += stride;
+ *tu0 = vset_lane_u32(a, *tu0, 0);
+ memcpy(&a, buf, 4);
+ buf += stride;
+ *tu0 = vset_lane_u32(a, *tu0, 1);
+}
+
+static INLINE void load_unaligned_u8_2x2(const uint8_t *buf, int stride,
+ uint16x4_t *tu0) {
+ uint16_t a;
+
+ memcpy(&a, buf, 2);
+ buf += stride;
+ *tu0 = vset_lane_u16(a, *tu0, 0);
+ memcpy(&a, buf, 2);
+ buf += stride;
+ *tu0 = vset_lane_u16(a, *tu0, 1);
+}
+
+static INLINE void load_u8_16x8(const uint8_t *s, ptrdiff_t p,
+ uint8x16_t *const s0, uint8x16_t *const s1,
+ uint8x16_t *const s2, uint8x16_t *const s3,
+ uint8x16_t *const s4, uint8x16_t *const s5,
+ uint8x16_t *const s6, uint8x16_t *const s7) {
+ *s0 = vld1q_u8(s);
+ s += p;
+ *s1 = vld1q_u8(s);
+ s += p;
+ *s2 = vld1q_u8(s);
+ s += p;
+ *s3 = vld1q_u8(s);
+ s += p;
+ *s4 = vld1q_u8(s);
+ s += p;
+ *s5 = vld1q_u8(s);
+ s += p;
+ *s6 = vld1q_u8(s);
+ s += p;
+ *s7 = vld1q_u8(s);
+}
+
+static INLINE void load_u8_16x4(const uint8_t *s, ptrdiff_t p,
+ uint8x16_t *const s0, uint8x16_t *const s1,
+ uint8x16_t *const s2, uint8x16_t *const s3) {
+ *s0 = vld1q_u8(s);
+ s += p;
+ *s1 = vld1q_u8(s);
+ s += p;
+ *s2 = vld1q_u8(s);
+ s += p;
+ *s3 = vld1q_u8(s);
+}
+
+static INLINE void load_unaligned_u16_4x4(const uint16_t *buf, uint32_t stride,
+ uint64x2_t *tu0, uint64x2_t *tu1) {
+ uint64_t a;
+
+ memcpy(&a, buf, 8);
+ buf += stride;
+ *tu0 = vsetq_lane_u64(a, *tu0, 0);
+ memcpy(&a, buf, 8);
+ buf += stride;
+ *tu0 = vsetq_lane_u64(a, *tu0, 1);
+ memcpy(&a, buf, 8);
+ buf += stride;
+ *tu1 = vsetq_lane_u64(a, *tu1, 0);
+ memcpy(&a, buf, 8);
+ *tu1 = vsetq_lane_u64(a, *tu1, 1);
+}
+
+#endif // AV1_COMMON_ARM_MEM_NEON_H_
diff --git a/third_party/aom/av1/common/arm/neon/iht4x4_add_neon.c b/third_party/aom/av1/common/arm/neon/iht4x4_add_neon.c
deleted file mode 100644
index b29228e43..000000000
--- a/third_party/aom/av1/common/arm/neon/iht4x4_add_neon.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-#include <assert.h>
-
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
-#include "aom_dsp/txfm_common.h"
-#include "av1/common/common.h"
-
-static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) {
- int32x4_t q8s32, q9s32;
- int16x4x2_t d0x2s16, d1x2s16;
- int32x4x2_t q0x2s32;
-
- d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16));
- d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16));
-
- q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]));
- q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]));
- q0x2s32 = vtrnq_s32(q8s32, q9s32);
-
- *q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]);
- *q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]);
- return;
-}
-
-static INLINE void GENERATE_COSINE_CONSTANTS(int16x4_t *d0s16, int16x4_t *d1s16,
- int16x4_t *d2s16) {
- *d0s16 = vdup_n_s16((int16_t)cospi_8_64);
- *d1s16 = vdup_n_s16((int16_t)cospi_16_64);
- *d2s16 = vdup_n_s16((int16_t)cospi_24_64);
- return;
-}
-
-static INLINE void GENERATE_SINE_CONSTANTS(int16x4_t *d3s16, int16x4_t *d4s16,
- int16x4_t *d5s16, int16x8_t *q3s16) {
- *d3s16 = vdup_n_s16((int16_t)sinpi_1_9);
- *d4s16 = vdup_n_s16((int16_t)sinpi_2_9);
- *q3s16 = vdupq_n_s16((int16_t)sinpi_3_9);
- *d5s16 = vdup_n_s16((int16_t)sinpi_4_9);
- return;
-}
-
-static INLINE void IDCT4x4_1D(int16x4_t *d0s16, int16x4_t *d1s16,
- int16x4_t *d2s16, int16x8_t *q8s16,
- int16x8_t *q9s16) {
- int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16;
- int16x4_t d26s16, d27s16, d28s16, d29s16;
- int32x4_t q10s32, q13s32, q14s32, q15s32;
- int16x8_t q13s16, q14s16;
-
- d16s16 = vget_low_s16(*q8s16);
- d17s16 = vget_high_s16(*q8s16);
- d18s16 = vget_low_s16(*q9s16);
- d19s16 = vget_high_s16(*q9s16);
-
- d23s16 = vadd_s16(d16s16, d18s16);
- d24s16 = vsub_s16(d16s16, d18s16);
-
- q15s32 = vmull_s16(d17s16, *d2s16);
- q10s32 = vmull_s16(d17s16, *d0s16);
- q13s32 = vmull_s16(d23s16, *d1s16);
- q14s32 = vmull_s16(d24s16, *d1s16);
- q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16);
- q10s32 = vmlal_s16(q10s32, d19s16, *d2s16);
-
- d26s16 = vqrshrn_n_s32(q13s32, 14);
- d27s16 = vqrshrn_n_s32(q14s32, 14);
- d29s16 = vqrshrn_n_s32(q15s32, 14);
- d28s16 = vqrshrn_n_s32(q10s32, 14);
-
- q13s16 = vcombine_s16(d26s16, d27s16);
- q14s16 = vcombine_s16(d28s16, d29s16);
- *q8s16 = vaddq_s16(q13s16, q14s16);
- *q9s16 = vsubq_s16(q13s16, q14s16);
- *q9s16 = vcombine_s16(vget_high_s16(*q9s16), vget_low_s16(*q9s16)); // vswp
- return;
-}
-
-static INLINE void IADST4x4_1D(int16x4_t *d3s16, int16x4_t *d4s16,
- int16x4_t *d5s16, int16x8_t *q3s16,
- int16x8_t *q8s16, int16x8_t *q9s16) {
- int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16;
- int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
-
- d6s16 = vget_low_s16(*q3s16);
-
- d16s16 = vget_low_s16(*q8s16);
- d17s16 = vget_high_s16(*q8s16);
- d18s16 = vget_low_s16(*q9s16);
- d19s16 = vget_high_s16(*q9s16);
-
- q10s32 = vmull_s16(*d3s16, d16s16);
- q11s32 = vmull_s16(*d4s16, d16s16);
- q12s32 = vmull_s16(d6s16, d17s16);
- q13s32 = vmull_s16(*d5s16, d18s16);
- q14s32 = vmull_s16(*d3s16, d18s16);
- q15s32 = vmovl_s16(d16s16);
- q15s32 = vaddw_s16(q15s32, d19s16);
- q8s32 = vmull_s16(*d4s16, d19s16);
- q15s32 = vsubw_s16(q15s32, d18s16);
- q9s32 = vmull_s16(*d5s16, d19s16);
-
- q10s32 = vaddq_s32(q10s32, q13s32);
- q10s32 = vaddq_s32(q10s32, q8s32);
- q11s32 = vsubq_s32(q11s32, q14s32);
- q8s32 = vdupq_n_s32((int32_t)sinpi_3_9);
- q11s32 = vsubq_s32(q11s32, q9s32);
- q15s32 = vmulq_s32(q15s32, q8s32);
-
- q13s32 = vaddq_s32(q10s32, q12s32);
- q10s32 = vaddq_s32(q10s32, q11s32);
- q14s32 = vaddq_s32(q11s32, q12s32);
- q10s32 = vsubq_s32(q10s32, q12s32);
-
- d16s16 = vqrshrn_n_s32(q13s32, 14);
- d17s16 = vqrshrn_n_s32(q14s32, 14);
- d18s16 = vqrshrn_n_s32(q15s32, 14);
- d19s16 = vqrshrn_n_s32(q10s32, 14);
-
- *q8s16 = vcombine_s16(d16s16, d17s16);
- *q9s16 = vcombine_s16(d18s16, d19s16);
- return;
-}
-
-void av1_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
- int dest_stride, const TxfmParam *txfm_param) {
- uint8x8_t d26u8, d27u8;
- int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16;
- uint32x2_t d26u32, d27u32;
- int16x8_t q3s16, q8s16, q9s16;
- uint16x8_t q8u16, q9u16;
-
- d26u32 = d27u32 = vdup_n_u32(0);
-
- q8s16 = vld1q_s16(input);
- q9s16 = vld1q_s16(input + 8);
-
- TRANSPOSE4X4(&q8s16, &q9s16);
-
- const TX_TYPE tx_type = txfm_param->tx_type;
- switch (tx_type) {
- case DCT_DCT: // idct_idct is not supported. Fall back to C
- av1_iht4x4_16_add_c(input, dest, dest_stride, txfm_param);
- return;
- break;
- case ADST_DCT: // iadst_idct
- // generate constants
- GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
- GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
-
- // first transform rows
- IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
-
- // transpose the matrix
- TRANSPOSE4X4(&q8s16, &q9s16);
-
- // then transform columns
- IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
- break;
- case DCT_ADST: // idct_iadst
- // generate constants
- GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
- GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
-
- // first transform rows
- IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
-
- // transpose the matrix
- TRANSPOSE4X4(&q8s16, &q9s16);
-
- // then transform columns
- IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
- break;
- case ADST_ADST: // iadst_iadst
- // generate constants
- GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
-
- // first transform rows
- IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
-
- // transpose the matrix
- TRANSPOSE4X4(&q8s16, &q9s16);
-
- // then transform columns
- IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
- break;
- default: // iadst_idct
- assert(0);
- break;
- }
-
- q8s16 = vrshrq_n_s16(q8s16, 4);
- q9s16 = vrshrq_n_s16(q9s16, 4);
-
- d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0);
- dest += dest_stride;
- d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1);
- dest += dest_stride;
- d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0);
- dest += dest_stride;
- d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1);
-
- q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32));
- q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32));
-
- d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
- d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
-
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1);
- dest -= dest_stride;
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0);
- dest -= dest_stride;
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1);
- dest -= dest_stride;
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0);
- return;
-}
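
The 4x4 kernel deleted above keeps its DCT/ADST constants (cospi_*_64, sinpi_*_9) in Q14 fixed point and folds the scaling back with vqrshrn_n_s32(x, 14), a rounding narrow by 14 bits. A minimal scalar sketch of that butterfly step, ignoring the saturation the intrinsic also performs:

  #include <stdint.h>

  /* Round a Q14 product back to 16 bits, as vqrshrn_n_s32(x, 14) does
     (minus the saturating clamp of the intrinsic). */
  static int16_t round_q14(int32_t x) { return (int16_t)((x + (1 << 13)) >> 14); }

  /* One butterfly output: round((in0 * c0 + in1 * c1) / 2^14), where c0 and
     c1 are Q14 constants such as cospi_16_64. */
  static int16_t butterfly_q14(int16_t in0, int16_t c0, int16_t in1, int16_t c1) {
    return round_q14((int32_t)in0 * c0 + (int32_t)in1 * c1);
  }
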
diff --git a/third_party/aom/av1/common/arm/neon/iht8x8_add_neon.c b/third_party/aom/av1/common/arm/neon/iht8x8_add_neon.c
deleted file mode 100644
index 4cd43a99d..000000000
--- a/third_party/aom/av1/common/arm/neon/iht8x8_add_neon.c
+++ /dev/null
@@ -1,594 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-#include <assert.h>
-
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
-#include "aom_dsp/txfm_common.h"
-#include "av1/common/common.h"
-
-static INLINE void TRANSPOSE8X8(int16x8_t *q8s16, int16x8_t *q9s16,
- int16x8_t *q10s16, int16x8_t *q11s16,
- int16x8_t *q12s16, int16x8_t *q13s16,
- int16x8_t *q14s16, int16x8_t *q15s16) {
- int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
- int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
- int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32;
- int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16;
-
- d16s16 = vget_low_s16(*q8s16);
- d17s16 = vget_high_s16(*q8s16);
- d18s16 = vget_low_s16(*q9s16);
- d19s16 = vget_high_s16(*q9s16);
- d20s16 = vget_low_s16(*q10s16);
- d21s16 = vget_high_s16(*q10s16);
- d22s16 = vget_low_s16(*q11s16);
- d23s16 = vget_high_s16(*q11s16);
- d24s16 = vget_low_s16(*q12s16);
- d25s16 = vget_high_s16(*q12s16);
- d26s16 = vget_low_s16(*q13s16);
- d27s16 = vget_high_s16(*q13s16);
- d28s16 = vget_low_s16(*q14s16);
- d29s16 = vget_high_s16(*q14s16);
- d30s16 = vget_low_s16(*q15s16);
- d31s16 = vget_high_s16(*q15s16);
-
- *q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24
- *q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26
- *q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28
- *q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30
- *q12s16 = vcombine_s16(d17s16, d25s16);
- *q13s16 = vcombine_s16(d19s16, d27s16);
- *q14s16 = vcombine_s16(d21s16, d29s16);
- *q15s16 = vcombine_s16(d23s16, d31s16);
-
- q0x2s32 =
- vtrnq_s32(vreinterpretq_s32_s16(*q8s16), vreinterpretq_s32_s16(*q10s16));
- q1x2s32 =
- vtrnq_s32(vreinterpretq_s32_s16(*q9s16), vreinterpretq_s32_s16(*q11s16));
- q2x2s32 =
- vtrnq_s32(vreinterpretq_s32_s16(*q12s16), vreinterpretq_s32_s16(*q14s16));
- q3x2s32 =
- vtrnq_s32(vreinterpretq_s32_s16(*q13s16), vreinterpretq_s32_s16(*q15s16));
-
- q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8
- vreinterpretq_s16_s32(q1x2s32.val[0])); // q9
- q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10
- vreinterpretq_s16_s32(q1x2s32.val[1])); // q11
- q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12
- vreinterpretq_s16_s32(q3x2s32.val[0])); // q13
- q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14
- vreinterpretq_s16_s32(q3x2s32.val[1])); // q15
-
- *q8s16 = q0x2s16.val[0];
- *q9s16 = q0x2s16.val[1];
- *q10s16 = q1x2s16.val[0];
- *q11s16 = q1x2s16.val[1];
- *q12s16 = q2x2s16.val[0];
- *q13s16 = q2x2s16.val[1];
- *q14s16 = q3x2s16.val[0];
- *q15s16 = q3x2s16.val[1];
- return;
-}
-
-static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
- int16x8_t *q10s16, int16x8_t *q11s16,
- int16x8_t *q12s16, int16x8_t *q13s16,
- int16x8_t *q14s16, int16x8_t *q15s16) {
- int16x4_t d0s16, d1s16, d2s16, d3s16;
- int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
- int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
- int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
- int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16;
- int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32;
- int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32;
-
- d0s16 = vdup_n_s16((int16_t)cospi_28_64);
- d1s16 = vdup_n_s16((int16_t)cospi_4_64);
- d2s16 = vdup_n_s16((int16_t)cospi_12_64);
- d3s16 = vdup_n_s16((int16_t)cospi_20_64);
-
- d16s16 = vget_low_s16(*q8s16);
- d17s16 = vget_high_s16(*q8s16);
- d18s16 = vget_low_s16(*q9s16);
- d19s16 = vget_high_s16(*q9s16);
- d20s16 = vget_low_s16(*q10s16);
- d21s16 = vget_high_s16(*q10s16);
- d22s16 = vget_low_s16(*q11s16);
- d23s16 = vget_high_s16(*q11s16);
- d24s16 = vget_low_s16(*q12s16);
- d25s16 = vget_high_s16(*q12s16);
- d26s16 = vget_low_s16(*q13s16);
- d27s16 = vget_high_s16(*q13s16);
- d28s16 = vget_low_s16(*q14s16);
- d29s16 = vget_high_s16(*q14s16);
- d30s16 = vget_low_s16(*q15s16);
- d31s16 = vget_high_s16(*q15s16);
-
- q2s32 = vmull_s16(d18s16, d0s16);
- q3s32 = vmull_s16(d19s16, d0s16);
- q5s32 = vmull_s16(d26s16, d2s16);
- q6s32 = vmull_s16(d27s16, d2s16);
-
- q2s32 = vmlsl_s16(q2s32, d30s16, d1s16);
- q3s32 = vmlsl_s16(q3s32, d31s16, d1s16);
- q5s32 = vmlsl_s16(q5s32, d22s16, d3s16);
- q6s32 = vmlsl_s16(q6s32, d23s16, d3s16);
-
- d8s16 = vqrshrn_n_s32(q2s32, 14);
- d9s16 = vqrshrn_n_s32(q3s32, 14);
- d10s16 = vqrshrn_n_s32(q5s32, 14);
- d11s16 = vqrshrn_n_s32(q6s32, 14);
- q4s16 = vcombine_s16(d8s16, d9s16);
- q5s16 = vcombine_s16(d10s16, d11s16);
-
- q2s32 = vmull_s16(d18s16, d1s16);
- q3s32 = vmull_s16(d19s16, d1s16);
- q9s32 = vmull_s16(d26s16, d3s16);
- q13s32 = vmull_s16(d27s16, d3s16);
-
- q2s32 = vmlal_s16(q2s32, d30s16, d0s16);
- q3s32 = vmlal_s16(q3s32, d31s16, d0s16);
- q9s32 = vmlal_s16(q9s32, d22s16, d2s16);
- q13s32 = vmlal_s16(q13s32, d23s16, d2s16);
-
- d14s16 = vqrshrn_n_s32(q2s32, 14);
- d15s16 = vqrshrn_n_s32(q3s32, 14);
- d12s16 = vqrshrn_n_s32(q9s32, 14);
- d13s16 = vqrshrn_n_s32(q13s32, 14);
- q6s16 = vcombine_s16(d12s16, d13s16);
- q7s16 = vcombine_s16(d14s16, d15s16);
-
- d0s16 = vdup_n_s16((int16_t)cospi_16_64);
-
- q2s32 = vmull_s16(d16s16, d0s16);
- q3s32 = vmull_s16(d17s16, d0s16);
- q13s32 = vmull_s16(d16s16, d0s16);
- q15s32 = vmull_s16(d17s16, d0s16);
-
- q2s32 = vmlal_s16(q2s32, d24s16, d0s16);
- q3s32 = vmlal_s16(q3s32, d25s16, d0s16);
- q13s32 = vmlsl_s16(q13s32, d24s16, d0s16);
- q15s32 = vmlsl_s16(q15s32, d25s16, d0s16);
-
- d0s16 = vdup_n_s16((int16_t)cospi_24_64);
- d1s16 = vdup_n_s16((int16_t)cospi_8_64);
-
- d18s16 = vqrshrn_n_s32(q2s32, 14);
- d19s16 = vqrshrn_n_s32(q3s32, 14);
- d22s16 = vqrshrn_n_s32(q13s32, 14);
- d23s16 = vqrshrn_n_s32(q15s32, 14);
- *q9s16 = vcombine_s16(d18s16, d19s16);
- *q11s16 = vcombine_s16(d22s16, d23s16);
-
- q2s32 = vmull_s16(d20s16, d0s16);
- q3s32 = vmull_s16(d21s16, d0s16);
- q8s32 = vmull_s16(d20s16, d1s16);
- q12s32 = vmull_s16(d21s16, d1s16);
-
- q2s32 = vmlsl_s16(q2s32, d28s16, d1s16);
- q3s32 = vmlsl_s16(q3s32, d29s16, d1s16);
- q8s32 = vmlal_s16(q8s32, d28s16, d0s16);
- q12s32 = vmlal_s16(q12s32, d29s16, d0s16);
-
- d26s16 = vqrshrn_n_s32(q2s32, 14);
- d27s16 = vqrshrn_n_s32(q3s32, 14);
- d30s16 = vqrshrn_n_s32(q8s32, 14);
- d31s16 = vqrshrn_n_s32(q12s32, 14);
- *q13s16 = vcombine_s16(d26s16, d27s16);
- *q15s16 = vcombine_s16(d30s16, d31s16);
-
- q0s16 = vaddq_s16(*q9s16, *q15s16);
- q1s16 = vaddq_s16(*q11s16, *q13s16);
- q2s16 = vsubq_s16(*q11s16, *q13s16);
- q3s16 = vsubq_s16(*q9s16, *q15s16);
-
- *q13s16 = vsubq_s16(q4s16, q5s16);
- q4s16 = vaddq_s16(q4s16, q5s16);
- *q14s16 = vsubq_s16(q7s16, q6s16);
- q7s16 = vaddq_s16(q7s16, q6s16);
- d26s16 = vget_low_s16(*q13s16);
- d27s16 = vget_high_s16(*q13s16);
- d28s16 = vget_low_s16(*q14s16);
- d29s16 = vget_high_s16(*q14s16);
-
- d16s16 = vdup_n_s16((int16_t)cospi_16_64);
-
- q9s32 = vmull_s16(d28s16, d16s16);
- q10s32 = vmull_s16(d29s16, d16s16);
- q11s32 = vmull_s16(d28s16, d16s16);
- q12s32 = vmull_s16(d29s16, d16s16);
-
- q9s32 = vmlsl_s16(q9s32, d26s16, d16s16);
- q10s32 = vmlsl_s16(q10s32, d27s16, d16s16);
- q11s32 = vmlal_s16(q11s32, d26s16, d16s16);
- q12s32 = vmlal_s16(q12s32, d27s16, d16s16);
-
- d10s16 = vqrshrn_n_s32(q9s32, 14);
- d11s16 = vqrshrn_n_s32(q10s32, 14);
- d12s16 = vqrshrn_n_s32(q11s32, 14);
- d13s16 = vqrshrn_n_s32(q12s32, 14);
- q5s16 = vcombine_s16(d10s16, d11s16);
- q6s16 = vcombine_s16(d12s16, d13s16);
-
- *q8s16 = vaddq_s16(q0s16, q7s16);
- *q9s16 = vaddq_s16(q1s16, q6s16);
- *q10s16 = vaddq_s16(q2s16, q5s16);
- *q11s16 = vaddq_s16(q3s16, q4s16);
- *q12s16 = vsubq_s16(q3s16, q4s16);
- *q13s16 = vsubq_s16(q2s16, q5s16);
- *q14s16 = vsubq_s16(q1s16, q6s16);
- *q15s16 = vsubq_s16(q0s16, q7s16);
- return;
-}
-
-static INLINE void IADST8X8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
- int16x8_t *q10s16, int16x8_t *q11s16,
- int16x8_t *q12s16, int16x8_t *q13s16,
- int16x8_t *q14s16, int16x8_t *q15s16) {
- int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16;
- int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
- int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
- int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
- int16x8_t q2s16, q4s16, q5s16, q6s16;
- int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q7s32, q8s32;
- int32x4_t q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
-
- d16s16 = vget_low_s16(*q8s16);
- d17s16 = vget_high_s16(*q8s16);
- d18s16 = vget_low_s16(*q9s16);
- d19s16 = vget_high_s16(*q9s16);
- d20s16 = vget_low_s16(*q10s16);
- d21s16 = vget_high_s16(*q10s16);
- d22s16 = vget_low_s16(*q11s16);
- d23s16 = vget_high_s16(*q11s16);
- d24s16 = vget_low_s16(*q12s16);
- d25s16 = vget_high_s16(*q12s16);
- d26s16 = vget_low_s16(*q13s16);
- d27s16 = vget_high_s16(*q13s16);
- d28s16 = vget_low_s16(*q14s16);
- d29s16 = vget_high_s16(*q14s16);
- d30s16 = vget_low_s16(*q15s16);
- d31s16 = vget_high_s16(*q15s16);
-
- d14s16 = vdup_n_s16((int16_t)cospi_2_64);
- d15s16 = vdup_n_s16((int16_t)cospi_30_64);
-
- q1s32 = vmull_s16(d30s16, d14s16);
- q2s32 = vmull_s16(d31s16, d14s16);
- q3s32 = vmull_s16(d30s16, d15s16);
- q4s32 = vmull_s16(d31s16, d15s16);
-
- d30s16 = vdup_n_s16((int16_t)cospi_18_64);
- d31s16 = vdup_n_s16((int16_t)cospi_14_64);
-
- q1s32 = vmlal_s16(q1s32, d16s16, d15s16);
- q2s32 = vmlal_s16(q2s32, d17s16, d15s16);
- q3s32 = vmlsl_s16(q3s32, d16s16, d14s16);
- q4s32 = vmlsl_s16(q4s32, d17s16, d14s16);
-
- q5s32 = vmull_s16(d22s16, d30s16);
- q6s32 = vmull_s16(d23s16, d30s16);
- q7s32 = vmull_s16(d22s16, d31s16);
- q8s32 = vmull_s16(d23s16, d31s16);
-
- q5s32 = vmlal_s16(q5s32, d24s16, d31s16);
- q6s32 = vmlal_s16(q6s32, d25s16, d31s16);
- q7s32 = vmlsl_s16(q7s32, d24s16, d30s16);
- q8s32 = vmlsl_s16(q8s32, d25s16, d30s16);
-
- q11s32 = vaddq_s32(q1s32, q5s32);
- q12s32 = vaddq_s32(q2s32, q6s32);
- q1s32 = vsubq_s32(q1s32, q5s32);
- q2s32 = vsubq_s32(q2s32, q6s32);
-
- d22s16 = vqrshrn_n_s32(q11s32, 14);
- d23s16 = vqrshrn_n_s32(q12s32, 14);
- *q11s16 = vcombine_s16(d22s16, d23s16);
-
- q12s32 = vaddq_s32(q3s32, q7s32);
- q15s32 = vaddq_s32(q4s32, q8s32);
- q3s32 = vsubq_s32(q3s32, q7s32);
- q4s32 = vsubq_s32(q4s32, q8s32);
-
- d2s16 = vqrshrn_n_s32(q1s32, 14);
- d3s16 = vqrshrn_n_s32(q2s32, 14);
- d24s16 = vqrshrn_n_s32(q12s32, 14);
- d25s16 = vqrshrn_n_s32(q15s32, 14);
- d6s16 = vqrshrn_n_s32(q3s32, 14);
- d7s16 = vqrshrn_n_s32(q4s32, 14);
- *q12s16 = vcombine_s16(d24s16, d25s16);
-
- d0s16 = vdup_n_s16((int16_t)cospi_10_64);
- d1s16 = vdup_n_s16((int16_t)cospi_22_64);
- q4s32 = vmull_s16(d26s16, d0s16);
- q5s32 = vmull_s16(d27s16, d0s16);
- q2s32 = vmull_s16(d26s16, d1s16);
- q6s32 = vmull_s16(d27s16, d1s16);
-
- d30s16 = vdup_n_s16((int16_t)cospi_26_64);
- d31s16 = vdup_n_s16((int16_t)cospi_6_64);
-
- q4s32 = vmlal_s16(q4s32, d20s16, d1s16);
- q5s32 = vmlal_s16(q5s32, d21s16, d1s16);
- q2s32 = vmlsl_s16(q2s32, d20s16, d0s16);
- q6s32 = vmlsl_s16(q6s32, d21s16, d0s16);
-
- q0s32 = vmull_s16(d18s16, d30s16);
- q13s32 = vmull_s16(d19s16, d30s16);
-
- q0s32 = vmlal_s16(q0s32, d28s16, d31s16);
- q13s32 = vmlal_s16(q13s32, d29s16, d31s16);
-
- q10s32 = vmull_s16(d18s16, d31s16);
- q9s32 = vmull_s16(d19s16, d31s16);
-
- q10s32 = vmlsl_s16(q10s32, d28s16, d30s16);
- q9s32 = vmlsl_s16(q9s32, d29s16, d30s16);
-
- q14s32 = vaddq_s32(q2s32, q10s32);
- q15s32 = vaddq_s32(q6s32, q9s32);
- q2s32 = vsubq_s32(q2s32, q10s32);
- q6s32 = vsubq_s32(q6s32, q9s32);
-
- d28s16 = vqrshrn_n_s32(q14s32, 14);
- d29s16 = vqrshrn_n_s32(q15s32, 14);
- d4s16 = vqrshrn_n_s32(q2s32, 14);
- d5s16 = vqrshrn_n_s32(q6s32, 14);
- *q14s16 = vcombine_s16(d28s16, d29s16);
-
- q9s32 = vaddq_s32(q4s32, q0s32);
- q10s32 = vaddq_s32(q5s32, q13s32);
- q4s32 = vsubq_s32(q4s32, q0s32);
- q5s32 = vsubq_s32(q5s32, q13s32);
-
- d30s16 = vdup_n_s16((int16_t)cospi_8_64);
- d31s16 = vdup_n_s16((int16_t)cospi_24_64);
-
- d18s16 = vqrshrn_n_s32(q9s32, 14);
- d19s16 = vqrshrn_n_s32(q10s32, 14);
- d8s16 = vqrshrn_n_s32(q4s32, 14);
- d9s16 = vqrshrn_n_s32(q5s32, 14);
- *q9s16 = vcombine_s16(d18s16, d19s16);
-
- q5s32 = vmull_s16(d2s16, d30s16);
- q6s32 = vmull_s16(d3s16, d30s16);
- q7s32 = vmull_s16(d2s16, d31s16);
- q0s32 = vmull_s16(d3s16, d31s16);
-
- q5s32 = vmlal_s16(q5s32, d6s16, d31s16);
- q6s32 = vmlal_s16(q6s32, d7s16, d31s16);
- q7s32 = vmlsl_s16(q7s32, d6s16, d30s16);
- q0s32 = vmlsl_s16(q0s32, d7s16, d30s16);
-
- q1s32 = vmull_s16(d4s16, d30s16);
- q3s32 = vmull_s16(d5s16, d30s16);
- q10s32 = vmull_s16(d4s16, d31s16);
- q2s32 = vmull_s16(d5s16, d31s16);
-
- q1s32 = vmlsl_s16(q1s32, d8s16, d31s16);
- q3s32 = vmlsl_s16(q3s32, d9s16, d31s16);
- q10s32 = vmlal_s16(q10s32, d8s16, d30s16);
- q2s32 = vmlal_s16(q2s32, d9s16, d30s16);
-
- *q8s16 = vaddq_s16(*q11s16, *q9s16);
- *q11s16 = vsubq_s16(*q11s16, *q9s16);
- q4s16 = vaddq_s16(*q12s16, *q14s16);
- *q12s16 = vsubq_s16(*q12s16, *q14s16);
-
- q14s32 = vaddq_s32(q5s32, q1s32);
- q15s32 = vaddq_s32(q6s32, q3s32);
- q5s32 = vsubq_s32(q5s32, q1s32);
- q6s32 = vsubq_s32(q6s32, q3s32);
-
- d18s16 = vqrshrn_n_s32(q14s32, 14);
- d19s16 = vqrshrn_n_s32(q15s32, 14);
- d10s16 = vqrshrn_n_s32(q5s32, 14);
- d11s16 = vqrshrn_n_s32(q6s32, 14);
- *q9s16 = vcombine_s16(d18s16, d19s16);
-
- q1s32 = vaddq_s32(q7s32, q10s32);
- q3s32 = vaddq_s32(q0s32, q2s32);
- q7s32 = vsubq_s32(q7s32, q10s32);
- q0s32 = vsubq_s32(q0s32, q2s32);
-
- d28s16 = vqrshrn_n_s32(q1s32, 14);
- d29s16 = vqrshrn_n_s32(q3s32, 14);
- d14s16 = vqrshrn_n_s32(q7s32, 14);
- d15s16 = vqrshrn_n_s32(q0s32, 14);
- *q14s16 = vcombine_s16(d28s16, d29s16);
-
- d30s16 = vdup_n_s16((int16_t)cospi_16_64);
-
- d22s16 = vget_low_s16(*q11s16);
- d23s16 = vget_high_s16(*q11s16);
- q2s32 = vmull_s16(d22s16, d30s16);
- q3s32 = vmull_s16(d23s16, d30s16);
- q13s32 = vmull_s16(d22s16, d30s16);
- q1s32 = vmull_s16(d23s16, d30s16);
-
- d24s16 = vget_low_s16(*q12s16);
- d25s16 = vget_high_s16(*q12s16);
- q2s32 = vmlal_s16(q2s32, d24s16, d30s16);
- q3s32 = vmlal_s16(q3s32, d25s16, d30s16);
- q13s32 = vmlsl_s16(q13s32, d24s16, d30s16);
- q1s32 = vmlsl_s16(q1s32, d25s16, d30s16);
-
- d4s16 = vqrshrn_n_s32(q2s32, 14);
- d5s16 = vqrshrn_n_s32(q3s32, 14);
- d24s16 = vqrshrn_n_s32(q13s32, 14);
- d25s16 = vqrshrn_n_s32(q1s32, 14);
- q2s16 = vcombine_s16(d4s16, d5s16);
- *q12s16 = vcombine_s16(d24s16, d25s16);
-
- q13s32 = vmull_s16(d10s16, d30s16);
- q1s32 = vmull_s16(d11s16, d30s16);
- q11s32 = vmull_s16(d10s16, d30s16);
- q0s32 = vmull_s16(d11s16, d30s16);
-
- q13s32 = vmlal_s16(q13s32, d14s16, d30s16);
- q1s32 = vmlal_s16(q1s32, d15s16, d30s16);
- q11s32 = vmlsl_s16(q11s32, d14s16, d30s16);
- q0s32 = vmlsl_s16(q0s32, d15s16, d30s16);
-
- d20s16 = vqrshrn_n_s32(q13s32, 14);
- d21s16 = vqrshrn_n_s32(q1s32, 14);
- d12s16 = vqrshrn_n_s32(q11s32, 14);
- d13s16 = vqrshrn_n_s32(q0s32, 14);
- *q10s16 = vcombine_s16(d20s16, d21s16);
- q6s16 = vcombine_s16(d12s16, d13s16);
-
- q5s16 = vdupq_n_s16(0);
-
- *q9s16 = vsubq_s16(q5s16, *q9s16);
- *q11s16 = vsubq_s16(q5s16, q2s16);
- *q13s16 = vsubq_s16(q5s16, q6s16);
- *q15s16 = vsubq_s16(q5s16, q4s16);
- return;
-}
-
-void av1_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest,
- int dest_stride, const TxfmParam *txfm_param) {
- int i;
- uint8_t *d1, *d2;
- uint8x8_t d0u8, d1u8, d2u8, d3u8;
- uint64x1_t d0u64, d1u64, d2u64, d3u64;
- int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16;
- uint16x8_t q8u16, q9u16, q10u16, q11u16;
-
- q8s16 = vld1q_s16(input);
- q9s16 = vld1q_s16(input + 8);
- q10s16 = vld1q_s16(input + 8 * 2);
- q11s16 = vld1q_s16(input + 8 * 3);
- q12s16 = vld1q_s16(input + 8 * 4);
- q13s16 = vld1q_s16(input + 8 * 5);
- q14s16 = vld1q_s16(input + 8 * 6);
- q15s16 = vld1q_s16(input + 8 * 7);
-
- TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
-
- const TX_TYPE tx_type = txfm_param->tx_type;
- switch (tx_type) {
- case DCT_DCT: // idct_idct is not supported. Fall back to C
- av1_iht8x8_64_add_c(input, dest, dest_stride, txfm_param);
- return;
- break;
- case ADST_DCT: // iadst_idct
- // generate IDCT constants
- // GENERATE_IDCT_CONSTANTS
-
- // first transform rows
- IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
-
- // transpose the matrix
- TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
-
- // generate IADST constants
- // GENERATE_IADST_CONSTANTS
-
- // then transform columns
- IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
- break;
- case DCT_ADST: // idct_iadst
- // generate IADST constants
- // GENERATE_IADST_CONSTANTS
-
- // first transform rows
- IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
-
- // transpose the matrix
- TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
-
- // generate IDCT constants
- // GENERATE_IDCT_CONSTANTS
-
- // then transform columns
- IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
- break;
- case ADST_ADST: // iadst_iadst
- // generate IADST constants
- // GENERATE_IADST_CONSTANTS
-
- // first transform rows
- IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
-
- // transpose the matrix
- TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
-
- // then transform columns
- IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
- break;
- default: // iadst_idct
- assert(0);
- break;
- }
-
- q8s16 = vrshrq_n_s16(q8s16, 5);
- q9s16 = vrshrq_n_s16(q9s16, 5);
- q10s16 = vrshrq_n_s16(q10s16, 5);
- q11s16 = vrshrq_n_s16(q11s16, 5);
- q12s16 = vrshrq_n_s16(q12s16, 5);
- q13s16 = vrshrq_n_s16(q13s16, 5);
- q14s16 = vrshrq_n_s16(q14s16, 5);
- q15s16 = vrshrq_n_s16(q15s16, 5);
-
- for (d1 = d2 = dest, i = 0; i < 2; i++) {
- if (i != 0) {
- q8s16 = q12s16;
- q9s16 = q13s16;
- q10s16 = q14s16;
- q11s16 = q15s16;
- }
-
- d0u64 = vld1_u64((uint64_t *)d1);
- d1 += dest_stride;
- d1u64 = vld1_u64((uint64_t *)d1);
- d1 += dest_stride;
- d2u64 = vld1_u64((uint64_t *)d1);
- d1 += dest_stride;
- d3u64 = vld1_u64((uint64_t *)d1);
- d1 += dest_stride;
-
- q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u64(d0u64));
- q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u64(d1u64));
- q10u16 =
- vaddw_u8(vreinterpretq_u16_s16(q10s16), vreinterpret_u8_u64(d2u64));
- q11u16 =
- vaddw_u8(vreinterpretq_u16_s16(q11s16), vreinterpret_u8_u64(d3u64));
-
- d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
- d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
- d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16));
- d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16));
-
- vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8));
- d2 += dest_stride;
- vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8));
- d2 += dest_stride;
- vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8));
- d2 += dest_stride;
- vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8));
- d2 += dest_stride;
- }
- return;
-}
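
Both deleted hybrid-transform kernels finish identically: the residual is rounded down by the transform scale (a rounding shift of 4 for 4x4, 5 for 8x8), widened and added to the destination pixels, then clamped back to 8 bits (vrshrq_n_s16, vaddw_u8, vqmovun_s16). A scalar sketch of that reconstruction step:

  #include <stdint.h>

  static uint8_t clip_pixel(int v) { return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v)); }

  /* Add one row of rounded residuals to the destination; shift is 4 for the
     4x4 transform and 5 for the 8x8 transform. */
  static void add_residual_row(const int16_t *res, uint8_t *dst, int n, int shift) {
    for (int i = 0; i < n; ++i) {
      const int r = (res[i] + (1 << (shift - 1))) >> shift;
      dst[i] = clip_pixel(dst[i] + r);
    }
  }
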
diff --git a/third_party/aom/av1/common/arm/reconinter_neon.c b/third_party/aom/av1/common/arm/reconinter_neon.c
new file mode 100644
index 000000000..44e064195
--- /dev/null
+++ b/third_party/aom/av1/common/arm/reconinter_neon.c
@@ -0,0 +1,86 @@
+/*
+ *
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "aom/aom_integer.h"
+#include "aom_dsp/blend.h"
+#include "aom_ports/mem.h"
+#include "av1/common/arm/mem_neon.h"
+#include "av1/common/blockd.h"
+#include "config/av1_rtcd.h"
+
+void av1_build_compound_diffwtd_mask_d16_neon(
+ uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
+ int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
+ ConvolveParams *conv_params, int bd) {
+ assert(h >= 4);
+ assert(w >= 4);
+ assert((mask_type == DIFFWTD_38_INV) || (mask_type == DIFFWTD_38));
+ const int round =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
+ uint16x8_t diff_q, tmp0, tmp1;
+ uint8x8_t diff_d, diff_select;
+ const CONV_BUF_TYPE *src0_1, *src1_1;
+ const int16x8_t dup_round = vdupq_n_s16((int16_t)(-round));
+ const uint8x8_t dup_38 = vdup_n_u8(38);
+ const uint8x8_t dup_64 = vdup_n_u8(AOM_BLEND_A64_MAX_ALPHA);
+ if (mask_type == DIFFWTD_38) {
+ diff_select = vdup_n_u8(255);
+ } else {
+ diff_select = vdup_n_u8(0);
+ }
+ if (w >= 8) {
+ for (int i = 0; i < h; ++i) {
+ src0_1 = src0;
+ src1_1 = src1;
+ for (int j = 0; j < w; j += 8) {
+ __builtin_prefetch(src0_1);
+ __builtin_prefetch(src1_1);
+ diff_q = vabdq_u16(vld1q_u16(src0_1), vld1q_u16(src1_1));
+ diff_q = vrshlq_u16(diff_q, dup_round);
+ diff_d = vshrn_n_u16(diff_q, DIFF_FACTOR_LOG2);
+ diff_d = vmin_u8(vadd_u8(diff_d, dup_38), dup_64);
+ diff_d = vbsl_u8(diff_select, diff_d, vsub_u8(dup_64, diff_d));
+ vst1_u8(mask, diff_d);
+ src0_1 += 8;
+ src1_1 += 8;
+ mask += 8;
+ }
+ src0 += src0_stride;
+ src1 += src1_stride;
+ }
+ } else if (w == 4) {
+ for (int i = 0; i < h; i += 2) {
+ src0_1 = src0;
+ src1_1 = src1;
+ __builtin_prefetch(src0_1 + 0 * src0_stride);
+ __builtin_prefetch(src0_1 + 1 * src0_stride);
+ __builtin_prefetch(src1_1 + 0 * src1_stride);
+ __builtin_prefetch(src1_1 + 1 * src1_stride);
+ tmp0 = vcombine_u16(vld1_u16(src0_1 + (0 * src0_stride)),
+ vld1_u16(src0_1 + (1 * src0_stride)));
+ tmp1 = vcombine_u16(vld1_u16(src1_1 + (0 * src1_stride)),
+ vld1_u16(src1_1 + (1 * src1_stride)));
+ diff_q = vabdq_u16(tmp0, tmp1);
+ diff_q = vrshlq_u16(diff_q, dup_round);
+ diff_d = vshrn_n_u16(diff_q, DIFF_FACTOR_LOG2);
+ diff_d = vmin_u8(vadd_u8(diff_d, dup_38), dup_64);
+ diff_d = vbsl_u8(diff_select, diff_d, vsub_u8(dup_64, diff_d));
+ vst1_u8(mask, diff_d);
+ src0 += src0_stride * 2;
+ src1 += src1_stride * 2;
+ mask += w * 2;
+ }
+ }
+}
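
The new mask builder derives a per-pixel blend weight from the absolute difference of the two compound predictions: the difference is brought back to pixel precision (rounding shift by round), scaled down by DIFF_FACTOR_LOG2, offset by 38 and clamped to AOM_BLEND_A64_MAX_ALPHA (64); DIFFWTD_38_INV stores 64 minus that weight. A scalar sketch of the per-pixel rule, assuming DIFF_FACTOR_LOG2 is 4 as in this revision of the library:

  #include <stdint.h>
  #include <stdlib.h>

  static uint8_t diffwtd_mask_sample(uint16_t a, uint16_t b, int round, int inverse) {
    int diff = abs((int)a - (int)b);
    if (round > 0) diff = (diff + (1 << (round - 1))) >> round; /* vrshlq_u16 by -round */
    int m = 38 + (diff >> 4);                                   /* DIFF_FACTOR_LOG2 == 4 (assumed) */
    if (m > 64) m = 64;                                         /* AOM_BLEND_A64_MAX_ALPHA */
    return (uint8_t)(inverse ? 64 - m : m);                     /* inverse == DIFFWTD_38_INV */
  }
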
diff --git a/third_party/aom/av1/common/arm/transpose_neon.h b/third_party/aom/av1/common/arm/transpose_neon.h
new file mode 100644
index 000000000..53727bb43
--- /dev/null
+++ b/third_party/aom/av1/common/arm/transpose_neon.h
@@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef AV1_COMMON_ARM_TRANSPOSE_NEON_H_
+#define AV1_COMMON_ARM_TRANSPOSE_NEON_H_
+
+#include <arm_neon.h>
+
+static INLINE void transpose_u8_8x8(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2,
+ uint8x8_t *a3, uint8x8_t *a4, uint8x8_t *a5,
+ uint8x8_t *a6, uint8x8_t *a7) {
+ // Swap 8 bit elements. Goes from:
+ // a0: 00 01 02 03 04 05 06 07
+ // a1: 10 11 12 13 14 15 16 17
+ // a2: 20 21 22 23 24 25 26 27
+ // a3: 30 31 32 33 34 35 36 37
+ // a4: 40 41 42 43 44 45 46 47
+ // a5: 50 51 52 53 54 55 56 57
+ // a6: 60 61 62 63 64 65 66 67
+ // a7: 70 71 72 73 74 75 76 77
+ // to:
+ // b0.val[0]: 00 10 02 12 04 14 06 16 40 50 42 52 44 54 46 56
+ // b0.val[1]: 01 11 03 13 05 15 07 17 41 51 43 53 45 55 47 57
+ // b1.val[0]: 20 30 22 32 24 34 26 36 60 70 62 72 64 74 66 76
+ // b1.val[1]: 21 31 23 33 25 35 27 37 61 71 63 73 65 75 67 77
+
+ const uint8x16x2_t b0 =
+ vtrnq_u8(vcombine_u8(*a0, *a4), vcombine_u8(*a1, *a5));
+ const uint8x16x2_t b1 =
+ vtrnq_u8(vcombine_u8(*a2, *a6), vcombine_u8(*a3, *a7));
+
+ // Swap 16 bit elements resulting in:
+ // c0.val[0]: 00 10 20 30 04 14 24 34 40 50 60 70 44 54 64 74
+ // c0.val[1]: 02 12 22 32 06 16 26 36 42 52 62 72 46 56 66 76
+ // c1.val[0]: 01 11 21 31 05 15 25 35 41 51 61 71 45 55 65 75
+ // c1.val[1]: 03 13 23 33 07 17 27 37 43 53 63 73 47 57 67 77
+
+ const uint16x8x2_t c0 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[0]),
+ vreinterpretq_u16_u8(b1.val[0]));
+ const uint16x8x2_t c1 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[1]),
+ vreinterpretq_u16_u8(b1.val[1]));
+
+ // Unzip 32 bit elements resulting in:
+ // d0.val[0]: 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
+ // d0.val[1]: 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75
+ // d1.val[0]: 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
+ // d1.val[1]: 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77
+ const uint32x4x2_t d0 = vuzpq_u32(vreinterpretq_u32_u16(c0.val[0]),
+ vreinterpretq_u32_u16(c1.val[0]));
+ const uint32x4x2_t d1 = vuzpq_u32(vreinterpretq_u32_u16(c0.val[1]),
+ vreinterpretq_u32_u16(c1.val[1]));
+
+ *a0 = vreinterpret_u8_u32(vget_low_u32(d0.val[0]));
+ *a1 = vreinterpret_u8_u32(vget_high_u32(d0.val[0]));
+ *a2 = vreinterpret_u8_u32(vget_low_u32(d1.val[0]));
+ *a3 = vreinterpret_u8_u32(vget_high_u32(d1.val[0]));
+ *a4 = vreinterpret_u8_u32(vget_low_u32(d0.val[1]));
+ *a5 = vreinterpret_u8_u32(vget_high_u32(d0.val[1]));
+ *a6 = vreinterpret_u8_u32(vget_low_u32(d1.val[1]));
+ *a7 = vreinterpret_u8_u32(vget_high_u32(d1.val[1]));
+}
+
+static INLINE void transpose_u8_8x4(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2,
+ uint8x8_t *a3) {
+ // Swap 8 bit elements. Goes from:
+ // a0: 00 01 02 03 04 05 06 07
+ // a1: 10 11 12 13 14 15 16 17
+ // a2: 20 21 22 23 24 25 26 27
+ // a3: 30 31 32 33 34 35 36 37
+ // to:
+ // b0.val[0]: 00 10 02 12 04 14 06 16
+ // b0.val[1]: 01 11 03 13 05 15 07 17
+ // b1.val[0]: 20 30 22 32 24 34 26 36
+ // b1.val[1]: 21 31 23 33 25 35 27 37
+
+ const uint8x8x2_t b0 = vtrn_u8(*a0, *a1);
+ const uint8x8x2_t b1 = vtrn_u8(*a2, *a3);
+
+ // Swap 16 bit elements resulting in:
+ // c0.val[0]: 00 10 20 30 04 14 24 34
+ // c0.val[1]: 02 12 22 32 06 16 26 36
+ // c1.val[0]: 01 11 21 31 05 15 25 35
+ // c1.val[1]: 03 13 23 33 07 17 27 37
+
+ const uint16x4x2_t c0 =
+ vtrn_u16(vreinterpret_u16_u8(b0.val[0]), vreinterpret_u16_u8(b1.val[0]));
+ const uint16x4x2_t c1 =
+ vtrn_u16(vreinterpret_u16_u8(b0.val[1]), vreinterpret_u16_u8(b1.val[1]));
+
+ *a0 = vreinterpret_u8_u16(c0.val[0]);
+ *a1 = vreinterpret_u8_u16(c1.val[0]);
+ *a2 = vreinterpret_u8_u16(c0.val[1]);
+ *a3 = vreinterpret_u8_u16(c1.val[1]);
+}
+
+static INLINE void transpose_u8_4x4(uint8x8_t *a0, uint8x8_t *a1) {
+ // Swap 16 bit elements. Goes from:
+ // a0: 00 01 02 03 10 11 12 13
+ // a1: 20 21 22 23 30 31 32 33
+ // to:
+ // b0.val[0]: 00 01 20 21 10 11 30 31
+ // b0.val[1]: 02 03 22 23 12 13 32 33
+
+ const uint16x4x2_t b0 =
+ vtrn_u16(vreinterpret_u16_u8(*a0), vreinterpret_u16_u8(*a1));
+
+ // Swap 32 bit elements resulting in:
+ // c0.val[0]: 00 01 20 21 02 03 22 23
+ // c0.val[1]: 10 11 30 31 12 13 32 33
+
+ const uint32x2x2_t c0 = vtrn_u32(vreinterpret_u32_u16(b0.val[0]),
+ vreinterpret_u32_u16(b0.val[1]));
+
+ // Swap 8 bit elements resulting in:
+ // d0.val[0]: 00 10 20 30 02 12 22 32
+ // d0.val[1]: 01 11 21 31 03 13 23 33
+
+ const uint8x8x2_t d0 =
+ vtrn_u8(vreinterpret_u8_u32(c0.val[0]), vreinterpret_u8_u32(c0.val[1]));
+
+ *a0 = d0.val[0];
+ *a1 = d0.val[1];
+}
+
+static INLINE void transpose_u8_4x8(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2,
+ uint8x8_t *a3, const uint8x8_t a4,
+ const uint8x8_t a5, const uint8x8_t a6,
+ const uint8x8_t a7) {
+ // Swap 32 bit elements. Goes from:
+ // a0: 00 01 02 03 XX XX XX XX
+ // a1: 10 11 12 13 XX XX XX XX
+ // a2: 20 21 22 23 XX XX XX XX
+ // a3: 30 31 32 33 XX XX XX XX
+ // a4: 40 41 42 43 XX XX XX XX
+ // a5: 50 51 52 53 XX XX XX XX
+ // a6: 60 61 62 63 XX XX XX XX
+ // a7: 70 71 72 73 XX XX XX XX
+ // to:
+ // b0.val[0]: 00 01 02 03 40 41 42 43
+ // b1.val[0]: 10 11 12 13 50 51 52 53
+ // b2.val[0]: 20 21 22 23 60 61 62 63
+ // b3.val[0]: 30 31 32 33 70 71 72 73
+
+ const uint32x2x2_t b0 =
+ vtrn_u32(vreinterpret_u32_u8(*a0), vreinterpret_u32_u8(a4));
+ const uint32x2x2_t b1 =
+ vtrn_u32(vreinterpret_u32_u8(*a1), vreinterpret_u32_u8(a5));
+ const uint32x2x2_t b2 =
+ vtrn_u32(vreinterpret_u32_u8(*a2), vreinterpret_u32_u8(a6));
+ const uint32x2x2_t b3 =
+ vtrn_u32(vreinterpret_u32_u8(*a3), vreinterpret_u32_u8(a7));
+
+ // Swap 16 bit elements resulting in:
+ // c0.val[0]: 00 01 20 21 40 41 60 61
+ // c0.val[1]: 02 03 22 23 42 43 62 63
+ // c1.val[0]: 10 11 30 31 50 51 70 71
+ // c1.val[1]: 12 13 32 33 52 53 72 73
+
+ const uint16x4x2_t c0 = vtrn_u16(vreinterpret_u16_u32(b0.val[0]),
+ vreinterpret_u16_u32(b2.val[0]));
+ const uint16x4x2_t c1 = vtrn_u16(vreinterpret_u16_u32(b1.val[0]),
+ vreinterpret_u16_u32(b3.val[0]));
+
+ // Swap 8 bit elements resulting in:
+ // d0.val[0]: 00 10 20 30 40 50 60 70
+ // d0.val[1]: 01 11 21 31 41 51 61 71
+ // d1.val[0]: 02 12 22 32 42 52 62 72
+ // d1.val[1]: 03 13 23 33 43 53 63 73
+
+ const uint8x8x2_t d0 =
+ vtrn_u8(vreinterpret_u8_u16(c0.val[0]), vreinterpret_u8_u16(c1.val[0]));
+ const uint8x8x2_t d1 =
+ vtrn_u8(vreinterpret_u8_u16(c0.val[1]), vreinterpret_u8_u16(c1.val[1]));
+
+ *a0 = d0.val[0];
+ *a1 = d0.val[1];
+ *a2 = d1.val[0];
+ *a3 = d1.val[1];
+}
+
+static INLINE void transpose_u16_4x8(uint16x4_t *a0, uint16x4_t *a1,
+ uint16x4_t *a2, uint16x4_t *a3,
+ uint16x4_t *a4, uint16x4_t *a5,
+ uint16x4_t *a6, uint16x4_t *a7,
+ uint16x8_t *o0, uint16x8_t *o1,
+ uint16x8_t *o2, uint16x8_t *o3) {
+ // Swap 16 bit elements. Goes from:
+ // a0: 00 01 02 03
+ // a1: 10 11 12 13
+ // a2: 20 21 22 23
+ // a3: 30 31 32 33
+ // a4: 40 41 42 43
+ // a5: 50 51 52 53
+ // a6: 60 61 62 63
+ // a7: 70 71 72 73
+ // to:
+ // b0.val[0]: 00 10 02 12
+ // b0.val[1]: 01 11 03 13
+ // b1.val[0]: 20 30 22 32
+ // b1.val[1]: 21 31 23 33
+ // b2.val[0]: 40 50 42 52
+ // b2.val[1]: 41 51 43 53
+ // b3.val[0]: 60 70 62 72
+ // b3.val[1]: 61 71 63 73
+
+ uint16x4x2_t b0 = vtrn_u16(*a0, *a1);
+ uint16x4x2_t b1 = vtrn_u16(*a2, *a3);
+ uint16x4x2_t b2 = vtrn_u16(*a4, *a5);
+ uint16x4x2_t b3 = vtrn_u16(*a6, *a7);
+
+ // Swap 32 bit elements resulting in:
+ // c0.val[0]: 00 10 20 30
+ // c0.val[1]: 02 12 22 32
+ // c1.val[0]: 01 11 21 31
+ // c1.val[1]: 03 13 23 33
+ // c2.val[0]: 40 50 60 70
+ // c2.val[1]: 42 52 62 72
+ // c3.val[0]: 41 51 61 71
+ // c3.val[1]: 43 53 63 73
+
+ uint32x2x2_t c0 = vtrn_u32(vreinterpret_u32_u16(b0.val[0]),
+ vreinterpret_u32_u16(b1.val[0]));
+ uint32x2x2_t c1 = vtrn_u32(vreinterpret_u32_u16(b0.val[1]),
+ vreinterpret_u32_u16(b1.val[1]));
+ uint32x2x2_t c2 = vtrn_u32(vreinterpret_u32_u16(b2.val[0]),
+ vreinterpret_u32_u16(b3.val[0]));
+ uint32x2x2_t c3 = vtrn_u32(vreinterpret_u32_u16(b2.val[1]),
+ vreinterpret_u32_u16(b3.val[1]));
+
+ // Swap 64 bit elements resulting in:
+ // o0: 00 10 20 30 40 50 60 70
+ // o1: 01 11 21 31 41 51 61 71
+ // o2: 02 12 22 32 42 52 62 72
+ // o3: 03 13 23 33 43 53 63 73
+
+ *o0 = vcombine_u16(vreinterpret_u16_u32(c0.val[0]),
+ vreinterpret_u16_u32(c2.val[0]));
+ *o1 = vcombine_u16(vreinterpret_u16_u32(c1.val[0]),
+ vreinterpret_u16_u32(c3.val[0]));
+ *o2 = vcombine_u16(vreinterpret_u16_u32(c0.val[1]),
+ vreinterpret_u16_u32(c2.val[1]));
+ *o3 = vcombine_u16(vreinterpret_u16_u32(c1.val[1]),
+ vreinterpret_u16_u32(c3.val[1]));
+}
+
+static INLINE void transpose_u16_8x8(uint16x8_t *a0, uint16x8_t *a1,
+ uint16x8_t *a2, uint16x8_t *a3,
+ uint16x8_t *a4, uint16x8_t *a5,
+ uint16x8_t *a6, uint16x8_t *a7) {
+ // Swap 16 bit elements. Goes from:
+ // a0: 00 01 02 03 04 05 06 07
+ // a1: 10 11 12 13 14 15 16 17
+ // a2: 20 21 22 23 24 25 26 27
+ // a3: 30 31 32 33 34 35 36 37
+ // a4: 40 41 42 43 44 45 46 47
+ // a5: 50 51 52 53 54 55 56 57
+ // a6: 60 61 62 63 64 65 66 67
+ // a7: 70 71 72 73 74 75 76 77
+ // to:
+ // b0.val[0]: 00 10 02 12 04 14 06 16
+ // b0.val[1]: 01 11 03 13 05 15 07 17
+ // b1.val[0]: 20 30 22 32 24 34 26 36
+ // b1.val[1]: 21 31 23 33 25 35 27 37
+ // b2.val[0]: 40 50 42 52 44 54 46 56
+ // b2.val[1]: 41 51 43 53 45 55 47 57
+ // b3.val[0]: 60 70 62 72 64 74 66 76
+ // b3.val[1]: 61 71 63 73 65 75 67 77
+
+ const uint16x8x2_t b0 = vtrnq_u16(*a0, *a1);
+ const uint16x8x2_t b1 = vtrnq_u16(*a2, *a3);
+ const uint16x8x2_t b2 = vtrnq_u16(*a4, *a5);
+ const uint16x8x2_t b3 = vtrnq_u16(*a6, *a7);
+
+ // Swap 32 bit elements resulting in:
+ // c0.val[0]: 00 10 20 30 04 14 24 34
+ // c0.val[1]: 02 12 22 32 06 16 26 36
+ // c1.val[0]: 01 11 21 31 05 15 25 35
+ // c1.val[1]: 03 13 23 33 07 17 27 37
+ // c2.val[0]: 40 50 60 70 44 54 64 74
+ // c2.val[1]: 42 52 62 72 46 56 66 76
+ // c3.val[0]: 41 51 61 71 45 55 65 75
+ // c3.val[1]: 43 53 63 73 47 57 67 77
+
+ const uint32x4x2_t c0 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[0]),
+ vreinterpretq_u32_u16(b1.val[0]));
+ const uint32x4x2_t c1 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[1]),
+ vreinterpretq_u32_u16(b1.val[1]));
+ const uint32x4x2_t c2 = vtrnq_u32(vreinterpretq_u32_u16(b2.val[0]),
+ vreinterpretq_u32_u16(b3.val[0]));
+ const uint32x4x2_t c3 = vtrnq_u32(vreinterpretq_u32_u16(b2.val[1]),
+ vreinterpretq_u32_u16(b3.val[1]));
+
+ *a0 = vcombine_u16(vget_low_u16(vreinterpretq_u16_u32(c0.val[0])),
+ vget_low_u16(vreinterpretq_u16_u32(c2.val[0])));
+ *a4 = vcombine_u16(vget_high_u16(vreinterpretq_u16_u32(c0.val[0])),
+ vget_high_u16(vreinterpretq_u16_u32(c2.val[0])));
+
+ *a2 = vcombine_u16(vget_low_u16(vreinterpretq_u16_u32(c0.val[1])),
+ vget_low_u16(vreinterpretq_u16_u32(c2.val[1])));
+ *a6 = vcombine_u16(vget_high_u16(vreinterpretq_u16_u32(c0.val[1])),
+ vget_high_u16(vreinterpretq_u16_u32(c2.val[1])));
+
+ *a1 = vcombine_u16(vget_low_u16(vreinterpretq_u16_u32(c1.val[0])),
+ vget_low_u16(vreinterpretq_u16_u32(c3.val[0])));
+ *a5 = vcombine_u16(vget_high_u16(vreinterpretq_u16_u32(c1.val[0])),
+ vget_high_u16(vreinterpretq_u16_u32(c3.val[0])));
+
+ *a3 = vcombine_u16(vget_low_u16(vreinterpretq_u16_u32(c1.val[1])),
+ vget_low_u16(vreinterpretq_u16_u32(c3.val[1])));
+ *a7 = vcombine_u16(vget_high_u16(vreinterpretq_u16_u32(c1.val[1])),
+ vget_high_u16(vreinterpretq_u16_u32(c3.val[1])));
+}
+
+static INLINE void transpose_s16_8x8(int16x8_t *a0, int16x8_t *a1,
+ int16x8_t *a2, int16x8_t *a3,
+ int16x8_t *a4, int16x8_t *a5,
+ int16x8_t *a6, int16x8_t *a7) {
+ // Swap 16 bit elements. Goes from:
+ // a0: 00 01 02 03 04 05 06 07
+ // a1: 10 11 12 13 14 15 16 17
+ // a2: 20 21 22 23 24 25 26 27
+ // a3: 30 31 32 33 34 35 36 37
+ // a4: 40 41 42 43 44 45 46 47
+ // a5: 50 51 52 53 54 55 56 57
+ // a6: 60 61 62 63 64 65 66 67
+ // a7: 70 71 72 73 74 75 76 77
+ // to:
+ // b0.val[0]: 00 10 02 12 04 14 06 16
+ // b0.val[1]: 01 11 03 13 05 15 07 17
+ // b1.val[0]: 20 30 22 32 24 34 26 36
+ // b1.val[1]: 21 31 23 33 25 35 27 37
+ // b2.val[0]: 40 50 42 52 44 54 46 56
+ // b2.val[1]: 41 51 43 53 45 55 47 57
+ // b3.val[0]: 60 70 62 72 64 74 66 76
+ // b3.val[1]: 61 71 63 73 65 75 67 77
+
+ const int16x8x2_t b0 = vtrnq_s16(*a0, *a1);
+ const int16x8x2_t b1 = vtrnq_s16(*a2, *a3);
+ const int16x8x2_t b2 = vtrnq_s16(*a4, *a5);
+ const int16x8x2_t b3 = vtrnq_s16(*a6, *a7);
+
+ // Swap 32 bit elements resulting in:
+ // c0.val[0]: 00 10 20 30 04 14 24 34
+ // c0.val[1]: 02 12 22 32 06 16 26 36
+ // c1.val[0]: 01 11 21 31 05 15 25 35
+ // c1.val[1]: 03 13 23 33 07 17 27 37
+ // c2.val[0]: 40 50 60 70 44 54 64 74
+ // c2.val[1]: 42 52 62 72 46 56 66 76
+ // c3.val[0]: 41 51 61 71 45 55 65 75
+ // c3.val[1]: 43 53 63 73 47 57 67 77
+
+ const int32x4x2_t c0 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[0]),
+ vreinterpretq_s32_s16(b1.val[0]));
+ const int32x4x2_t c1 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[1]),
+ vreinterpretq_s32_s16(b1.val[1]));
+ const int32x4x2_t c2 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[0]),
+ vreinterpretq_s32_s16(b3.val[0]));
+ const int32x4x2_t c3 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[1]),
+ vreinterpretq_s32_s16(b3.val[1]));
+
+ *a0 = vcombine_s16(vget_low_s16(vreinterpretq_s16_s32(c0.val[0])),
+ vget_low_s16(vreinterpretq_s16_s32(c2.val[0])));
+ *a4 = vcombine_s16(vget_high_s16(vreinterpretq_s16_s32(c0.val[0])),
+ vget_high_s16(vreinterpretq_s16_s32(c2.val[0])));
+
+ *a2 = vcombine_s16(vget_low_s16(vreinterpretq_s16_s32(c0.val[1])),
+ vget_low_s16(vreinterpretq_s16_s32(c2.val[1])));
+ *a6 = vcombine_s16(vget_high_s16(vreinterpretq_s16_s32(c0.val[1])),
+ vget_high_s16(vreinterpretq_s16_s32(c2.val[1])));
+
+ *a1 = vcombine_s16(vget_low_s16(vreinterpretq_s16_s32(c1.val[0])),
+ vget_low_s16(vreinterpretq_s16_s32(c3.val[0])));
+ *a5 = vcombine_s16(vget_high_s16(vreinterpretq_s16_s32(c1.val[0])),
+ vget_high_s16(vreinterpretq_s16_s32(c3.val[0])));
+
+ *a3 = vcombine_s16(vget_low_s16(vreinterpretq_s16_s32(c1.val[1])),
+ vget_low_s16(vreinterpretq_s16_s32(c3.val[1])));
+ *a7 = vcombine_s16(vget_high_s16(vreinterpretq_s16_s32(c1.val[1])),
+ vget_high_s16(vreinterpretq_s16_s32(c3.val[1])));
+}
+
+static INLINE void transpose_s16_4x4d(int16x4_t *a0, int16x4_t *a1,
+ int16x4_t *a2, int16x4_t *a3) {
+ // Swap 16 bit elements. Goes from:
+ // a0: 00 01 02 03
+ // a1: 10 11 12 13
+ // a2: 20 21 22 23
+ // a3: 30 31 32 33
+ // to:
+ // b0.val[0]: 00 10 02 12
+ // b0.val[1]: 01 11 03 13
+ // b1.val[0]: 20 30 22 32
+ // b1.val[1]: 21 31 23 33
+
+ const int16x4x2_t b0 = vtrn_s16(*a0, *a1);
+ const int16x4x2_t b1 = vtrn_s16(*a2, *a3);
+
+ // Swap 32 bit elements resulting in:
+ // c0.val[0]: 00 10 20 30
+ // c0.val[1]: 02 12 22 32
+ // c1.val[0]: 01 11 21 31
+ // c1.val[1]: 03 13 23 33
+
+ const int32x2x2_t c0 = vtrn_s32(vreinterpret_s32_s16(b0.val[0]),
+ vreinterpret_s32_s16(b1.val[0]));
+ const int32x2x2_t c1 = vtrn_s32(vreinterpret_s32_s16(b0.val[1]),
+ vreinterpret_s32_s16(b1.val[1]));
+
+ *a0 = vreinterpret_s16_s32(c0.val[0]);
+ *a1 = vreinterpret_s16_s32(c1.val[0]);
+ *a2 = vreinterpret_s16_s32(c0.val[1]);
+ *a3 = vreinterpret_s16_s32(c1.val[1]);
+}
+
+#endif // AV1_COMMON_ARM_TRANSPOSE_NEON_H_
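
The helpers in this header build in-register transposes out of element-pair swaps at successively wider granularities (vtrn/vtrnq on 8-, 16-, then 32-bit lanes, plus vcombine/vuzp for the 64-bit step), which is the usual NEON idiom since there is no single transpose instruction. A plain scalar 8x8 transpose is handy as a reference when checking the lane ordering of these helpers:

  #include <stdint.h>

  /* Reference transpose: out[c][r] = in[r][c]. Comparing transpose_u8_8x8()
     against this over random inputs is a quick sanity check. */
  static void transpose_8x8_ref(const uint8_t in[8][8], uint8_t out[8][8]) {
    for (int r = 0; r < 8; ++r)
      for (int c = 0; c < 8; ++c) out[c][r] = in[r][c];
  }
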
diff --git a/third_party/aom/av1/common/arm/wiener_convolve_neon.c b/third_party/aom/av1/common/arm/wiener_convolve_neon.c
new file mode 100644
index 000000000..72fbed4d4
--- /dev/null
+++ b/third_party/aom/av1/common/arm/wiener_convolve_neon.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_dsp/txfm_common.h"
+#include "aom_ports/mem.h"
+#include "av1/common/common.h"
+#include "av1/common/arm/convolve_neon.h"
+#include "av1/common/arm/mem_neon.h"
+#include "av1/common/arm/transpose_neon.h"
+
+/* Wiener filter 2D
+ Apply the horizontal filter and store the result in a temporary buffer.
+ When applying the vertical filter, overwrite the original pixel values.
+ */
+
+void av1_wiener_convolve_add_src_neon(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h,
+ const ConvolveParams *conv_params) {
+ uint16_t *d_tmp;
+ uint8_t *d;
+ const uint8_t *src_ptr, *s_tmp;
+ uint16_t *dst_ptr;
+ (void)x_step_q4;
+ (void)y_step_q4;
+
+ int width, height;
+ const int bd = 8;
+ const int intermediate_height = h + SUBPEL_TAPS - 1;
+ const int center_tap = ((SUBPEL_TAPS - 1) / 2);
+ int16_t filter_x_tmp[7], filter_y_tmp[7];
+
+ DECLARE_ALIGNED(16, uint16_t,
+ temp[(MAX_SB_SIZE + HORIZ_EXTRA_ROWS) * MAX_SB_SIZE]);
+
+ assert(x_step_q4 == 16 && y_step_q4 == 16);
+ assert(!(w % 8));
+
+ assert(w <= MAX_SB_SIZE);
+ assert(h <= MAX_SB_SIZE);
+
+ assert(filter_x[7] == 0);
+ assert(filter_y[7] == 0);
+
+ /* Assumption: the horizontal filtering output will not exceed 15 bits.
+ ((bd) + 1 + FILTER_BITS - conv_params->round_0) <= 15
+ i.e. 16 - conv_params->round_0 <= 15, so (conv_params->round_0) >= 1
+ */
+ assert((conv_params->round_0) >= 1);
+
+ memcpy(&filter_x_tmp[0], filter_x, sizeof(*filter_x) * FILTER_BITS);
+ memcpy(&filter_y_tmp[0], filter_y, sizeof(*filter_y) * FILTER_BITS);
+
+ filter_x_tmp[3] += (1 << FILTER_BITS);
+ filter_y_tmp[3] += (1 << FILTER_BITS);
+
+ s_tmp = src - center_tap * src_stride - center_tap;
+ dst_ptr = temp;
+ src_ptr = s_tmp;
+ height = intermediate_height;
+
+ /* if height is a multiple of 8 */
+ if (!(h & 7)) {
+ int16x8_t res0, res1, res2, res3;
+ uint16x8_t res4, res5, res6, res7, res8, res9, res10, res11;
+ uint8x8_t t0, t1, t2, t3, t4, t5, t6, t7;
+ uint8x8_t t8, t9, t10, t11, t12, t13, t14;
+
+ do {
+ const uint8_t *s;
+
+ __builtin_prefetch(src_ptr + 0 * src_stride);
+ __builtin_prefetch(src_ptr + 1 * src_stride);
+ __builtin_prefetch(src_ptr + 2 * src_stride);
+ __builtin_prefetch(src_ptr + 3 * src_stride);
+ __builtin_prefetch(src_ptr + 4 * src_stride);
+ __builtin_prefetch(src_ptr + 5 * src_stride);
+ __builtin_prefetch(src_ptr + 6 * src_stride);
+ __builtin_prefetch(src_ptr + 7 * src_stride);
+
+ load_u8_8x8(src_ptr, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+ transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+
+ s = src_ptr + 7;
+ d_tmp = dst_ptr;
+ width = w;
+
+ __builtin_prefetch(dst_ptr + 0 * dst_stride);
+ __builtin_prefetch(dst_ptr + 1 * dst_stride);
+ __builtin_prefetch(dst_ptr + 2 * dst_stride);
+ __builtin_prefetch(dst_ptr + 3 * dst_stride);
+ __builtin_prefetch(dst_ptr + 4 * dst_stride);
+ __builtin_prefetch(dst_ptr + 5 * dst_stride);
+ __builtin_prefetch(dst_ptr + 6 * dst_stride);
+ __builtin_prefetch(dst_ptr + 7 * dst_stride);
+
+ do {
+ load_u8_8x8(s, src_stride, &t7, &t8, &t9, &t10, &t11, &t12, &t13, &t14);
+ transpose_u8_8x8(&t7, &t8, &t9, &t10, &t11, &t12, &t13, &t14);
+
+ res0 = vreinterpretq_s16_u16(vaddl_u8(t0, t6));
+ res1 = vreinterpretq_s16_u16(vaddl_u8(t1, t5));
+ res2 = vreinterpretq_s16_u16(vaddl_u8(t2, t4));
+ res3 = vreinterpretq_s16_u16(vmovl_u8(t3));
+ res4 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
+ bd, conv_params->round_0);
+
+ res0 = vreinterpretq_s16_u16(vaddl_u8(t1, t7));
+ res1 = vreinterpretq_s16_u16(vaddl_u8(t2, t6));
+ res2 = vreinterpretq_s16_u16(vaddl_u8(t3, t5));
+ res3 = vreinterpretq_s16_u16(vmovl_u8(t4));
+ res5 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
+ bd, conv_params->round_0);
+
+ res0 = vreinterpretq_s16_u16(vaddl_u8(t2, t8));
+ res1 = vreinterpretq_s16_u16(vaddl_u8(t3, t7));
+ res2 = vreinterpretq_s16_u16(vaddl_u8(t4, t6));
+ res3 = vreinterpretq_s16_u16(vmovl_u8(t5));
+ res6 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
+ bd, conv_params->round_0);
+
+ res0 = vreinterpretq_s16_u16(vaddl_u8(t3, t9));
+ res1 = vreinterpretq_s16_u16(vaddl_u8(t4, t8));
+ res2 = vreinterpretq_s16_u16(vaddl_u8(t5, t7));
+ res3 = vreinterpretq_s16_u16(vmovl_u8(t6));
+ res7 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
+ bd, conv_params->round_0);
+
+ res0 = vreinterpretq_s16_u16(vaddl_u8(t4, t10));
+ res1 = vreinterpretq_s16_u16(vaddl_u8(t5, t9));
+ res2 = vreinterpretq_s16_u16(vaddl_u8(t6, t8));
+ res3 = vreinterpretq_s16_u16(vmovl_u8(t7));
+ res8 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
+ bd, conv_params->round_0);
+
+ res0 = vreinterpretq_s16_u16(vaddl_u8(t5, t11));
+ res1 = vreinterpretq_s16_u16(vaddl_u8(t6, t10));
+ res2 = vreinterpretq_s16_u16(vaddl_u8(t7, t9));
+ res3 = vreinterpretq_s16_u16(vmovl_u8(t8));
+ res9 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
+ bd, conv_params->round_0);
+
+ res0 = vreinterpretq_s16_u16(vaddl_u8(t6, t12));
+ res1 = vreinterpretq_s16_u16(vaddl_u8(t7, t11));
+ res2 = vreinterpretq_s16_u16(vaddl_u8(t8, t10));
+ res3 = vreinterpretq_s16_u16(vmovl_u8(t9));
+ res10 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
+ bd, conv_params->round_0);
+
+ res0 = vreinterpretq_s16_u16(vaddl_u8(t7, t13));
+ res1 = vreinterpretq_s16_u16(vaddl_u8(t8, t12));
+ res2 = vreinterpretq_s16_u16(vaddl_u8(t9, t11));
+ res3 = vreinterpretq_s16_u16(vmovl_u8(t10));
+ res11 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
+ bd, conv_params->round_0);
+
+ transpose_u16_8x8(&res4, &res5, &res6, &res7, &res8, &res9, &res10,
+ &res11);
+ store_u16_8x8(d_tmp, MAX_SB_SIZE, res4, res5, res6, res7, res8, res9,
+ res10, res11);
+
+ t0 = t8;
+ t1 = t9;
+ t2 = t10;
+ t3 = t11;
+ t4 = t12;
+ t5 = t13;
+ t6 = t14;
+ s += 8;
+ d_tmp += 8;
+ width -= 8;
+ } while (width > 0);
+ src_ptr += 8 * src_stride;
+ dst_ptr += 8 * MAX_SB_SIZE;
+ height -= 8;
+ } while (height > 0);
+ } else {
+ /*if height is a multiple of 4*/
+ int16x8_t tt0, tt1, tt2, tt3;
+ const uint8_t *s;
+ uint16x4_t res0, res1, res2, res3, res4, res5, res6, res7;
+ uint16x8_t d0, d1, d2, d3;
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ int16x4_t s11, s12, s13, s14;
+ uint8x8_t t0, t1, t2, t3;
+
+ do {
+ __builtin_prefetch(src_ptr + 0 * src_stride);
+ __builtin_prefetch(src_ptr + 1 * src_stride);
+ __builtin_prefetch(src_ptr + 2 * src_stride);
+ __builtin_prefetch(src_ptr + 3 * src_stride);
+
+ load_u8_8x4(src_ptr, src_stride, &t0, &t1, &t2, &t3); /*8x4*/
+ transpose_u8_8x4(&t0, &t1, &t2,
+ &t3); /*first 8 pixels of 4 rows transposed-- 4x8*/
+
+ tt0 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ tt1 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ tt2 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ tt3 = vreinterpretq_s16_u16(vmovl_u8(t3));
+
+ s0 = vget_low_s16(tt0); /*pa0 pb0 pc0 pd0 -- pixel_a0*/
+ s1 = vget_low_s16(tt1); /*pa1 pb1 pc1 pd1 */
+ s2 = vget_low_s16(tt2); /*pa2 pb2 pc2 pd2 */
+ s3 = vget_low_s16(tt3); /*pa3 pb3 pc3 pd3 */
+ s4 = vget_high_s16(tt0); /*pa4 pb4 pc4 pd4 */
+ s5 = vget_high_s16(tt1); /*pa5 pb5 pc5 pd5 */
+ s6 = vget_high_s16(tt2); /*pa6 pb6 pc6 pd6 */
+
+ __builtin_prefetch(dst_ptr + 0 * dst_stride);
+ __builtin_prefetch(dst_ptr + 1 * dst_stride);
+ __builtin_prefetch(dst_ptr + 2 * dst_stride);
+ __builtin_prefetch(dst_ptr + 3 * dst_stride);
+
+ s = src_ptr + 7;
+ d_tmp = dst_ptr;
+ width = w;
+
+ do {
+ load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3); /*8x4*/
+ transpose_u8_8x4(&t0, &t1, &t2, &t3);
+
+ tt0 = vreinterpretq_s16_u16(vmovl_u8(t0));
+ tt1 = vreinterpretq_s16_u16(vmovl_u8(t1));
+ tt2 = vreinterpretq_s16_u16(vmovl_u8(t2));
+ tt3 = vreinterpretq_s16_u16(vmovl_u8(t3));
+
+ s7 = vget_low_s16(tt0); /*pa7 pb7 pc7 pd7 */ /*4x8*/
+ s8 = vget_low_s16(tt1); /*pa8 pb8 pc8 pd8 */
+ s9 = vget_low_s16(tt2); /*pa9 pb9 pc9 pd9 */
+ s10 = vget_low_s16(tt3); /*pa10 pb10 pc10 pd10 */
+ s11 = vget_high_s16(tt0); /*pa11 pb11 pc11 pd11 */
+ s12 = vget_high_s16(tt1); /*pa12 pb12 pc12 pd12 */
+ s13 = vget_high_s16(tt2); /*pa13 pb13 pc13 pd13 */
+ s14 = vget_high_s16(tt3); /*pa14 pb14 pc14 pd14 */
+
+ res0 = wiener_convolve8_horiz_4x8(
+ s0, s1, s2, s3, s4, s5, s6, filter_x_tmp, bd, conv_params->round_0);
+ res1 = wiener_convolve8_horiz_4x8(
+ s1, s2, s3, s4, s5, s6, s7, filter_x_tmp, bd, conv_params->round_0);
+ res2 = wiener_convolve8_horiz_4x8(
+ s2, s3, s4, s5, s6, s7, s8, filter_x_tmp, bd, conv_params->round_0);
+ res3 = wiener_convolve8_horiz_4x8(
+ s3, s4, s5, s6, s7, s8, s9, filter_x_tmp, bd, conv_params->round_0);
+ res4 =
+ wiener_convolve8_horiz_4x8(s4, s5, s6, s7, s8, s9, s10,
+ filter_x_tmp, bd, conv_params->round_0);
+ res5 =
+ wiener_convolve8_horiz_4x8(s5, s6, s7, s8, s9, s10, s11,
+ filter_x_tmp, bd, conv_params->round_0);
+ res6 =
+ wiener_convolve8_horiz_4x8(s6, s7, s8, s9, s10, s11, s12,
+ filter_x_tmp, bd, conv_params->round_0);
+ res7 =
+ wiener_convolve8_horiz_4x8(s7, s8, s9, s10, s11, s12, s13,
+ filter_x_tmp, bd, conv_params->round_0);
+
+ transpose_u16_4x8(&res0, &res1, &res2, &res3, &res4, &res5, &res6,
+ &res7, &d0, &d1, &d2, &d3);
+
+ store_u16_8x4(d_tmp, MAX_SB_SIZE, d0, d1, d2, d3);
+
+ s0 = s8;
+ s1 = s9;
+ s2 = s10;
+ s3 = s11;
+ s4 = s12;
+ s5 = s13;
+ s6 = s14;
+ s += 8;
+ d_tmp += 8;
+ width -= 8;
+ } while (width > 0);
+
+ src_ptr += 4 * src_stride;
+ dst_ptr += 4 * MAX_SB_SIZE;
+ height -= 4;
+ } while (height > 0);
+ }
+
+ {
+ int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ uint8x8_t t0, t1, t2, t3;
+ int16_t *src_tmp_ptr, *s;
+ uint8_t *dst_tmp_ptr;
+ height = h;
+ width = w;
+ src_tmp_ptr = (int16_t *)temp;
+ dst_tmp_ptr = dst;
+ src_stride = MAX_SB_SIZE;
+
+ do {
+ s = src_tmp_ptr;
+ s0 = vld1q_s16(s);
+ s += src_stride;
+ s1 = vld1q_s16(s);
+ s += src_stride;
+ s2 = vld1q_s16(s);
+ s += src_stride;
+ s3 = vld1q_s16(s);
+ s += src_stride;
+ s4 = vld1q_s16(s);
+ s += src_stride;
+ s5 = vld1q_s16(s);
+ s += src_stride;
+ s6 = vld1q_s16(s);
+ s += src_stride;
+ d = dst_tmp_ptr;
+ height = h;
+
+ do {
+ __builtin_prefetch(dst_tmp_ptr + 0 * dst_stride);
+ __builtin_prefetch(dst_tmp_ptr + 1 * dst_stride);
+ __builtin_prefetch(dst_tmp_ptr + 2 * dst_stride);
+ __builtin_prefetch(dst_tmp_ptr + 3 * dst_stride);
+
+ s7 = vld1q_s16(s);
+ s += src_stride;
+ s8 = vld1q_s16(s);
+ s += src_stride;
+ s9 = vld1q_s16(s);
+ s += src_stride;
+ s10 = vld1q_s16(s);
+ s += src_stride;
+
+ t0 = wiener_convolve8_vert_4x8(s0, s1, s2, s3, s4, s5, s6, filter_y_tmp,
+ bd, conv_params->round_1);
+ t1 = wiener_convolve8_vert_4x8(s1, s2, s3, s4, s5, s6, s7, filter_y_tmp,
+ bd, conv_params->round_1);
+ t2 = wiener_convolve8_vert_4x8(s2, s3, s4, s5, s6, s7, s8, filter_y_tmp,
+ bd, conv_params->round_1);
+ t3 = wiener_convolve8_vert_4x8(s3, s4, s5, s6, s7, s8, s9, filter_y_tmp,
+ bd, conv_params->round_1);
+
+ vst1_u8(d, t0);
+ d += dst_stride;
+ vst1_u8(d, t1);
+ d += dst_stride;
+ vst1_u8(d, t2);
+ d += dst_stride;
+ vst1_u8(d, t3);
+ d += dst_stride;
+
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ height -= 4;
+ } while (height > 3);
+
+ if (height != 0) {
+ __builtin_prefetch(dst_tmp_ptr + 0 * dst_stride);
+ __builtin_prefetch(dst_tmp_ptr + 1 * dst_stride);
+
+ do {
+ s7 = vld1q_s16(s);
+ s += src_stride;
+
+ t0 =
+ wiener_convolve8_vert_4x8(s0, s1, s2, s3, s4, s5, s6,
+ filter_y_tmp, bd, conv_params->round_1);
+ vst1_u8(d, t0);
+ d += dst_stride;
+
+ s0 = s1;
+ s1 = s2;
+ s2 = s3;
+ s3 = s4;
+ s4 = s5;
+ s5 = s6;
+ s6 = s7;
+ height -= 1;
+ } while (height > 0);
+ }
+
+ src_tmp_ptr += 8;
+ dst_tmp_ptr += 8;
+
+ w -= 8;
+ } while (w > 0);
+ }
+}
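
av1_wiener_convolve_add_src_neon treats the 8-tap filter as 7 taps (filter_x[7] and filter_y[7] must be zero) and folds the "add src" behaviour into the centre tap: adding 1 << FILTER_BITS to tap 3 makes the filter response equal the plain 7-tap response plus the centre source sample scaled by 2^FILTER_BITS, so no separate add is needed afterwards. A minimal sketch of that identity, with rounding and clamping (handled by the convolve_neon.h helpers) deliberately left out:

  #include <stdint.h>

  #define WIENER_FILTER_BITS 7 /* local stand-in for FILTER_BITS (7 in libaom) */

  /* Plain 7-tap response centred on src[3]. */
  static int32_t filter7(const int16_t *taps, const uint8_t *src) {
    int32_t sum = 0;
    for (int k = 0; k < 7; ++k) sum += (int32_t)taps[k] * src[k];
    return sum;
  }

  /* With taps[3] += 1 << FILTER_BITS, the response becomes
     filter7(orig, src) + (src[3] << FILTER_BITS). */
  static int32_t wiener_add_src_sample(const int16_t *orig_taps, const uint8_t *src) {
    int16_t adjusted[7];
    for (int k = 0; k < 7; ++k) adjusted[k] = orig_taps[k];
    adjusted[3] += 1 << WIENER_FILTER_BITS;
    return filter7(adjusted, src);
  }
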
diff --git a/third_party/aom/av1/common/av1_fwd_txfm1d.c b/third_party/aom/av1/common/av1_fwd_txfm1d.c
deleted file mode 100644
index c9c7f437e..000000000
--- a/third_party/aom/av1/common/av1_fwd_txfm1d.c
+++ /dev/null
@@ -1,2355 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include "aom_dsp/inv_txfm.h"
-#include "av1/common/av1_fwd_txfm1d.h"
-#if CONFIG_COEFFICIENT_RANGE_CHECKING
-
-void range_check_func(int32_t stage, const int32_t *input, const int32_t *buf,
- int32_t size, int8_t bit);
-
-#define range_check(stage, input, buf, size, bit) \
- range_check_func(stage, input, buf, size, bit)
-#else
-#define range_check(stage, input, buf, size, bit) \
- { \
- (void)stage; \
- (void)input; \
- (void)buf; \
- (void)size; \
- (void)bit; \
- }
-#endif
-
-// TODO(angiebird): Make 1-d txfm functions static
-void av1_fdct4_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
- const int8_t *stage_range) {
- const int32_t size = 4;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[4];
-
- // stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[0] + input[3];
- bf1[1] = input[1] + input[2];
- bf1[2] = -input[2] + input[1];
- bf1[3] = -input[3] + input[0];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
- bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[2];
- bf1[2] = bf0[1];
- bf1[3] = bf0[3];
- range_check(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fdct8_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
- const int8_t *stage_range) {
- const int32_t size = 8;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[8];
-
- // stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[0] + input[7];
- bf1[1] = input[1] + input[6];
- bf1[2] = input[2] + input[5];
- bf1[3] = input[3] + input[4];
- bf1[4] = -input[4] + input[3];
- bf1[5] = -input[5] + input[2];
- bf1[6] = -input[6] + input[1];
- bf1[7] = -input[7] + input[0];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0] + bf0[3];
- bf1[1] = bf0[1] + bf0[2];
- bf1[2] = -bf0[2] + bf0[1];
- bf1[3] = -bf0[3] + bf0[0];
- bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
- bf1[7] = bf0[7];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = step;
- bf1 = output;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
- bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
- bf1[4] = bf0[4] + bf0[5];
- bf1[5] = -bf0[5] + bf0[4];
- bf1[6] = -bf0[6] + bf0[7];
- bf1[7] = bf0[7] + bf0[6];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
- bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
- bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[4];
- bf1[2] = bf0[2];
- bf1[3] = bf0[6];
- bf1[4] = bf0[1];
- bf1[5] = bf0[5];
- bf1[6] = bf0[3];
- bf1[7] = bf0[7];
- range_check(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fdct16_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- const int32_t size = 16;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[16];
-
- // stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[0] + input[15];
- bf1[1] = input[1] + input[14];
- bf1[2] = input[2] + input[13];
- bf1[3] = input[3] + input[12];
- bf1[4] = input[4] + input[11];
- bf1[5] = input[5] + input[10];
- bf1[6] = input[6] + input[9];
- bf1[7] = input[7] + input[8];
- bf1[8] = -input[8] + input[7];
- bf1[9] = -input[9] + input[6];
- bf1[10] = -input[10] + input[5];
- bf1[11] = -input[11] + input[4];
- bf1[12] = -input[12] + input[3];
- bf1[13] = -input[13] + input[2];
- bf1[14] = -input[14] + input[1];
- bf1[15] = -input[15] + input[0];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0] + bf0[7];
- bf1[1] = bf0[1] + bf0[6];
- bf1[2] = bf0[2] + bf0[5];
- bf1[3] = bf0[3] + bf0[4];
- bf1[4] = -bf0[4] + bf0[3];
- bf1[5] = -bf0[5] + bf0[2];
- bf1[6] = -bf0[6] + bf0[1];
- bf1[7] = -bf0[7] + bf0[0];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
- bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
- bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
- bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[3];
- bf1[1] = bf0[1] + bf0[2];
- bf1[2] = -bf0[2] + bf0[1];
- bf1[3] = -bf0[3] + bf0[0];
- bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
- bf1[7] = bf0[7];
- bf1[8] = bf0[8] + bf0[11];
- bf1[9] = bf0[9] + bf0[10];
- bf1[10] = -bf0[10] + bf0[9];
- bf1[11] = -bf0[11] + bf0[8];
- bf1[12] = -bf0[12] + bf0[15];
- bf1[13] = -bf0[13] + bf0[14];
- bf1[14] = bf0[14] + bf0[13];
- bf1[15] = bf0[15] + bf0[12];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
- bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
- bf1[4] = bf0[4] + bf0[5];
- bf1[5] = -bf0[5] + bf0[4];
- bf1[6] = -bf0[6] + bf0[7];
- bf1[7] = bf0[7] + bf0[6];
- bf1[8] = bf0[8];
- bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
- bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
- bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
- bf1[15] = bf0[15];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
- bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
- bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
- bf1[8] = bf0[8] + bf0[9];
- bf1[9] = -bf0[9] + bf0[8];
- bf1[10] = -bf0[10] + bf0[11];
- bf1[11] = bf0[11] + bf0[10];
- bf1[12] = bf0[12] + bf0[13];
- bf1[13] = -bf0[13] + bf0[12];
- bf1[14] = -bf0[14] + bf0[15];
- bf1[15] = bf0[15] + bf0[14];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
- bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
- bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
- bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
- bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
- bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
- bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
- bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[8];
- bf1[2] = bf0[4];
- bf1[3] = bf0[12];
- bf1[4] = bf0[2];
- bf1[5] = bf0[10];
- bf1[6] = bf0[6];
- bf1[7] = bf0[14];
- bf1[8] = bf0[1];
- bf1[9] = bf0[9];
- bf1[10] = bf0[5];
- bf1[11] = bf0[13];
- bf1[12] = bf0[3];
- bf1[13] = bf0[11];
- bf1[14] = bf0[7];
- bf1[15] = bf0[15];
- range_check(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fdct32_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- const int32_t size = 32;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[32];
-
- // stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[0] + input[31];
- bf1[1] = input[1] + input[30];
- bf1[2] = input[2] + input[29];
- bf1[3] = input[3] + input[28];
- bf1[4] = input[4] + input[27];
- bf1[5] = input[5] + input[26];
- bf1[6] = input[6] + input[25];
- bf1[7] = input[7] + input[24];
- bf1[8] = input[8] + input[23];
- bf1[9] = input[9] + input[22];
- bf1[10] = input[10] + input[21];
- bf1[11] = input[11] + input[20];
- bf1[12] = input[12] + input[19];
- bf1[13] = input[13] + input[18];
- bf1[14] = input[14] + input[17];
- bf1[15] = input[15] + input[16];
- bf1[16] = -input[16] + input[15];
- bf1[17] = -input[17] + input[14];
- bf1[18] = -input[18] + input[13];
- bf1[19] = -input[19] + input[12];
- bf1[20] = -input[20] + input[11];
- bf1[21] = -input[21] + input[10];
- bf1[22] = -input[22] + input[9];
- bf1[23] = -input[23] + input[8];
- bf1[24] = -input[24] + input[7];
- bf1[25] = -input[25] + input[6];
- bf1[26] = -input[26] + input[5];
- bf1[27] = -input[27] + input[4];
- bf1[28] = -input[28] + input[3];
- bf1[29] = -input[29] + input[2];
- bf1[30] = -input[30] + input[1];
- bf1[31] = -input[31] + input[0];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0] + bf0[15];
- bf1[1] = bf0[1] + bf0[14];
- bf1[2] = bf0[2] + bf0[13];
- bf1[3] = bf0[3] + bf0[12];
- bf1[4] = bf0[4] + bf0[11];
- bf1[5] = bf0[5] + bf0[10];
- bf1[6] = bf0[6] + bf0[9];
- bf1[7] = bf0[7] + bf0[8];
- bf1[8] = -bf0[8] + bf0[7];
- bf1[9] = -bf0[9] + bf0[6];
- bf1[10] = -bf0[10] + bf0[5];
- bf1[11] = -bf0[11] + bf0[4];
- bf1[12] = -bf0[12] + bf0[3];
- bf1[13] = -bf0[13] + bf0[2];
- bf1[14] = -bf0[14] + bf0[1];
- bf1[15] = -bf0[15] + bf0[0];
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = bf0[18];
- bf1[19] = bf0[19];
- bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
- bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
- bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
- bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
- bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit[stage]);
- bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit[stage]);
- bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit[stage]);
- bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit[stage]);
- bf1[28] = bf0[28];
- bf1[29] = bf0[29];
- bf1[30] = bf0[30];
- bf1[31] = bf0[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[7];
- bf1[1] = bf0[1] + bf0[6];
- bf1[2] = bf0[2] + bf0[5];
- bf1[3] = bf0[3] + bf0[4];
- bf1[4] = -bf0[4] + bf0[3];
- bf1[5] = -bf0[5] + bf0[2];
- bf1[6] = -bf0[6] + bf0[1];
- bf1[7] = -bf0[7] + bf0[0];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
- bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
- bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
- bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = bf0[16] + bf0[23];
- bf1[17] = bf0[17] + bf0[22];
- bf1[18] = bf0[18] + bf0[21];
- bf1[19] = bf0[19] + bf0[20];
- bf1[20] = -bf0[20] + bf0[19];
- bf1[21] = -bf0[21] + bf0[18];
- bf1[22] = -bf0[22] + bf0[17];
- bf1[23] = -bf0[23] + bf0[16];
- bf1[24] = -bf0[24] + bf0[31];
- bf1[25] = -bf0[25] + bf0[30];
- bf1[26] = -bf0[26] + bf0[29];
- bf1[27] = -bf0[27] + bf0[28];
- bf1[28] = bf0[28] + bf0[27];
- bf1[29] = bf0[29] + bf0[26];
- bf1[30] = bf0[30] + bf0[25];
- bf1[31] = bf0[31] + bf0[24];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0] + bf0[3];
- bf1[1] = bf0[1] + bf0[2];
- bf1[2] = -bf0[2] + bf0[1];
- bf1[3] = -bf0[3] + bf0[0];
- bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
- bf1[7] = bf0[7];
- bf1[8] = bf0[8] + bf0[11];
- bf1[9] = bf0[9] + bf0[10];
- bf1[10] = -bf0[10] + bf0[9];
- bf1[11] = -bf0[11] + bf0[8];
- bf1[12] = -bf0[12] + bf0[15];
- bf1[13] = -bf0[13] + bf0[14];
- bf1[14] = bf0[14] + bf0[13];
- bf1[15] = bf0[15] + bf0[12];
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
- bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
- bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
- bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
- bf1[22] = bf0[22];
- bf1[23] = bf0[23];
- bf1[24] = bf0[24];
- bf1[25] = bf0[25];
- bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit[stage]);
- bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit[stage]);
- bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit[stage]);
- bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit[stage]);
- bf1[30] = bf0[30];
- bf1[31] = bf0[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = step;
- bf1 = output;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
- bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
- bf1[4] = bf0[4] + bf0[5];
- bf1[5] = -bf0[5] + bf0[4];
- bf1[6] = -bf0[6] + bf0[7];
- bf1[7] = bf0[7] + bf0[6];
- bf1[8] = bf0[8];
- bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
- bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
- bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
- bf1[15] = bf0[15];
- bf1[16] = bf0[16] + bf0[19];
- bf1[17] = bf0[17] + bf0[18];
- bf1[18] = -bf0[18] + bf0[17];
- bf1[19] = -bf0[19] + bf0[16];
- bf1[20] = -bf0[20] + bf0[23];
- bf1[21] = -bf0[21] + bf0[22];
- bf1[22] = bf0[22] + bf0[21];
- bf1[23] = bf0[23] + bf0[20];
- bf1[24] = bf0[24] + bf0[27];
- bf1[25] = bf0[25] + bf0[26];
- bf1[26] = -bf0[26] + bf0[25];
- bf1[27] = -bf0[27] + bf0[24];
- bf1[28] = -bf0[28] + bf0[31];
- bf1[29] = -bf0[29] + bf0[30];
- bf1[30] = bf0[30] + bf0[29];
- bf1[31] = bf0[31] + bf0[28];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
- bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
- bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
- bf1[8] = bf0[8] + bf0[9];
- bf1[9] = -bf0[9] + bf0[8];
- bf1[10] = -bf0[10] + bf0[11];
- bf1[11] = bf0[11] + bf0[10];
- bf1[12] = bf0[12] + bf0[13];
- bf1[13] = -bf0[13] + bf0[12];
- bf1[14] = -bf0[14] + bf0[15];
- bf1[15] = bf0[15] + bf0[14];
- bf1[16] = bf0[16];
- bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
- bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
- bf1[19] = bf0[19];
- bf1[20] = bf0[20];
- bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
- bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
- bf1[23] = bf0[23];
- bf1[24] = bf0[24];
- bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit[stage]);
- bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit[stage]);
- bf1[27] = bf0[27];
- bf1[28] = bf0[28];
- bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit[stage]);
- bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit[stage]);
- bf1[31] = bf0[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
- bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
- bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
- bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
- bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
- bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
- bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
- bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
- bf1[16] = bf0[16] + bf0[17];
- bf1[17] = -bf0[17] + bf0[16];
- bf1[18] = -bf0[18] + bf0[19];
- bf1[19] = bf0[19] + bf0[18];
- bf1[20] = bf0[20] + bf0[21];
- bf1[21] = -bf0[21] + bf0[20];
- bf1[22] = -bf0[22] + bf0[23];
- bf1[23] = bf0[23] + bf0[22];
- bf1[24] = bf0[24] + bf0[25];
- bf1[25] = -bf0[25] + bf0[24];
- bf1[26] = -bf0[26] + bf0[27];
- bf1[27] = bf0[27] + bf0[26];
- bf1[28] = bf0[28] + bf0[29];
- bf1[29] = -bf0[29] + bf0[28];
- bf1[30] = -bf0[30] + bf0[31];
- bf1[31] = bf0[31] + bf0[30];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 8
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit[stage]);
- bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit[stage]);
- bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit[stage]);
- bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit[stage]);
- bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit[stage]);
- bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit[stage]);
- bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit[stage]);
- bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit[stage]);
- bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit[stage]);
- bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit[stage]);
- bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit[stage]);
- bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit[stage]);
- bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit[stage]);
- bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit[stage]);
- bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit[stage]);
- bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 9
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[16];
- bf1[2] = bf0[8];
- bf1[3] = bf0[24];
- bf1[4] = bf0[4];
- bf1[5] = bf0[20];
- bf1[6] = bf0[12];
- bf1[7] = bf0[28];
- bf1[8] = bf0[2];
- bf1[9] = bf0[18];
- bf1[10] = bf0[10];
- bf1[11] = bf0[26];
- bf1[12] = bf0[6];
- bf1[13] = bf0[22];
- bf1[14] = bf0[14];
- bf1[15] = bf0[30];
- bf1[16] = bf0[1];
- bf1[17] = bf0[17];
- bf1[18] = bf0[9];
- bf1[19] = bf0[25];
- bf1[20] = bf0[5];
- bf1[21] = bf0[21];
- bf1[22] = bf0[13];
- bf1[23] = bf0[29];
- bf1[24] = bf0[3];
- bf1[25] = bf0[19];
- bf1[26] = bf0[11];
- bf1[27] = bf0[27];
- bf1[28] = bf0[7];
- bf1[29] = bf0[23];
- bf1[30] = bf0[15];
- bf1[31] = bf0[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fadst4_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- const int32_t size = 4;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[4];
-
- // stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[3];
- bf1[1] = input[0];
- bf1[2] = input[1];
- bf1[3] = input[2];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[8], bf0[0], cospi[56], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(-cospi[8], bf0[1], cospi[56], bf0[0], cos_bit[stage]);
- bf1[2] = half_btf(cospi[40], bf0[2], cospi[24], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(-cospi[40], bf0[3], cospi[24], bf0[2], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[2];
- bf1[1] = bf0[1] + bf0[3];
- bf1[2] = -bf0[2] + bf0[0];
- bf1[3] = -bf0[3] + bf0[1];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = -bf0[2];
- bf1[2] = bf0[3];
- bf1[3] = -bf0[1];
- range_check(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fadst8_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- const int32_t size = 8;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[8];
-
- // stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[7];
- bf1[1] = input[0];
- bf1[2] = input[5];
- bf1[3] = input[2];
- bf1[4] = input[3];
- bf1[5] = input[4];
- bf1[6] = input[1];
- bf1[7] = input[6];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(-cospi[4], bf0[1], cospi[60], bf0[0], cos_bit[stage]);
- bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(-cospi[20], bf0[3], cospi[44], bf0[2], cos_bit[stage]);
- bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit[stage]);
- bf1[5] = half_btf(-cospi[36], bf0[5], cospi[28], bf0[4], cos_bit[stage]);
- bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(-cospi[52], bf0[7], cospi[12], bf0[6], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[4];
- bf1[1] = bf0[1] + bf0[5];
- bf1[2] = bf0[2] + bf0[6];
- bf1[3] = bf0[3] + bf0[7];
- bf1[4] = -bf0[4] + bf0[0];
- bf1[5] = -bf0[5] + bf0[1];
- bf1[6] = -bf0[6] + bf0[2];
- bf1[7] = -bf0[7] + bf0[3];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
- bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
- bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[2];
- bf1[1] = bf0[1] + bf0[3];
- bf1[2] = -bf0[2] + bf0[0];
- bf1[3] = -bf0[3] + bf0[1];
- bf1[4] = bf0[4] + bf0[6];
- bf1[5] = bf0[5] + bf0[7];
- bf1[6] = -bf0[6] + bf0[4];
- bf1[7] = -bf0[7] + bf0[5];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = -bf0[4];
- bf1[2] = bf0[6];
- bf1[3] = -bf0[2];
- bf1[4] = bf0[3];
- bf1[5] = -bf0[7];
- bf1[6] = bf0[5];
- bf1[7] = -bf0[1];
- range_check(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fadst16_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- const int32_t size = 16;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[16];
-
- // stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[15];
- bf1[1] = input[0];
- bf1[2] = input[13];
- bf1[3] = input[2];
- bf1[4] = input[11];
- bf1[5] = input[4];
- bf1[6] = input[9];
- bf1[7] = input[6];
- bf1[8] = input[7];
- bf1[9] = input[8];
- bf1[10] = input[5];
- bf1[11] = input[10];
- bf1[12] = input[3];
- bf1[13] = input[12];
- bf1[14] = input[1];
- bf1[15] = input[14];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(-cospi[2], bf0[1], cospi[62], bf0[0], cos_bit[stage]);
- bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(-cospi[10], bf0[3], cospi[54], bf0[2], cos_bit[stage]);
- bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit[stage]);
- bf1[5] = half_btf(-cospi[18], bf0[5], cospi[46], bf0[4], cos_bit[stage]);
- bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(-cospi[26], bf0[7], cospi[38], bf0[6], cos_bit[stage]);
- bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit[stage]);
- bf1[9] = half_btf(-cospi[34], bf0[9], cospi[30], bf0[8], cos_bit[stage]);
- bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit[stage]);
- bf1[11] = half_btf(-cospi[42], bf0[11], cospi[22], bf0[10], cos_bit[stage]);
- bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit[stage]);
- bf1[13] = half_btf(-cospi[50], bf0[13], cospi[14], bf0[12], cos_bit[stage]);
- bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(-cospi[58], bf0[15], cospi[6], bf0[14], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[8];
- bf1[1] = bf0[1] + bf0[9];
- bf1[2] = bf0[2] + bf0[10];
- bf1[3] = bf0[3] + bf0[11];
- bf1[4] = bf0[4] + bf0[12];
- bf1[5] = bf0[5] + bf0[13];
- bf1[6] = bf0[6] + bf0[14];
- bf1[7] = bf0[7] + bf0[15];
- bf1[8] = -bf0[8] + bf0[0];
- bf1[9] = -bf0[9] + bf0[1];
- bf1[10] = -bf0[10] + bf0[2];
- bf1[11] = -bf0[11] + bf0[3];
- bf1[12] = -bf0[12] + bf0[4];
- bf1[13] = -bf0[13] + bf0[5];
- bf1[14] = -bf0[14] + bf0[6];
- bf1[15] = -bf0[15] + bf0[7];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
- bf1[9] = half_btf(-cospi[8], bf0[9], cospi[56], bf0[8], cos_bit[stage]);
- bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
- bf1[11] = half_btf(-cospi[40], bf0[11], cospi[24], bf0[10], cos_bit[stage]);
- bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
- bf1[13] = half_btf(cospi[56], bf0[13], cospi[8], bf0[12], cos_bit[stage]);
- bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(cospi[24], bf0[15], cospi[40], bf0[14], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[4];
- bf1[1] = bf0[1] + bf0[5];
- bf1[2] = bf0[2] + bf0[6];
- bf1[3] = bf0[3] + bf0[7];
- bf1[4] = -bf0[4] + bf0[0];
- bf1[5] = -bf0[5] + bf0[1];
- bf1[6] = -bf0[6] + bf0[2];
- bf1[7] = -bf0[7] + bf0[3];
- bf1[8] = bf0[8] + bf0[12];
- bf1[9] = bf0[9] + bf0[13];
- bf1[10] = bf0[10] + bf0[14];
- bf1[11] = bf0[11] + bf0[15];
- bf1[12] = -bf0[12] + bf0[8];
- bf1[13] = -bf0[13] + bf0[9];
- bf1[14] = -bf0[14] + bf0[10];
- bf1[15] = -bf0[15] + bf0[11];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
- bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
- bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
- bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
- bf1[13] = half_btf(-cospi[16], bf0[13], cospi[48], bf0[12], cos_bit[stage]);
- bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(cospi[48], bf0[15], cospi[16], bf0[14], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[2];
- bf1[1] = bf0[1] + bf0[3];
- bf1[2] = -bf0[2] + bf0[0];
- bf1[3] = -bf0[3] + bf0[1];
- bf1[4] = bf0[4] + bf0[6];
- bf1[5] = bf0[5] + bf0[7];
- bf1[6] = -bf0[6] + bf0[4];
- bf1[7] = -bf0[7] + bf0[5];
- bf1[8] = bf0[8] + bf0[10];
- bf1[9] = bf0[9] + bf0[11];
- bf1[10] = -bf0[10] + bf0[8];
- bf1[11] = -bf0[11] + bf0[9];
- bf1[12] = bf0[12] + bf0[14];
- bf1[13] = bf0[13] + bf0[15];
- bf1[14] = -bf0[14] + bf0[12];
- bf1[15] = -bf0[15] + bf0[13];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 8
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
- bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[10], cos_bit[stage]);
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(-cospi[32], bf0[15], cospi[32], bf0[14], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 9
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = -bf0[8];
- bf1[2] = bf0[12];
- bf1[3] = -bf0[4];
- bf1[4] = bf0[6];
- bf1[5] = -bf0[14];
- bf1[6] = bf0[10];
- bf1[7] = -bf0[2];
- bf1[8] = bf0[3];
- bf1[9] = -bf0[11];
- bf1[10] = bf0[15];
- bf1[11] = -bf0[7];
- bf1[12] = bf0[5];
- bf1[13] = -bf0[13];
- bf1[14] = bf0[9];
- bf1[15] = -bf0[1];
- range_check(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fadst32_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- const int32_t size = 32;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[32];
-
- // stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[31];
- bf1[1] = input[0];
- bf1[2] = input[29];
- bf1[3] = input[2];
- bf1[4] = input[27];
- bf1[5] = input[4];
- bf1[6] = input[25];
- bf1[7] = input[6];
- bf1[8] = input[23];
- bf1[9] = input[8];
- bf1[10] = input[21];
- bf1[11] = input[10];
- bf1[12] = input[19];
- bf1[13] = input[12];
- bf1[14] = input[17];
- bf1[15] = input[14];
- bf1[16] = input[15];
- bf1[17] = input[16];
- bf1[18] = input[13];
- bf1[19] = input[18];
- bf1[20] = input[11];
- bf1[21] = input[20];
- bf1[22] = input[9];
- bf1[23] = input[22];
- bf1[24] = input[7];
- bf1[25] = input[24];
- bf1[26] = input[5];
- bf1[27] = input[26];
- bf1[28] = input[3];
- bf1[29] = input[28];
- bf1[30] = input[1];
- bf1[31] = input[30];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(-cospi[1], bf0[1], cospi[63], bf0[0], cos_bit[stage]);
- bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(-cospi[5], bf0[3], cospi[59], bf0[2], cos_bit[stage]);
- bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit[stage]);
- bf1[5] = half_btf(-cospi[9], bf0[5], cospi[55], bf0[4], cos_bit[stage]);
- bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(-cospi[13], bf0[7], cospi[51], bf0[6], cos_bit[stage]);
- bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit[stage]);
- bf1[9] = half_btf(-cospi[17], bf0[9], cospi[47], bf0[8], cos_bit[stage]);
- bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit[stage]);
- bf1[11] = half_btf(-cospi[21], bf0[11], cospi[43], bf0[10], cos_bit[stage]);
- bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit[stage]);
- bf1[13] = half_btf(-cospi[25], bf0[13], cospi[39], bf0[12], cos_bit[stage]);
- bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(-cospi[29], bf0[15], cospi[35], bf0[14], cos_bit[stage]);
- bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit[stage]);
- bf1[17] = half_btf(-cospi[33], bf0[17], cospi[31], bf0[16], cos_bit[stage]);
- bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], cos_bit[stage]);
- bf1[19] = half_btf(-cospi[37], bf0[19], cospi[27], bf0[18], cos_bit[stage]);
- bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit[stage]);
- bf1[21] = half_btf(-cospi[41], bf0[21], cospi[23], bf0[20], cos_bit[stage]);
- bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit[stage]);
- bf1[23] = half_btf(-cospi[45], bf0[23], cospi[19], bf0[22], cos_bit[stage]);
- bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit[stage]);
- bf1[25] = half_btf(-cospi[49], bf0[25], cospi[15], bf0[24], cos_bit[stage]);
- bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit[stage]);
- bf1[27] = half_btf(-cospi[53], bf0[27], cospi[11], bf0[26], cos_bit[stage]);
- bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit[stage]);
- bf1[29] = half_btf(-cospi[57], bf0[29], cospi[7], bf0[28], cos_bit[stage]);
- bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit[stage]);
- bf1[31] = half_btf(-cospi[61], bf0[31], cospi[3], bf0[30], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[16];
- bf1[1] = bf0[1] + bf0[17];
- bf1[2] = bf0[2] + bf0[18];
- bf1[3] = bf0[3] + bf0[19];
- bf1[4] = bf0[4] + bf0[20];
- bf1[5] = bf0[5] + bf0[21];
- bf1[6] = bf0[6] + bf0[22];
- bf1[7] = bf0[7] + bf0[23];
- bf1[8] = bf0[8] + bf0[24];
- bf1[9] = bf0[9] + bf0[25];
- bf1[10] = bf0[10] + bf0[26];
- bf1[11] = bf0[11] + bf0[27];
- bf1[12] = bf0[12] + bf0[28];
- bf1[13] = bf0[13] + bf0[29];
- bf1[14] = bf0[14] + bf0[30];
- bf1[15] = bf0[15] + bf0[31];
- bf1[16] = -bf0[16] + bf0[0];
- bf1[17] = -bf0[17] + bf0[1];
- bf1[18] = -bf0[18] + bf0[2];
- bf1[19] = -bf0[19] + bf0[3];
- bf1[20] = -bf0[20] + bf0[4];
- bf1[21] = -bf0[21] + bf0[5];
- bf1[22] = -bf0[22] + bf0[6];
- bf1[23] = -bf0[23] + bf0[7];
- bf1[24] = -bf0[24] + bf0[8];
- bf1[25] = -bf0[25] + bf0[9];
- bf1[26] = -bf0[26] + bf0[10];
- bf1[27] = -bf0[27] + bf0[11];
- bf1[28] = -bf0[28] + bf0[12];
- bf1[29] = -bf0[29] + bf0[13];
- bf1[30] = -bf0[30] + bf0[14];
- bf1[31] = -bf0[31] + bf0[15];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit[stage]);
- bf1[17] = half_btf(-cospi[4], bf0[17], cospi[60], bf0[16], cos_bit[stage]);
- bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit[stage]);
- bf1[19] = half_btf(-cospi[20], bf0[19], cospi[44], bf0[18], cos_bit[stage]);
- bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit[stage]);
- bf1[21] = half_btf(-cospi[36], bf0[21], cospi[28], bf0[20], cos_bit[stage]);
- bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit[stage]);
- bf1[23] = half_btf(-cospi[52], bf0[23], cospi[12], bf0[22], cos_bit[stage]);
- bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit[stage]);
- bf1[25] = half_btf(cospi[60], bf0[25], cospi[4], bf0[24], cos_bit[stage]);
- bf1[26] = half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit[stage]);
- bf1[27] = half_btf(cospi[44], bf0[27], cospi[20], bf0[26], cos_bit[stage]);
- bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit[stage]);
- bf1[29] = half_btf(cospi[28], bf0[29], cospi[36], bf0[28], cos_bit[stage]);
- bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit[stage]);
- bf1[31] = half_btf(cospi[12], bf0[31], cospi[52], bf0[30], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[8];
- bf1[1] = bf0[1] + bf0[9];
- bf1[2] = bf0[2] + bf0[10];
- bf1[3] = bf0[3] + bf0[11];
- bf1[4] = bf0[4] + bf0[12];
- bf1[5] = bf0[5] + bf0[13];
- bf1[6] = bf0[6] + bf0[14];
- bf1[7] = bf0[7] + bf0[15];
- bf1[8] = -bf0[8] + bf0[0];
- bf1[9] = -bf0[9] + bf0[1];
- bf1[10] = -bf0[10] + bf0[2];
- bf1[11] = -bf0[11] + bf0[3];
- bf1[12] = -bf0[12] + bf0[4];
- bf1[13] = -bf0[13] + bf0[5];
- bf1[14] = -bf0[14] + bf0[6];
- bf1[15] = -bf0[15] + bf0[7];
- bf1[16] = bf0[16] + bf0[24];
- bf1[17] = bf0[17] + bf0[25];
- bf1[18] = bf0[18] + bf0[26];
- bf1[19] = bf0[19] + bf0[27];
- bf1[20] = bf0[20] + bf0[28];
- bf1[21] = bf0[21] + bf0[29];
- bf1[22] = bf0[22] + bf0[30];
- bf1[23] = bf0[23] + bf0[31];
- bf1[24] = -bf0[24] + bf0[16];
- bf1[25] = -bf0[25] + bf0[17];
- bf1[26] = -bf0[26] + bf0[18];
- bf1[27] = -bf0[27] + bf0[19];
- bf1[28] = -bf0[28] + bf0[20];
- bf1[29] = -bf0[29] + bf0[21];
- bf1[30] = -bf0[30] + bf0[22];
- bf1[31] = -bf0[31] + bf0[23];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
- bf1[9] = half_btf(-cospi[8], bf0[9], cospi[56], bf0[8], cos_bit[stage]);
- bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
- bf1[11] = half_btf(-cospi[40], bf0[11], cospi[24], bf0[10], cos_bit[stage]);
- bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
- bf1[13] = half_btf(cospi[56], bf0[13], cospi[8], bf0[12], cos_bit[stage]);
- bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(cospi[24], bf0[15], cospi[40], bf0[14], cos_bit[stage]);
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = bf0[18];
- bf1[19] = bf0[19];
- bf1[20] = bf0[20];
- bf1[21] = bf0[21];
- bf1[22] = bf0[22];
- bf1[23] = bf0[23];
- bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit[stage]);
- bf1[25] = half_btf(-cospi[8], bf0[25], cospi[56], bf0[24], cos_bit[stage]);
- bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit[stage]);
- bf1[27] = half_btf(-cospi[40], bf0[27], cospi[24], bf0[26], cos_bit[stage]);
- bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit[stage]);
- bf1[29] = half_btf(cospi[56], bf0[29], cospi[8], bf0[28], cos_bit[stage]);
- bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit[stage]);
- bf1[31] = half_btf(cospi[24], bf0[31], cospi[40], bf0[30], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[4];
- bf1[1] = bf0[1] + bf0[5];
- bf1[2] = bf0[2] + bf0[6];
- bf1[3] = bf0[3] + bf0[7];
- bf1[4] = -bf0[4] + bf0[0];
- bf1[5] = -bf0[5] + bf0[1];
- bf1[6] = -bf0[6] + bf0[2];
- bf1[7] = -bf0[7] + bf0[3];
- bf1[8] = bf0[8] + bf0[12];
- bf1[9] = bf0[9] + bf0[13];
- bf1[10] = bf0[10] + bf0[14];
- bf1[11] = bf0[11] + bf0[15];
- bf1[12] = -bf0[12] + bf0[8];
- bf1[13] = -bf0[13] + bf0[9];
- bf1[14] = -bf0[14] + bf0[10];
- bf1[15] = -bf0[15] + bf0[11];
- bf1[16] = bf0[16] + bf0[20];
- bf1[17] = bf0[17] + bf0[21];
- bf1[18] = bf0[18] + bf0[22];
- bf1[19] = bf0[19] + bf0[23];
- bf1[20] = -bf0[20] + bf0[16];
- bf1[21] = -bf0[21] + bf0[17];
- bf1[22] = -bf0[22] + bf0[18];
- bf1[23] = -bf0[23] + bf0[19];
- bf1[24] = bf0[24] + bf0[28];
- bf1[25] = bf0[25] + bf0[29];
- bf1[26] = bf0[26] + bf0[30];
- bf1[27] = bf0[27] + bf0[31];
- bf1[28] = -bf0[28] + bf0[24];
- bf1[29] = -bf0[29] + bf0[25];
- bf1[30] = -bf0[30] + bf0[26];
- bf1[31] = -bf0[31] + bf0[27];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 8
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
- bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
- bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
- bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
- bf1[13] = half_btf(-cospi[16], bf0[13], cospi[48], bf0[12], cos_bit[stage]);
- bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(cospi[48], bf0[15], cospi[16], bf0[14], cos_bit[stage]);
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = bf0[18];
- bf1[19] = bf0[19];
- bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit[stage]);
- bf1[21] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[20], cos_bit[stage]);
- bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit[stage]);
- bf1[23] = half_btf(cospi[48], bf0[23], cospi[16], bf0[22], cos_bit[stage]);
- bf1[24] = bf0[24];
- bf1[25] = bf0[25];
- bf1[26] = bf0[26];
- bf1[27] = bf0[27];
- bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit[stage]);
- bf1[29] = half_btf(-cospi[16], bf0[29], cospi[48], bf0[28], cos_bit[stage]);
- bf1[30] = half_btf(-cospi[48], bf0[30], cospi[16], bf0[31], cos_bit[stage]);
- bf1[31] = half_btf(cospi[48], bf0[31], cospi[16], bf0[30], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 9
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[2];
- bf1[1] = bf0[1] + bf0[3];
- bf1[2] = -bf0[2] + bf0[0];
- bf1[3] = -bf0[3] + bf0[1];
- bf1[4] = bf0[4] + bf0[6];
- bf1[5] = bf0[5] + bf0[7];
- bf1[6] = -bf0[6] + bf0[4];
- bf1[7] = -bf0[7] + bf0[5];
- bf1[8] = bf0[8] + bf0[10];
- bf1[9] = bf0[9] + bf0[11];
- bf1[10] = -bf0[10] + bf0[8];
- bf1[11] = -bf0[11] + bf0[9];
- bf1[12] = bf0[12] + bf0[14];
- bf1[13] = bf0[13] + bf0[15];
- bf1[14] = -bf0[14] + bf0[12];
- bf1[15] = -bf0[15] + bf0[13];
- bf1[16] = bf0[16] + bf0[18];
- bf1[17] = bf0[17] + bf0[19];
- bf1[18] = -bf0[18] + bf0[16];
- bf1[19] = -bf0[19] + bf0[17];
- bf1[20] = bf0[20] + bf0[22];
- bf1[21] = bf0[21] + bf0[23];
- bf1[22] = -bf0[22] + bf0[20];
- bf1[23] = -bf0[23] + bf0[21];
- bf1[24] = bf0[24] + bf0[26];
- bf1[25] = bf0[25] + bf0[27];
- bf1[26] = -bf0[26] + bf0[24];
- bf1[27] = -bf0[27] + bf0[25];
- bf1[28] = bf0[28] + bf0[30];
- bf1[29] = bf0[29] + bf0[31];
- bf1[30] = -bf0[30] + bf0[28];
- bf1[31] = -bf0[31] + bf0[29];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 10
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
- bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[10], cos_bit[stage]);
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(-cospi[32], bf0[15], cospi[32], bf0[14], cos_bit[stage]);
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit[stage]);
- bf1[19] = half_btf(-cospi[32], bf0[19], cospi[32], bf0[18], cos_bit[stage]);
- bf1[20] = bf0[20];
- bf1[21] = bf0[21];
- bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit[stage]);
- bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[22], cos_bit[stage]);
- bf1[24] = bf0[24];
- bf1[25] = bf0[25];
- bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit[stage]);
- bf1[27] = half_btf(-cospi[32], bf0[27], cospi[32], bf0[26], cos_bit[stage]);
- bf1[28] = bf0[28];
- bf1[29] = bf0[29];
- bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit[stage]);
- bf1[31] = half_btf(-cospi[32], bf0[31], cospi[32], bf0[30], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 11
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = -bf0[16];
- bf1[2] = bf0[24];
- bf1[3] = -bf0[8];
- bf1[4] = bf0[12];
- bf1[5] = -bf0[28];
- bf1[6] = bf0[20];
- bf1[7] = -bf0[4];
- bf1[8] = bf0[6];
- bf1[9] = -bf0[22];
- bf1[10] = bf0[30];
- bf1[11] = -bf0[14];
- bf1[12] = bf0[10];
- bf1[13] = -bf0[26];
- bf1[14] = bf0[18];
- bf1[15] = -bf0[2];
- bf1[16] = bf0[3];
- bf1[17] = -bf0[19];
- bf1[18] = bf0[27];
- bf1[19] = -bf0[11];
- bf1[20] = bf0[15];
- bf1[21] = -bf0[31];
- bf1[22] = bf0[23];
- bf1[23] = -bf0[7];
- bf1[24] = bf0[5];
- bf1[25] = -bf0[21];
- bf1[26] = bf0[29];
- bf1[27] = -bf0[13];
- bf1[28] = bf0[9];
- bf1[29] = -bf0[25];
- bf1[30] = bf0[17];
- bf1[31] = -bf0[1];
- range_check(stage, input, bf1, size, stage_range[stage]);
-}
-
-#if CONFIG_EXT_TX
-void av1_fidentity4_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- (void)cos_bit;
- for (int i = 0; i < 4; ++i)
- output[i] = (int32_t)dct_const_round_shift(input[i] * Sqrt2);
- range_check(0, input, output, 4, stage_range[0]);
-}
-
-void av1_fidentity8_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- (void)cos_bit;
- for (int i = 0; i < 8; ++i) output[i] = input[i] * 2;
- range_check(0, input, output, 8, stage_range[0]);
-}
-
-void av1_fidentity16_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- (void)cos_bit;
- for (int i = 0; i < 16; ++i)
- output[i] = (int32_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
- range_check(0, input, output, 16, stage_range[0]);
-}
-
-void av1_fidentity32_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- (void)cos_bit;
- for (int i = 0; i < 32; ++i) output[i] = input[i] * 4;
- range_check(0, input, output, 32, stage_range[0]);
-}
-
-#if CONFIG_TX64X64
-void av1_fidentity64_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- (void)cos_bit;
- for (int i = 0; i < 64; ++i)
- output[i] = (int32_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
- range_check(0, input, output, 64, stage_range[0]);
-}
-#endif // CONFIG_TX64X64
-#endif // CONFIG_EXT_TX
-
-#if CONFIG_TX64X64
-void av1_fdct64_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- const int32_t size = 64;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[64];
-
- // stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
-
- // stage 1;
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf1 = output;
- bf1[0] = input[0] + input[63];
- bf1[1] = input[1] + input[62];
- bf1[2] = input[2] + input[61];
- bf1[3] = input[3] + input[60];
- bf1[4] = input[4] + input[59];
- bf1[5] = input[5] + input[58];
- bf1[6] = input[6] + input[57];
- bf1[7] = input[7] + input[56];
- bf1[8] = input[8] + input[55];
- bf1[9] = input[9] + input[54];
- bf1[10] = input[10] + input[53];
- bf1[11] = input[11] + input[52];
- bf1[12] = input[12] + input[51];
- bf1[13] = input[13] + input[50];
- bf1[14] = input[14] + input[49];
- bf1[15] = input[15] + input[48];
- bf1[16] = input[16] + input[47];
- bf1[17] = input[17] + input[46];
- bf1[18] = input[18] + input[45];
- bf1[19] = input[19] + input[44];
- bf1[20] = input[20] + input[43];
- bf1[21] = input[21] + input[42];
- bf1[22] = input[22] + input[41];
- bf1[23] = input[23] + input[40];
- bf1[24] = input[24] + input[39];
- bf1[25] = input[25] + input[38];
- bf1[26] = input[26] + input[37];
- bf1[27] = input[27] + input[36];
- bf1[28] = input[28] + input[35];
- bf1[29] = input[29] + input[34];
- bf1[30] = input[30] + input[33];
- bf1[31] = input[31] + input[32];
- bf1[32] = -input[32] + input[31];
- bf1[33] = -input[33] + input[30];
- bf1[34] = -input[34] + input[29];
- bf1[35] = -input[35] + input[28];
- bf1[36] = -input[36] + input[27];
- bf1[37] = -input[37] + input[26];
- bf1[38] = -input[38] + input[25];
- bf1[39] = -input[39] + input[24];
- bf1[40] = -input[40] + input[23];
- bf1[41] = -input[41] + input[22];
- bf1[42] = -input[42] + input[21];
- bf1[43] = -input[43] + input[20];
- bf1[44] = -input[44] + input[19];
- bf1[45] = -input[45] + input[18];
- bf1[46] = -input[46] + input[17];
- bf1[47] = -input[47] + input[16];
- bf1[48] = -input[48] + input[15];
- bf1[49] = -input[49] + input[14];
- bf1[50] = -input[50] + input[13];
- bf1[51] = -input[51] + input[12];
- bf1[52] = -input[52] + input[11];
- bf1[53] = -input[53] + input[10];
- bf1[54] = -input[54] + input[9];
- bf1[55] = -input[55] + input[8];
- bf1[56] = -input[56] + input[7];
- bf1[57] = -input[57] + input[6];
- bf1[58] = -input[58] + input[5];
- bf1[59] = -input[59] + input[4];
- bf1[60] = -input[60] + input[3];
- bf1[61] = -input[61] + input[2];
- bf1[62] = -input[62] + input[1];
- bf1[63] = -input[63] + input[0];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0] + bf0[31];
- bf1[1] = bf0[1] + bf0[30];
- bf1[2] = bf0[2] + bf0[29];
- bf1[3] = bf0[3] + bf0[28];
- bf1[4] = bf0[4] + bf0[27];
- bf1[5] = bf0[5] + bf0[26];
- bf1[6] = bf0[6] + bf0[25];
- bf1[7] = bf0[7] + bf0[24];
- bf1[8] = bf0[8] + bf0[23];
- bf1[9] = bf0[9] + bf0[22];
- bf1[10] = bf0[10] + bf0[21];
- bf1[11] = bf0[11] + bf0[20];
- bf1[12] = bf0[12] + bf0[19];
- bf1[13] = bf0[13] + bf0[18];
- bf1[14] = bf0[14] + bf0[17];
- bf1[15] = bf0[15] + bf0[16];
- bf1[16] = -bf0[16] + bf0[15];
- bf1[17] = -bf0[17] + bf0[14];
- bf1[18] = -bf0[18] + bf0[13];
- bf1[19] = -bf0[19] + bf0[12];
- bf1[20] = -bf0[20] + bf0[11];
- bf1[21] = -bf0[21] + bf0[10];
- bf1[22] = -bf0[22] + bf0[9];
- bf1[23] = -bf0[23] + bf0[8];
- bf1[24] = -bf0[24] + bf0[7];
- bf1[25] = -bf0[25] + bf0[6];
- bf1[26] = -bf0[26] + bf0[5];
- bf1[27] = -bf0[27] + bf0[4];
- bf1[28] = -bf0[28] + bf0[3];
- bf1[29] = -bf0[29] + bf0[2];
- bf1[30] = -bf0[30] + bf0[1];
- bf1[31] = -bf0[31] + bf0[0];
- bf1[32] = bf0[32];
- bf1[33] = bf0[33];
- bf1[34] = bf0[34];
- bf1[35] = bf0[35];
- bf1[36] = bf0[36];
- bf1[37] = bf0[37];
- bf1[38] = bf0[38];
- bf1[39] = bf0[39];
- bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
- bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
- bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
- bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
- bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
- bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
- bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
- bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
- bf1[48] = half_btf(cospi[32], bf0[48], cospi[32], bf0[47], cos_bit[stage]);
- bf1[49] = half_btf(cospi[32], bf0[49], cospi[32], bf0[46], cos_bit[stage]);
- bf1[50] = half_btf(cospi[32], bf0[50], cospi[32], bf0[45], cos_bit[stage]);
- bf1[51] = half_btf(cospi[32], bf0[51], cospi[32], bf0[44], cos_bit[stage]);
- bf1[52] = half_btf(cospi[32], bf0[52], cospi[32], bf0[43], cos_bit[stage]);
- bf1[53] = half_btf(cospi[32], bf0[53], cospi[32], bf0[42], cos_bit[stage]);
- bf1[54] = half_btf(cospi[32], bf0[54], cospi[32], bf0[41], cos_bit[stage]);
- bf1[55] = half_btf(cospi[32], bf0[55], cospi[32], bf0[40], cos_bit[stage]);
- bf1[56] = bf0[56];
- bf1[57] = bf0[57];
- bf1[58] = bf0[58];
- bf1[59] = bf0[59];
- bf1[60] = bf0[60];
- bf1[61] = bf0[61];
- bf1[62] = bf0[62];
- bf1[63] = bf0[63];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[15];
- bf1[1] = bf0[1] + bf0[14];
- bf1[2] = bf0[2] + bf0[13];
- bf1[3] = bf0[3] + bf0[12];
- bf1[4] = bf0[4] + bf0[11];
- bf1[5] = bf0[5] + bf0[10];
- bf1[6] = bf0[6] + bf0[9];
- bf1[7] = bf0[7] + bf0[8];
- bf1[8] = -bf0[8] + bf0[7];
- bf1[9] = -bf0[9] + bf0[6];
- bf1[10] = -bf0[10] + bf0[5];
- bf1[11] = -bf0[11] + bf0[4];
- bf1[12] = -bf0[12] + bf0[3];
- bf1[13] = -bf0[13] + bf0[2];
- bf1[14] = -bf0[14] + bf0[1];
- bf1[15] = -bf0[15] + bf0[0];
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = bf0[18];
- bf1[19] = bf0[19];
- bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
- bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
- bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
- bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
- bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit[stage]);
- bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit[stage]);
- bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit[stage]);
- bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit[stage]);
- bf1[28] = bf0[28];
- bf1[29] = bf0[29];
- bf1[30] = bf0[30];
- bf1[31] = bf0[31];
- bf1[32] = bf0[32] + bf0[47];
- bf1[33] = bf0[33] + bf0[46];
- bf1[34] = bf0[34] + bf0[45];
- bf1[35] = bf0[35] + bf0[44];
- bf1[36] = bf0[36] + bf0[43];
- bf1[37] = bf0[37] + bf0[42];
- bf1[38] = bf0[38] + bf0[41];
- bf1[39] = bf0[39] + bf0[40];
- bf1[40] = -bf0[40] + bf0[39];
- bf1[41] = -bf0[41] + bf0[38];
- bf1[42] = -bf0[42] + bf0[37];
- bf1[43] = -bf0[43] + bf0[36];
- bf1[44] = -bf0[44] + bf0[35];
- bf1[45] = -bf0[45] + bf0[34];
- bf1[46] = -bf0[46] + bf0[33];
- bf1[47] = -bf0[47] + bf0[32];
- bf1[48] = -bf0[48] + bf0[63];
- bf1[49] = -bf0[49] + bf0[62];
- bf1[50] = -bf0[50] + bf0[61];
- bf1[51] = -bf0[51] + bf0[60];
- bf1[52] = -bf0[52] + bf0[59];
- bf1[53] = -bf0[53] + bf0[58];
- bf1[54] = -bf0[54] + bf0[57];
- bf1[55] = -bf0[55] + bf0[56];
- bf1[56] = bf0[56] + bf0[55];
- bf1[57] = bf0[57] + bf0[54];
- bf1[58] = bf0[58] + bf0[53];
- bf1[59] = bf0[59] + bf0[52];
- bf1[60] = bf0[60] + bf0[51];
- bf1[61] = bf0[61] + bf0[50];
- bf1[62] = bf0[62] + bf0[49];
- bf1[63] = bf0[63] + bf0[48];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0] + bf0[7];
- bf1[1] = bf0[1] + bf0[6];
- bf1[2] = bf0[2] + bf0[5];
- bf1[3] = bf0[3] + bf0[4];
- bf1[4] = -bf0[4] + bf0[3];
- bf1[5] = -bf0[5] + bf0[2];
- bf1[6] = -bf0[6] + bf0[1];
- bf1[7] = -bf0[7] + bf0[0];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
- bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
- bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
- bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = bf0[16] + bf0[23];
- bf1[17] = bf0[17] + bf0[22];
- bf1[18] = bf0[18] + bf0[21];
- bf1[19] = bf0[19] + bf0[20];
- bf1[20] = -bf0[20] + bf0[19];
- bf1[21] = -bf0[21] + bf0[18];
- bf1[22] = -bf0[22] + bf0[17];
- bf1[23] = -bf0[23] + bf0[16];
- bf1[24] = -bf0[24] + bf0[31];
- bf1[25] = -bf0[25] + bf0[30];
- bf1[26] = -bf0[26] + bf0[29];
- bf1[27] = -bf0[27] + bf0[28];
- bf1[28] = bf0[28] + bf0[27];
- bf1[29] = bf0[29] + bf0[26];
- bf1[30] = bf0[30] + bf0[25];
- bf1[31] = bf0[31] + bf0[24];
- bf1[32] = bf0[32];
- bf1[33] = bf0[33];
- bf1[34] = bf0[34];
- bf1[35] = bf0[35];
- bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit[stage]);
- bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit[stage]);
- bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit[stage]);
- bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit[stage]);
- bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit[stage]);
- bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit[stage]);
- bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit[stage]);
- bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit[stage]);
- bf1[44] = bf0[44];
- bf1[45] = bf0[45];
- bf1[46] = bf0[46];
- bf1[47] = bf0[47];
- bf1[48] = bf0[48];
- bf1[49] = bf0[49];
- bf1[50] = bf0[50];
- bf1[51] = bf0[51];
- bf1[52] = half_btf(cospi[48], bf0[52], -cospi[16], bf0[43], cos_bit[stage]);
- bf1[53] = half_btf(cospi[48], bf0[53], -cospi[16], bf0[42], cos_bit[stage]);
- bf1[54] = half_btf(cospi[48], bf0[54], -cospi[16], bf0[41], cos_bit[stage]);
- bf1[55] = half_btf(cospi[48], bf0[55], -cospi[16], bf0[40], cos_bit[stage]);
- bf1[56] = half_btf(cospi[16], bf0[56], cospi[48], bf0[39], cos_bit[stage]);
- bf1[57] = half_btf(cospi[16], bf0[57], cospi[48], bf0[38], cos_bit[stage]);
- bf1[58] = half_btf(cospi[16], bf0[58], cospi[48], bf0[37], cos_bit[stage]);
- bf1[59] = half_btf(cospi[16], bf0[59], cospi[48], bf0[36], cos_bit[stage]);
- bf1[60] = bf0[60];
- bf1[61] = bf0[61];
- bf1[62] = bf0[62];
- bf1[63] = bf0[63];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[3];
- bf1[1] = bf0[1] + bf0[2];
- bf1[2] = -bf0[2] + bf0[1];
- bf1[3] = -bf0[3] + bf0[0];
- bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
- bf1[7] = bf0[7];
- bf1[8] = bf0[8] + bf0[11];
- bf1[9] = bf0[9] + bf0[10];
- bf1[10] = -bf0[10] + bf0[9];
- bf1[11] = -bf0[11] + bf0[8];
- bf1[12] = -bf0[12] + bf0[15];
- bf1[13] = -bf0[13] + bf0[14];
- bf1[14] = bf0[14] + bf0[13];
- bf1[15] = bf0[15] + bf0[12];
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
- bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
- bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
- bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
- bf1[22] = bf0[22];
- bf1[23] = bf0[23];
- bf1[24] = bf0[24];
- bf1[25] = bf0[25];
- bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit[stage]);
- bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit[stage]);
- bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit[stage]);
- bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit[stage]);
- bf1[30] = bf0[30];
- bf1[31] = bf0[31];
- bf1[32] = bf0[32] + bf0[39];
- bf1[33] = bf0[33] + bf0[38];
- bf1[34] = bf0[34] + bf0[37];
- bf1[35] = bf0[35] + bf0[36];
- bf1[36] = -bf0[36] + bf0[35];
- bf1[37] = -bf0[37] + bf0[34];
- bf1[38] = -bf0[38] + bf0[33];
- bf1[39] = -bf0[39] + bf0[32];
- bf1[40] = -bf0[40] + bf0[47];
- bf1[41] = -bf0[41] + bf0[46];
- bf1[42] = -bf0[42] + bf0[45];
- bf1[43] = -bf0[43] + bf0[44];
- bf1[44] = bf0[44] + bf0[43];
- bf1[45] = bf0[45] + bf0[42];
- bf1[46] = bf0[46] + bf0[41];
- bf1[47] = bf0[47] + bf0[40];
- bf1[48] = bf0[48] + bf0[55];
- bf1[49] = bf0[49] + bf0[54];
- bf1[50] = bf0[50] + bf0[53];
- bf1[51] = bf0[51] + bf0[52];
- bf1[52] = -bf0[52] + bf0[51];
- bf1[53] = -bf0[53] + bf0[50];
- bf1[54] = -bf0[54] + bf0[49];
- bf1[55] = -bf0[55] + bf0[48];
- bf1[56] = -bf0[56] + bf0[63];
- bf1[57] = -bf0[57] + bf0[62];
- bf1[58] = -bf0[58] + bf0[61];
- bf1[59] = -bf0[59] + bf0[60];
- bf1[60] = bf0[60] + bf0[59];
- bf1[61] = bf0[61] + bf0[58];
- bf1[62] = bf0[62] + bf0[57];
- bf1[63] = bf0[63] + bf0[56];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
- bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
- bf1[4] = bf0[4] + bf0[5];
- bf1[5] = -bf0[5] + bf0[4];
- bf1[6] = -bf0[6] + bf0[7];
- bf1[7] = bf0[7] + bf0[6];
- bf1[8] = bf0[8];
- bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
- bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
- bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
- bf1[15] = bf0[15];
- bf1[16] = bf0[16] + bf0[19];
- bf1[17] = bf0[17] + bf0[18];
- bf1[18] = -bf0[18] + bf0[17];
- bf1[19] = -bf0[19] + bf0[16];
- bf1[20] = -bf0[20] + bf0[23];
- bf1[21] = -bf0[21] + bf0[22];
- bf1[22] = bf0[22] + bf0[21];
- bf1[23] = bf0[23] + bf0[20];
- bf1[24] = bf0[24] + bf0[27];
- bf1[25] = bf0[25] + bf0[26];
- bf1[26] = -bf0[26] + bf0[25];
- bf1[27] = -bf0[27] + bf0[24];
- bf1[28] = -bf0[28] + bf0[31];
- bf1[29] = -bf0[29] + bf0[30];
- bf1[30] = bf0[30] + bf0[29];
- bf1[31] = bf0[31] + bf0[28];
- bf1[32] = bf0[32];
- bf1[33] = bf0[33];
- bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit[stage]);
- bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit[stage]);
- bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit[stage]);
- bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit[stage]);
- bf1[38] = bf0[38];
- bf1[39] = bf0[39];
- bf1[40] = bf0[40];
- bf1[41] = bf0[41];
- bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit[stage]);
- bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit[stage]);
- bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit[stage]);
- bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit[stage]);
- bf1[46] = bf0[46];
- bf1[47] = bf0[47];
- bf1[48] = bf0[48];
- bf1[49] = bf0[49];
- bf1[50] = half_btf(cospi[24], bf0[50], -cospi[40], bf0[45], cos_bit[stage]);
- bf1[51] = half_btf(cospi[24], bf0[51], -cospi[40], bf0[44], cos_bit[stage]);
- bf1[52] = half_btf(cospi[40], bf0[52], cospi[24], bf0[43], cos_bit[stage]);
- bf1[53] = half_btf(cospi[40], bf0[53], cospi[24], bf0[42], cos_bit[stage]);
- bf1[54] = bf0[54];
- bf1[55] = bf0[55];
- bf1[56] = bf0[56];
- bf1[57] = bf0[57];
- bf1[58] = half_btf(cospi[56], bf0[58], -cospi[8], bf0[37], cos_bit[stage]);
- bf1[59] = half_btf(cospi[56], bf0[59], -cospi[8], bf0[36], cos_bit[stage]);
- bf1[60] = half_btf(cospi[8], bf0[60], cospi[56], bf0[35], cos_bit[stage]);
- bf1[61] = half_btf(cospi[8], bf0[61], cospi[56], bf0[34], cos_bit[stage]);
- bf1[62] = bf0[62];
- bf1[63] = bf0[63];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
- bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
- bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
- bf1[8] = bf0[8] + bf0[9];
- bf1[9] = -bf0[9] + bf0[8];
- bf1[10] = -bf0[10] + bf0[11];
- bf1[11] = bf0[11] + bf0[10];
- bf1[12] = bf0[12] + bf0[13];
- bf1[13] = -bf0[13] + bf0[12];
- bf1[14] = -bf0[14] + bf0[15];
- bf1[15] = bf0[15] + bf0[14];
- bf1[16] = bf0[16];
- bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
- bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
- bf1[19] = bf0[19];
- bf1[20] = bf0[20];
- bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
- bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
- bf1[23] = bf0[23];
- bf1[24] = bf0[24];
- bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit[stage]);
- bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit[stage]);
- bf1[27] = bf0[27];
- bf1[28] = bf0[28];
- bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit[stage]);
- bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit[stage]);
- bf1[31] = bf0[31];
- bf1[32] = bf0[32] + bf0[35];
- bf1[33] = bf0[33] + bf0[34];
- bf1[34] = -bf0[34] + bf0[33];
- bf1[35] = -bf0[35] + bf0[32];
- bf1[36] = -bf0[36] + bf0[39];
- bf1[37] = -bf0[37] + bf0[38];
- bf1[38] = bf0[38] + bf0[37];
- bf1[39] = bf0[39] + bf0[36];
- bf1[40] = bf0[40] + bf0[43];
- bf1[41] = bf0[41] + bf0[42];
- bf1[42] = -bf0[42] + bf0[41];
- bf1[43] = -bf0[43] + bf0[40];
- bf1[44] = -bf0[44] + bf0[47];
- bf1[45] = -bf0[45] + bf0[46];
- bf1[46] = bf0[46] + bf0[45];
- bf1[47] = bf0[47] + bf0[44];
- bf1[48] = bf0[48] + bf0[51];
- bf1[49] = bf0[49] + bf0[50];
- bf1[50] = -bf0[50] + bf0[49];
- bf1[51] = -bf0[51] + bf0[48];
- bf1[52] = -bf0[52] + bf0[55];
- bf1[53] = -bf0[53] + bf0[54];
- bf1[54] = bf0[54] + bf0[53];
- bf1[55] = bf0[55] + bf0[52];
- bf1[56] = bf0[56] + bf0[59];
- bf1[57] = bf0[57] + bf0[58];
- bf1[58] = -bf0[58] + bf0[57];
- bf1[59] = -bf0[59] + bf0[56];
- bf1[60] = -bf0[60] + bf0[63];
- bf1[61] = -bf0[61] + bf0[62];
- bf1[62] = bf0[62] + bf0[61];
- bf1[63] = bf0[63] + bf0[60];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 8
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
- bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
- bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
- bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
- bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
- bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
- bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
- bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
- bf1[16] = bf0[16] + bf0[17];
- bf1[17] = -bf0[17] + bf0[16];
- bf1[18] = -bf0[18] + bf0[19];
- bf1[19] = bf0[19] + bf0[18];
- bf1[20] = bf0[20] + bf0[21];
- bf1[21] = -bf0[21] + bf0[20];
- bf1[22] = -bf0[22] + bf0[23];
- bf1[23] = bf0[23] + bf0[22];
- bf1[24] = bf0[24] + bf0[25];
- bf1[25] = -bf0[25] + bf0[24];
- bf1[26] = -bf0[26] + bf0[27];
- bf1[27] = bf0[27] + bf0[26];
- bf1[28] = bf0[28] + bf0[29];
- bf1[29] = -bf0[29] + bf0[28];
- bf1[30] = -bf0[30] + bf0[31];
- bf1[31] = bf0[31] + bf0[30];
- bf1[32] = bf0[32];
- bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit[stage]);
- bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit[stage]);
- bf1[35] = bf0[35];
- bf1[36] = bf0[36];
- bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit[stage]);
- bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit[stage]);
- bf1[39] = bf0[39];
- bf1[40] = bf0[40];
- bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit[stage]);
- bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit[stage]);
- bf1[43] = bf0[43];
- bf1[44] = bf0[44];
- bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit[stage]);
- bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit[stage]);
- bf1[47] = bf0[47];
- bf1[48] = bf0[48];
- bf1[49] = half_btf(cospi[12], bf0[49], -cospi[52], bf0[46], cos_bit[stage]);
- bf1[50] = half_btf(cospi[52], bf0[50], cospi[12], bf0[45], cos_bit[stage]);
- bf1[51] = bf0[51];
- bf1[52] = bf0[52];
- bf1[53] = half_btf(cospi[44], bf0[53], -cospi[20], bf0[42], cos_bit[stage]);
- bf1[54] = half_btf(cospi[20], bf0[54], cospi[44], bf0[41], cos_bit[stage]);
- bf1[55] = bf0[55];
- bf1[56] = bf0[56];
- bf1[57] = half_btf(cospi[28], bf0[57], -cospi[36], bf0[38], cos_bit[stage]);
- bf1[58] = half_btf(cospi[36], bf0[58], cospi[28], bf0[37], cos_bit[stage]);
- bf1[59] = bf0[59];
- bf1[60] = bf0[60];
- bf1[61] = half_btf(cospi[60], bf0[61], -cospi[4], bf0[34], cos_bit[stage]);
- bf1[62] = half_btf(cospi[4], bf0[62], cospi[60], bf0[33], cos_bit[stage]);
- bf1[63] = bf0[63];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 9
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit[stage]);
- bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit[stage]);
- bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit[stage]);
- bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit[stage]);
- bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit[stage]);
- bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit[stage]);
- bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit[stage]);
- bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit[stage]);
- bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit[stage]);
- bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit[stage]);
- bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit[stage]);
- bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit[stage]);
- bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit[stage]);
- bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit[stage]);
- bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit[stage]);
- bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit[stage]);
- bf1[32] = bf0[32] + bf0[33];
- bf1[33] = -bf0[33] + bf0[32];
- bf1[34] = -bf0[34] + bf0[35];
- bf1[35] = bf0[35] + bf0[34];
- bf1[36] = bf0[36] + bf0[37];
- bf1[37] = -bf0[37] + bf0[36];
- bf1[38] = -bf0[38] + bf0[39];
- bf1[39] = bf0[39] + bf0[38];
- bf1[40] = bf0[40] + bf0[41];
- bf1[41] = -bf0[41] + bf0[40];
- bf1[42] = -bf0[42] + bf0[43];
- bf1[43] = bf0[43] + bf0[42];
- bf1[44] = bf0[44] + bf0[45];
- bf1[45] = -bf0[45] + bf0[44];
- bf1[46] = -bf0[46] + bf0[47];
- bf1[47] = bf0[47] + bf0[46];
- bf1[48] = bf0[48] + bf0[49];
- bf1[49] = -bf0[49] + bf0[48];
- bf1[50] = -bf0[50] + bf0[51];
- bf1[51] = bf0[51] + bf0[50];
- bf1[52] = bf0[52] + bf0[53];
- bf1[53] = -bf0[53] + bf0[52];
- bf1[54] = -bf0[54] + bf0[55];
- bf1[55] = bf0[55] + bf0[54];
- bf1[56] = bf0[56] + bf0[57];
- bf1[57] = -bf0[57] + bf0[56];
- bf1[58] = -bf0[58] + bf0[59];
- bf1[59] = bf0[59] + bf0[58];
- bf1[60] = bf0[60] + bf0[61];
- bf1[61] = -bf0[61] + bf0[60];
- bf1[62] = -bf0[62] + bf0[63];
- bf1[63] = bf0[63] + bf0[62];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 10
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = bf0[18];
- bf1[19] = bf0[19];
- bf1[20] = bf0[20];
- bf1[21] = bf0[21];
- bf1[22] = bf0[22];
- bf1[23] = bf0[23];
- bf1[24] = bf0[24];
- bf1[25] = bf0[25];
- bf1[26] = bf0[26];
- bf1[27] = bf0[27];
- bf1[28] = bf0[28];
- bf1[29] = bf0[29];
- bf1[30] = bf0[30];
- bf1[31] = bf0[31];
- bf1[32] = half_btf(cospi[63], bf0[32], cospi[1], bf0[63], cos_bit[stage]);
- bf1[33] = half_btf(cospi[31], bf0[33], cospi[33], bf0[62], cos_bit[stage]);
- bf1[34] = half_btf(cospi[47], bf0[34], cospi[17], bf0[61], cos_bit[stage]);
- bf1[35] = half_btf(cospi[15], bf0[35], cospi[49], bf0[60], cos_bit[stage]);
- bf1[36] = half_btf(cospi[55], bf0[36], cospi[9], bf0[59], cos_bit[stage]);
- bf1[37] = half_btf(cospi[23], bf0[37], cospi[41], bf0[58], cos_bit[stage]);
- bf1[38] = half_btf(cospi[39], bf0[38], cospi[25], bf0[57], cos_bit[stage]);
- bf1[39] = half_btf(cospi[7], bf0[39], cospi[57], bf0[56], cos_bit[stage]);
- bf1[40] = half_btf(cospi[59], bf0[40], cospi[5], bf0[55], cos_bit[stage]);
- bf1[41] = half_btf(cospi[27], bf0[41], cospi[37], bf0[54], cos_bit[stage]);
- bf1[42] = half_btf(cospi[43], bf0[42], cospi[21], bf0[53], cos_bit[stage]);
- bf1[43] = half_btf(cospi[11], bf0[43], cospi[53], bf0[52], cos_bit[stage]);
- bf1[44] = half_btf(cospi[51], bf0[44], cospi[13], bf0[51], cos_bit[stage]);
- bf1[45] = half_btf(cospi[19], bf0[45], cospi[45], bf0[50], cos_bit[stage]);
- bf1[46] = half_btf(cospi[35], bf0[46], cospi[29], bf0[49], cos_bit[stage]);
- bf1[47] = half_btf(cospi[3], bf0[47], cospi[61], bf0[48], cos_bit[stage]);
- bf1[48] = half_btf(cospi[3], bf0[48], -cospi[61], bf0[47], cos_bit[stage]);
- bf1[49] = half_btf(cospi[35], bf0[49], -cospi[29], bf0[46], cos_bit[stage]);
- bf1[50] = half_btf(cospi[19], bf0[50], -cospi[45], bf0[45], cos_bit[stage]);
- bf1[51] = half_btf(cospi[51], bf0[51], -cospi[13], bf0[44], cos_bit[stage]);
- bf1[52] = half_btf(cospi[11], bf0[52], -cospi[53], bf0[43], cos_bit[stage]);
- bf1[53] = half_btf(cospi[43], bf0[53], -cospi[21], bf0[42], cos_bit[stage]);
- bf1[54] = half_btf(cospi[27], bf0[54], -cospi[37], bf0[41], cos_bit[stage]);
- bf1[55] = half_btf(cospi[59], bf0[55], -cospi[5], bf0[40], cos_bit[stage]);
- bf1[56] = half_btf(cospi[7], bf0[56], -cospi[57], bf0[39], cos_bit[stage]);
- bf1[57] = half_btf(cospi[39], bf0[57], -cospi[25], bf0[38], cos_bit[stage]);
- bf1[58] = half_btf(cospi[23], bf0[58], -cospi[41], bf0[37], cos_bit[stage]);
- bf1[59] = half_btf(cospi[55], bf0[59], -cospi[9], bf0[36], cos_bit[stage]);
- bf1[60] = half_btf(cospi[15], bf0[60], -cospi[49], bf0[35], cos_bit[stage]);
- bf1[61] = half_btf(cospi[47], bf0[61], -cospi[17], bf0[34], cos_bit[stage]);
- bf1[62] = half_btf(cospi[31], bf0[62], -cospi[33], bf0[33], cos_bit[stage]);
- bf1[63] = half_btf(cospi[63], bf0[63], -cospi[1], bf0[32], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 11
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[32];
- bf1[2] = bf0[16];
- bf1[3] = bf0[48];
- bf1[4] = bf0[8];
- bf1[5] = bf0[40];
- bf1[6] = bf0[24];
- bf1[7] = bf0[56];
- bf1[8] = bf0[4];
- bf1[9] = bf0[36];
- bf1[10] = bf0[20];
- bf1[11] = bf0[52];
- bf1[12] = bf0[12];
- bf1[13] = bf0[44];
- bf1[14] = bf0[28];
- bf1[15] = bf0[60];
- bf1[16] = bf0[2];
- bf1[17] = bf0[34];
- bf1[18] = bf0[18];
- bf1[19] = bf0[50];
- bf1[20] = bf0[10];
- bf1[21] = bf0[42];
- bf1[22] = bf0[26];
- bf1[23] = bf0[58];
- bf1[24] = bf0[6];
- bf1[25] = bf0[38];
- bf1[26] = bf0[22];
- bf1[27] = bf0[54];
- bf1[28] = bf0[14];
- bf1[29] = bf0[46];
- bf1[30] = bf0[30];
- bf1[31] = bf0[62];
- bf1[32] = bf0[1];
- bf1[33] = bf0[33];
- bf1[34] = bf0[17];
- bf1[35] = bf0[49];
- bf1[36] = bf0[9];
- bf1[37] = bf0[41];
- bf1[38] = bf0[25];
- bf1[39] = bf0[57];
- bf1[40] = bf0[5];
- bf1[41] = bf0[37];
- bf1[42] = bf0[21];
- bf1[43] = bf0[53];
- bf1[44] = bf0[13];
- bf1[45] = bf0[45];
- bf1[46] = bf0[29];
- bf1[47] = bf0[61];
- bf1[48] = bf0[3];
- bf1[49] = bf0[35];
- bf1[50] = bf0[19];
- bf1[51] = bf0[51];
- bf1[52] = bf0[11];
- bf1[53] = bf0[43];
- bf1[54] = bf0[27];
- bf1[55] = bf0[59];
- bf1[56] = bf0[7];
- bf1[57] = bf0[39];
- bf1[58] = bf0[23];
- bf1[59] = bf0[55];
- bf1[60] = bf0[15];
- bf1[61] = bf0[47];
- bf1[62] = bf0[31];
- bf1[63] = bf0[63];
- range_check(stage, input, bf1, size, stage_range[stage]);
-}
-#endif // CONFIG_TX64X64
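A reading aid for the stage-by-stage butterflies in the deleted transform above: every multiply line is a half_btf() call, i.e. a rounded two-term dot product, and cospi_arr(bit) supplies cosine weights scaled to 'bit' fractional bits. The snippet below is a self-contained sketch reconstructed from those call sites, not the library's own definitions (the real round_shift()/half_btf() live in av1_txfm.h, and the cospi table is shipped as precomputed integers rather than the floating-point stand-in used here).

#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>

/* Rounding right shift: add half of the shifted-out range, then shift. */
static int32_t round_shift(int64_t value, int bit) {
  assert(bit >= 1);
  return (int32_t)((value + (1LL << (bit - 1))) >> bit);
}

/* Half butterfly: one output of a two-point rotation,
 * round((w0*in0 + w1*in1) / 2^bit). Each "bf1[i] = half_btf(...)" line in the
 * stages above produces one such output. */
static int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1,
                        int bit) {
  return round_shift((int64_t)w0 * in0 + (int64_t)w1 * in1, bit);
}

int main(void) {
  /* Stand-in for cospi_arr(bit): cos(i*pi/64) scaled by 2^bit (illustrative
   * only; the library uses a precomputed integer table). */
  const int bit = 13;
  int32_t cospi[64];
  for (int i = 0; i < 64; i++)
    cospi[i] =
        (int32_t)lround(cos(i * 3.14159265358979323846 / 64.0) * (1 << bit));

  /* One rotation pair, mirroring the pattern
   *   bf1[2] = half_btf(cospi[48], bf0[2],  cospi[16], bf0[3], ...);
   *   bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], ...);   */
  const int32_t x2 = 100, x3 = -40;
  printf("%d %d\n", (int)half_btf(cospi[48], x2, cospi[16], x3, bit),
         (int)half_btf(cospi[48], x3, -cospi[16], x2, bit));
  return 0;
}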
diff --git a/third_party/aom/av1/common/av1_fwd_txfm1d.h b/third_party/aom/av1/common/av1_fwd_txfm1d.h
deleted file mode 100644
index f880239f7..000000000
--- a/third_party/aom/av1/common/av1_fwd_txfm1d.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AV1_FWD_TXFM1D_H_
-#define AV1_FWD_TXFM1D_H_
-
-#include "av1/common/av1_txfm.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_fdct4_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
- const int8_t *stage_range);
-void av1_fdct8_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
- const int8_t *stage_range);
-void av1_fdct16_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_fdct32_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-#if CONFIG_TX64X64
-void av1_fdct64_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-#endif // CONFIG_TX64X64
-
-void av1_fadst4_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_fadst8_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_fadst16_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_fadst32_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-#if CONFIG_EXT_TX
-void av1_fidentity4_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_fidentity8_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_fidentity16_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_fidentity32_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-#if CONFIG_TX64X64
-void av1_fidentity64_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-#endif // CONFIG_TX64X64
-#endif // CONFIG_EXT_TX
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // AV1_FWD_TXFM1D_H_
diff --git a/third_party/aom/av1/common/av1_fwd_txfm1d_cfg.h b/third_party/aom/av1/common/av1_fwd_txfm1d_cfg.h
deleted file mode 100644
index f2ed93151..000000000
--- a/third_party/aom/av1/common/av1_fwd_txfm1d_cfg.h
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AV1_FWD_TXFM2D_CFG_H_
-#define AV1_FWD_TXFM2D_CFG_H_
-#include "av1/common/enums.h"
-#include "av1/common/av1_fwd_txfm1d.h"
-
-// ---------------- 4x4 1D constants -----------------------
-// shift
-static const int8_t fwd_shift_4[3] = { 2, 0, 0 };
-
-// stage range
-static const int8_t fwd_stage_range_col_dct_4[4] = { 0, 1, 2, 2 };
-static const int8_t fwd_stage_range_row_dct_4[4] = { 2, 3, 3, 3 };
-static const int8_t fwd_stage_range_col_adst_4[6] = { 0, 0, 1, 2, 2, 2 };
-static const int8_t fwd_stage_range_row_adst_4[6] = { 2, 2, 2, 3, 3, 3 };
-static const int8_t fwd_stage_range_idx_4[1] = { 0 };
-
-// cos bit
-static const int8_t fwd_cos_bit_col_dct_4[4] = { 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_dct_4[4] = { 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_col_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
-
-// ---------------- 8x8 1D constants -----------------------
-// shift
-static const int8_t fwd_shift_8[3] = { 2, -1, 0 };
-
-// stage range
-static const int8_t fwd_stage_range_col_dct_8[6] = { 0, 1, 2, 3, 3, 3 };
-static const int8_t fwd_stage_range_row_dct_8[6] = { 3, 4, 5, 5, 5, 5 };
-static const int8_t fwd_stage_range_col_adst_8[8] = { 0, 0, 1, 2, 2, 3, 3, 3 };
-static const int8_t fwd_stage_range_row_adst_8[8] = { 3, 3, 3, 4, 4, 5, 5, 5 };
-static const int8_t fwd_stage_range_idx_8[1] = { 0 };
-
-// cos bit
-static const int8_t fwd_cos_bit_col_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_col_adst_8[8] = {
- 13, 13, 13, 13, 13, 13, 13, 13
-};
-static const int8_t fwd_cos_bit_row_adst_8[8] = {
- 13, 13, 13, 13, 13, 13, 13, 13
-};
-
-// ---------------- 16x16 1D constants -----------------------
-// shift
-static const int8_t fwd_shift_16[3] = { 2, -2, 0 };
-
-// stage range
-static const int8_t fwd_stage_range_col_dct_16[8] = { 0, 1, 2, 3, 4, 4, 4, 4 };
-static const int8_t fwd_stage_range_row_dct_16[8] = { 4, 5, 6, 7, 7, 7, 7, 7 };
-static const int8_t fwd_stage_range_col_adst_16[10] = { 0, 0, 1, 2, 2,
- 3, 3, 4, 4, 4 };
-static const int8_t fwd_stage_range_row_adst_16[10] = {
- 4, 4, 4, 5, 5, 6, 6, 7, 7, 7,
-};
-static const int8_t fwd_stage_range_idx_16[1] = { 0 };
-
-// cos bit
-static const int8_t fwd_cos_bit_col_dct_16[8] = {
- 13, 13, 13, 13, 13, 13, 13, 13
-};
-static const int8_t fwd_cos_bit_row_dct_16[8] = {
- 12, 12, 12, 12, 12, 12, 12, 12
-};
-static const int8_t fwd_cos_bit_col_adst_16[10] = { 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_adst_16[10] = { 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12 };
-
-// ---------------- 32x32 1D constants -----------------------
-// shift
-static const int8_t fwd_shift_32[3] = { 2, -4, 0 };
-
-// stage range
-static const int8_t fwd_stage_range_col_dct_32[10] = { 0, 1, 2, 3, 4,
- 5, 5, 5, 5, 5 };
-static const int8_t fwd_stage_range_row_dct_32[10] = { 5, 6, 7, 8, 9,
- 9, 9, 9, 9, 9 };
-static const int8_t fwd_stage_range_col_adst_32[12] = { 0, 0, 1, 2, 2, 3,
- 3, 4, 4, 5, 5, 5 };
-static const int8_t fwd_stage_range_row_adst_32[12] = { 5, 5, 5, 6, 6, 7,
- 7, 8, 8, 9, 9, 9 };
-static const int8_t fwd_stage_range_idx_32[1] = { 0 };
-
-// cos bit
-static const int8_t fwd_cos_bit_col_dct_32[10] = { 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12 };
-static const int8_t fwd_cos_bit_row_dct_32[10] = { 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12 };
-static const int8_t fwd_cos_bit_col_adst_32[12] = { 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 12 };
-static const int8_t fwd_cos_bit_row_adst_32[12] = { 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 12 };
-
-// ---------------- 64x64 1D constants -----------------------
-// shift
-static const int8_t fwd_shift_64[3] = { 0, -2, -2 };
-
-// stage range
-static const int8_t fwd_stage_range_col_dct_64[12] = { 0, 1, 2, 3, 4, 5,
- 6, 6, 6, 6, 6, 6 };
-static const int8_t fwd_stage_range_row_dct_64[12] = { 6, 7, 8, 9, 10, 11,
- 11, 11, 11, 11, 11, 11 };
-static const int8_t fwd_stage_range_idx_64[1] = { 0 };
-
-// cos bit
-static const int8_t fwd_cos_bit_col_dct_64[12] = { 15, 15, 15, 15, 15, 14,
- 13, 13, 13, 13, 13, 13 };
-static const int8_t fwd_cos_bit_row_dct_64[12] = { 15, 14, 13, 12, 11, 10,
- 10, 10, 10, 10, 10, 10 };
-
-// ---------------- row config fwd_dct_4 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_dct_4 = {
- 4, // .txfm_size
- 4, // .stage_num
- // 0, // .log_scale
- fwd_shift_4, // .shift
- fwd_stage_range_row_dct_4, // .stage_range
- fwd_cos_bit_row_dct_4, // .cos_bit
- TXFM_TYPE_DCT4 // .txfm_type
-};
-
-// ---------------- row config fwd_dct_8 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_dct_8 = {
- 8, // .txfm_size
- 6, // .stage_num
- // 0, // .log_scale
- fwd_shift_8, // .shift
- fwd_stage_range_row_dct_8, // .stage_range
- fwd_cos_bit_row_dct_8, // .cos_bit_
- TXFM_TYPE_DCT8 // .txfm_type
-};
-// ---------------- row config fwd_dct_16 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_dct_16 = {
- 16, // .txfm_size
- 8, // .stage_num
- // 0, // .log_scale
- fwd_shift_16, // .shift
- fwd_stage_range_row_dct_16, // .stage_range
- fwd_cos_bit_row_dct_16, // .cos_bit
- TXFM_TYPE_DCT16 // .txfm_type
-};
-
-// ---------------- row config fwd_dct_32 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_dct_32 = {
- 32, // .txfm_size
- 10, // .stage_num
- // 1, // .log_scale
- fwd_shift_32, // .shift
- fwd_stage_range_row_dct_32, // .stage_range
- fwd_cos_bit_row_dct_32, // .cos_bit_row
- TXFM_TYPE_DCT32 // .txfm_type
-};
-
-// ---------------- row config fwd_dct_64 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_dct_64 = {
- 64, // .txfm_size
- 12, // .stage_num
- fwd_shift_64, // .shift
- fwd_stage_range_row_dct_64, // .stage_range
- fwd_cos_bit_row_dct_64, // .cos_bit
- TXFM_TYPE_DCT64, // .txfm_type_col
-};
-
-// ---------------- row config fwd_adst_4 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_adst_4 = {
- 4, // .txfm_size
- 6, // .stage_num
- // 0, // .log_scale
- fwd_shift_4, // .shift
- fwd_stage_range_row_adst_4, // .stage_range
- fwd_cos_bit_row_adst_4, // .cos_bit
- TXFM_TYPE_ADST4, // .txfm_type
-};
-
-// ---------------- row config fwd_adst_8 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_adst_8 = {
- 8, // .txfm_size
- 8, // .stage_num
- // 0, // .log_scale
- fwd_shift_8, // .shift
- fwd_stage_range_row_adst_8, // .stage_range
- fwd_cos_bit_row_adst_8, // .cos_bit
- TXFM_TYPE_ADST8, // .txfm_type_col
-};
-
-// ---------------- row config fwd_adst_16 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_adst_16 = {
- 16, // .txfm_size
- 10, // .stage_num
- // 0, // .log_scale
- fwd_shift_16, // .shift
- fwd_stage_range_row_adst_16, // .stage_range
- fwd_cos_bit_row_adst_16, // .cos_bit
- TXFM_TYPE_ADST16, // .txfm_type
-};
-
-// ---------------- row config fwd_adst_32 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_row_cfg_adst_32 = {
- 32, // .txfm_size
- 12, // .stage_num
- // 1, // .log_scale
- fwd_shift_32, // .shift
- fwd_stage_range_row_adst_32, // .stage_range
- fwd_cos_bit_row_adst_32, // .cos_bit
- TXFM_TYPE_ADST32, // .txfm_type
-};
-
-// ---------------- col config fwd_dct_4 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_dct_4 = {
- 4, // .txfm_size
- 4, // .stage_num
- // 0, // .log_scale
- fwd_shift_4, // .shift
- fwd_stage_range_col_dct_4, // .stage_range
- fwd_cos_bit_col_dct_4, // .cos_bit
- TXFM_TYPE_DCT4 // .txfm_type
-};
-
-// ---------------- col config fwd_dct_8 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_dct_8 = {
- 8, // .txfm_size
- 6, // .stage_num
- // 0, // .log_scale
- fwd_shift_8, // .shift
- fwd_stage_range_col_dct_8, // .stage_range
- fwd_cos_bit_col_dct_8, // .cos_bit_
- TXFM_TYPE_DCT8 // .txfm_type
-};
-// ---------------- col config fwd_dct_16 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_dct_16 = {
- 16, // .txfm_size
- 8, // .stage_num
- // 0, // .log_scale
- fwd_shift_16, // .shift
- fwd_stage_range_col_dct_16, // .stage_range
- fwd_cos_bit_col_dct_16, // .cos_bit
- TXFM_TYPE_DCT16 // .txfm_type
-};
-
-// ---------------- col config fwd_dct_32 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_dct_32 = {
- 32, // .txfm_size
- 10, // .stage_num
- // 1, // .log_scale
- fwd_shift_32, // .shift
- fwd_stage_range_col_dct_32, // .stage_range
- fwd_cos_bit_col_dct_32, // .cos_bit_col
- TXFM_TYPE_DCT32 // .txfm_type
-};
-
-// ---------------- col config fwd_dct_64 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_dct_64 = {
- 64, // .txfm_size
- 12, // .stage_num
- fwd_shift_64, // .shift
- fwd_stage_range_col_dct_64, // .stage_range
- fwd_cos_bit_col_dct_64, // .cos_bit
- TXFM_TYPE_DCT64, // .txfm_type_col
-};
-
-// ---------------- col config fwd_adst_4 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_adst_4 = {
- 4, // .txfm_size
- 6, // .stage_num
- // 0, // .log_scale
- fwd_shift_4, // .shift
- fwd_stage_range_col_adst_4, // .stage_range
- fwd_cos_bit_col_adst_4, // .cos_bit
- TXFM_TYPE_ADST4, // .txfm_type
-};
-
-// ---------------- col config fwd_adst_8 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_adst_8 = {
- 8, // .txfm_size
- 8, // .stage_num
- // 0, // .log_scale
- fwd_shift_8, // .shift
- fwd_stage_range_col_adst_8, // .stage_range
- fwd_cos_bit_col_adst_8, // .cos_bit
- TXFM_TYPE_ADST8, // .txfm_type_col
-};
-
-// ---------------- col config fwd_adst_16 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_adst_16 = {
- 16, // .txfm_size
- 10, // .stage_num
- // 0, // .log_scale
- fwd_shift_16, // .shift
- fwd_stage_range_col_adst_16, // .stage_range
- fwd_cos_bit_col_adst_16, // .cos_bit
- TXFM_TYPE_ADST16, // .txfm_type
-};
-
-// ---------------- col config fwd_adst_32 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_col_cfg_adst_32 = {
- 32, // .txfm_size
- 12, // .stage_num
- // 1, // .log_scale
- fwd_shift_32, // .shift
- fwd_stage_range_col_adst_32, // .stage_range
- fwd_cos_bit_col_adst_32, // .cos_bit
- TXFM_TYPE_ADST32, // .txfm_type
-};
-
-#if CONFIG_EXT_TX
-// identity does not need to differentiate between row and col
-// ---------------- row/col config fwd_identity_4 ----------
-static const TXFM_1D_CFG fwd_txfm_1d_cfg_identity_4 = {
- 4, // .txfm_size
- 1, // .stage_num
- // 0, // .log_scale
- fwd_shift_4, // .shift
- fwd_stage_range_idx_4, // .stage_range
- NULL, // .cos_bit
- TXFM_TYPE_IDENTITY4, // .txfm_type
-};
-
-// ---------------- row/col config fwd_identity_8 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_cfg_identity_8 = {
- 8, // .txfm_size
- 1, // .stage_num
- // 0, // .log_scale
- fwd_shift_8, // .shift
- fwd_stage_range_idx_8, // .stage_range
- NULL, // .cos_bit
- TXFM_TYPE_IDENTITY8, // .txfm_type
-};
-
-// ---------------- row/col config fwd_identity_16 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_cfg_identity_16 = {
- 16, // .txfm_size
- 1, // .stage_num
- // 0, // .log_scale
- fwd_shift_16, // .shift
- fwd_stage_range_idx_16, // .stage_range
- NULL, // .cos_bit
- TXFM_TYPE_IDENTITY16, // .txfm_type
-};
-
-// ---------------- row/col config fwd_identity_32 ----------------
-static const TXFM_1D_CFG fwd_txfm_1d_cfg_identity_32 = {
- 32, // .txfm_size
- 1, // .stage_num
- // 1, // .log_scale
- fwd_shift_32, // .shift
- fwd_stage_range_idx_32, // .stage_range
- NULL, // .cos_bit
- TXFM_TYPE_IDENTITY32, // .txfm_type
-};
-#endif // CONFIG_EXT_TX
-#endif // AV1_FWD_TXFM2D_CFG_H_
diff --git a/third_party/aom/av1/common/av1_fwd_txfm2d.c b/third_party/aom/av1/common/av1_fwd_txfm2d.c
deleted file mode 100644
index 740c63322..000000000
--- a/third_party/aom/av1/common/av1_fwd_txfm2d.c
+++ /dev/null
@@ -1,413 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "./av1_rtcd.h"
-#include "aom_dsp/txfm_common.h"
-#include "av1/common/enums.h"
-#include "av1/common/av1_fwd_txfm1d.h"
-#include "av1/common/av1_fwd_txfm1d_cfg.h"
-#include "av1/common/av1_txfm.h"
-
-static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
- switch (txfm_type) {
- case TXFM_TYPE_DCT4: return av1_fdct4_new;
- case TXFM_TYPE_DCT8: return av1_fdct8_new;
- case TXFM_TYPE_DCT16: return av1_fdct16_new;
- case TXFM_TYPE_DCT32: return av1_fdct32_new;
-#if CONFIG_TX64X64
- case TXFM_TYPE_DCT64: return av1_fdct64_new;
-#endif // CONFIG_TX64X64
- case TXFM_TYPE_ADST4: return av1_fadst4_new;
- case TXFM_TYPE_ADST8: return av1_fadst8_new;
- case TXFM_TYPE_ADST16: return av1_fadst16_new;
- case TXFM_TYPE_ADST32: return av1_fadst32_new;
-#if CONFIG_EXT_TX
- case TXFM_TYPE_IDENTITY4: return av1_fidentity4_c;
- case TXFM_TYPE_IDENTITY8: return av1_fidentity8_c;
- case TXFM_TYPE_IDENTITY16: return av1_fidentity16_c;
- case TXFM_TYPE_IDENTITY32: return av1_fidentity32_c;
-#if CONFIG_TX64X64
- case TXFM_TYPE_IDENTITY64: return av1_fidentity64_c;
-#endif // CONFIG_TX64X64
-#endif // CONFIG_EXT_TX
- default: assert(0); return NULL;
- }
-}
-
-void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
- const TXFM_2D_FLIP_CFG *cfg, int bd) {
- // Note when assigning txfm_size_col, we use the txfm_size from the
- // row configuration and vice versa. This is intentionally done to
- // accurately perform rectangular transforms. When the transform is
- // rectangular, the number of columns will be the same as the
- // txfm_size stored in the row cfg struct. It will make no difference
- // for square transforms.
- const int txfm_size_col = cfg->row_cfg->txfm_size;
- const int txfm_size_row = cfg->col_cfg->txfm_size;
- // Take the shift from the larger dimension in the rectangular case.
- const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
- : cfg->col_cfg->shift;
- // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
- for (int i = 0; i < cfg->col_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
- stage_range_col[i] = cfg->col_cfg->stage_range[i] + shift[0] + bd + 1;
- }
-
- // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
- for (int i = 0; i < cfg->row_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
- stage_range_row[i] =
- cfg->row_cfg->stage_range[i] + shift[0] + shift[1] + bd + 1;
- }
-}
-
-static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_FLIP_CFG *cfg,
- int32_t *buf, int bd) {
- int c, r;
- // Note when assigning txfm_size_col, we use the txfm_size from the
- // row configuration and vice versa. This is intentionally done to
- // accurately perform rectangular transforms. When the transform is
- // rectangular, the number of columns will be the same as the
- // txfm_size stored in the row cfg struct. It will make no difference
- // for square transforms.
- const int txfm_size_col = cfg->row_cfg->txfm_size;
- const int txfm_size_row = cfg->col_cfg->txfm_size;
- // Take the shift from the larger dimension in the rectangular case.
- const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
- : cfg->col_cfg->shift;
- int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
- int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
- assert(cfg->col_cfg->stage_num <= MAX_TXFM_STAGE_NUM);
- assert(cfg->row_cfg->stage_num <= MAX_TXFM_STAGE_NUM);
- av1_gen_fwd_stage_range(stage_range_col, stage_range_row, cfg, bd);
-
- const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
- const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
- const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->col_cfg->txfm_type);
- const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->row_cfg->txfm_type);
-
- // use output buffer as temp buffer
- int32_t *temp_in = output;
- int32_t *temp_out = output + txfm_size_row;
-
- // Columns
- for (c = 0; c < txfm_size_col; ++c) {
- if (cfg->ud_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r) temp_in[r] = input[r * stride + c];
- } else {
- for (r = 0; r < txfm_size_row; ++r)
- // flip upside down
- temp_in[r] = input[(txfm_size_row - r - 1) * stride + c];
- }
- round_shift_array(temp_in, txfm_size_row, -shift[0]);
- // Multiply everything by Sqrt2 on the larger dimension if the
- // transform is rectangular
- if (txfm_size_col > txfm_size_row) {
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = (int32_t)fdct_round_shift(temp_in[r] * Sqrt2);
- }
- txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
- round_shift_array(temp_out, txfm_size_row, -shift[1]);
- if (cfg->lr_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r)
- buf[r * txfm_size_col + c] = temp_out[r];
- } else {
- for (r = 0; r < txfm_size_row; ++r)
- // flip from left to right
- buf[r * txfm_size_col + (txfm_size_col - c - 1)] = temp_out[r];
- }
- }
-
- // Rows
- for (r = 0; r < txfm_size_row; ++r) {
- // Multiply everything by Sqrt2 on the larger dimension if the
- // transform is rectangular
- if (txfm_size_row > txfm_size_col) {
- for (c = 0; c < txfm_size_col; ++c)
- buf[r * txfm_size_col + c] =
- (int32_t)fdct_round_shift(buf[r * txfm_size_col + c] * Sqrt2);
- }
- txfm_func_row(buf + r * txfm_size_col, output + r * txfm_size_col,
- cos_bit_row, stage_range_row);
- round_shift_array(output + r * txfm_size_col, txfm_size_col, -shift[2]);
- }
-}
-
-void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
- int32_t txfm_buf[4 * 8];
- int16_t rinput[4 * 8];
- TX_SIZE tx_size = TX_4X8;
- TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
- TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
- int w = tx_size_wide[tx_size];
- int h = tx_size_high[tx_size];
- int rw = h;
- int rh = w;
- transpose_int16(rinput, rw, input, stride, w, h);
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rtx_type, rtx_size);
- fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
- transpose_int32(output, w, txfm_buf, rw, rw, rh);
-#else
- int32_t txfm_buf[4 * 8];
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_4X8);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-#endif
-}
-
-void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[8 * 4];
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_8X4);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
- int32_t txfm_buf[8 * 16];
- int16_t rinput[8 * 16];
- TX_SIZE tx_size = TX_8X16;
- TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
- TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
- int w = tx_size_wide[tx_size];
- int h = tx_size_high[tx_size];
- int rw = h;
- int rh = w;
- transpose_int16(rinput, rw, input, stride, w, h);
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rtx_type, rtx_size);
- fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
- transpose_int32(output, w, txfm_buf, rw, rw, rh);
-#else
- int32_t txfm_buf[8 * 16];
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_8X16);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-#endif
-}
-
-void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[16 * 8];
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_16X8);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
- int32_t txfm_buf[16 * 32];
- int16_t rinput[16 * 32];
- TX_SIZE tx_size = TX_16X32;
- TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
- TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
- int w = tx_size_wide[tx_size];
- int h = tx_size_high[tx_size];
- int rw = h;
- int rh = w;
- transpose_int16(rinput, rw, input, stride, w, h);
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rtx_type, rtx_size);
- fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
- transpose_int32(output, w, txfm_buf, rw, rw, rh);
-#else
- int32_t txfm_buf[16 * 32];
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_16X32);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-#endif
-}
-
-void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[32 * 16];
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X16);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[4 * 4];
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_4X4);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[8 * 8];
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_8X8);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[16 * 16];
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_16X16);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[32 * 32];
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X32);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-#if CONFIG_TX64X64
-void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[64 * 64];
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_64x64_cfg(tx_type);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[32 * 64];
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_32x64_cfg(tx_type);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_64x32_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[64 * 32];
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_64x32_cfg(tx_type);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-#endif // CONFIG_TX64X64
-
-static const TXFM_1D_CFG *fwd_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
- // DCT
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &fwd_txfm_1d_col_cfg_dct_4, &fwd_txfm_1d_col_cfg_dct_8,
- &fwd_txfm_1d_col_cfg_dct_16, &fwd_txfm_1d_col_cfg_dct_32 },
- // ADST
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &fwd_txfm_1d_col_cfg_adst_4, &fwd_txfm_1d_col_cfg_adst_8,
- &fwd_txfm_1d_col_cfg_adst_16, &fwd_txfm_1d_col_cfg_adst_32 },
-#if CONFIG_EXT_TX
- // FLIPADST
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &fwd_txfm_1d_col_cfg_adst_4, &fwd_txfm_1d_col_cfg_adst_8,
- &fwd_txfm_1d_col_cfg_adst_16, &fwd_txfm_1d_col_cfg_adst_32 },
- // IDENTITY
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &fwd_txfm_1d_cfg_identity_4, &fwd_txfm_1d_cfg_identity_8,
- &fwd_txfm_1d_cfg_identity_16, &fwd_txfm_1d_cfg_identity_32 },
-#endif // CONFIG_EXT_TX
-};
-
-static const TXFM_1D_CFG *fwd_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
- // DCT
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &fwd_txfm_1d_row_cfg_dct_4, &fwd_txfm_1d_row_cfg_dct_8,
- &fwd_txfm_1d_row_cfg_dct_16, &fwd_txfm_1d_row_cfg_dct_32 },
- // ADST
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &fwd_txfm_1d_row_cfg_adst_4, &fwd_txfm_1d_row_cfg_adst_8,
- &fwd_txfm_1d_row_cfg_adst_16, &fwd_txfm_1d_row_cfg_adst_32 },
-#if CONFIG_EXT_TX
- // FLIPADST
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &fwd_txfm_1d_row_cfg_adst_4, &fwd_txfm_1d_row_cfg_adst_8,
- &fwd_txfm_1d_row_cfg_adst_16, &fwd_txfm_1d_row_cfg_adst_32 },
- // IDENTITY
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &fwd_txfm_1d_cfg_identity_4, &fwd_txfm_1d_cfg_identity_8,
- &fwd_txfm_1d_cfg_identity_16, &fwd_txfm_1d_cfg_identity_32 },
-#endif // CONFIG_EXT_TX
-};
-
-TXFM_2D_FLIP_CFG av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size) {
- TXFM_2D_FLIP_CFG cfg;
- set_flip_cfg(tx_type, &cfg);
- const TX_TYPE_1D tx_type_col = vtx_tab[tx_type];
- const TX_TYPE_1D tx_type_row = htx_tab[tx_type];
- const TX_SIZE tx_size_col = txsize_vert_map[tx_size];
- const TX_SIZE tx_size_row = txsize_horz_map[tx_size];
- cfg.col_cfg = fwd_txfm_col_cfg_ls[tx_type_col][tx_size_col];
- cfg.row_cfg = fwd_txfm_row_cfg_ls[tx_type_row][tx_size_row];
- return cfg;
-}
-
-#if CONFIG_TX64X64
-TXFM_2D_FLIP_CFG av1_get_fwd_txfm_32x64_cfg(TX_TYPE tx_type) {
- TXFM_2D_FLIP_CFG cfg;
- const TX_TYPE_1D tx_type_row = htx_tab[tx_type];
- const TX_SIZE tx_size_row = txsize_horz_map[TX_32X64];
- switch (tx_type) {
- case DCT_DCT:
- cfg.col_cfg = &fwd_txfm_1d_col_cfg_dct_64;
- cfg.row_cfg = fwd_txfm_row_cfg_ls[tx_type_row][tx_size_row];
- cfg.ud_flip = 0;
- cfg.lr_flip = 0;
- break;
- default: assert(0);
- }
- return cfg;
-}
-
-TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x32_cfg(TX_TYPE tx_type) {
- TXFM_2D_FLIP_CFG cfg;
- const TX_TYPE_1D tx_type_col = vtx_tab[tx_type];
- const TX_SIZE tx_size_col = txsize_vert_map[TX_64X32];
- switch (tx_type) {
- case DCT_DCT:
- cfg.col_cfg = fwd_txfm_col_cfg_ls[tx_type_col][tx_size_col];
- cfg.row_cfg = &fwd_txfm_1d_row_cfg_dct_64;
- cfg.ud_flip = 0;
- cfg.lr_flip = 0;
- break;
- default: assert(0);
- }
- return cfg;
-}
-
-TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x64_cfg(TX_TYPE tx_type) {
- TXFM_2D_FLIP_CFG cfg;
- switch (tx_type) {
- case DCT_DCT:
- cfg.col_cfg = &fwd_txfm_1d_col_cfg_dct_64;
- cfg.row_cfg = &fwd_txfm_1d_row_cfg_dct_64;
- cfg.ud_flip = 0;
- cfg.lr_flip = 0;
- break;
- default:
- cfg.ud_flip = 0;
- cfg.lr_flip = 0;
- assert(0);
- }
- return cfg;
-}
-#endif // CONFIG_TX64X64
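For orientation, the deleted 2-D entry points above all share one shape: they read a 16-bit residual block at a caller-supplied stride and write width*height 32-bit coefficients, with the transform pair selected by tx_type and the rounding driven by the bit depth. A minimal usage sketch against the signatures shown above; the header paths and the DCT_DCT constant come from the includes in the deleted file itself, and the build wiring is assumed rather than verified:

#include <stdint.h>
#include "./av1_rtcd.h"       /* assumed to declare av1_fwd_txfm2d_4x4_c() */
#include "av1/common/enums.h" /* TX_TYPE, DCT_DCT */

/* Forward-transform one densely stored 4x4 residual block via the C path. */
static void fwd_4x4_dct_sketch(const int16_t residual[4 * 4],
                               int32_t coeff[4 * 4]) {
  const int stride = 4; /* 4 samples per residual row */
  const int bd = 8;     /* 8-bit pipeline */
  av1_fwd_txfm2d_4x4_c(residual, coeff, stride, DCT_DCT, bd);
}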
diff --git a/third_party/aom/av1/common/av1_inv_txfm1d.c b/third_party/aom/av1/common/av1_inv_txfm1d.c
index 51f4b6362..8514dc64c 100644
--- a/third_party/aom/av1/common/av1_inv_txfm1d.c
+++ b/third_party/aom/av1/common/av1_inv_txfm1d.c
@@ -10,28 +10,28 @@
*/
#include <stdlib.h>
-#include "aom_dsp/inv_txfm.h"
#include "av1/common/av1_inv_txfm1d.h"
-#if CONFIG_COEFFICIENT_RANGE_CHECKING
-void range_check_func(int32_t stage, const int32_t *input, const int32_t *buf,
- int32_t size, int8_t bit) {
- const int64_t maxValue = (1LL << (bit - 1)) - 1;
- const int64_t minValue = -(1LL << (bit - 1));
+static void range_check_buf(int32_t stage, const int32_t *input,
+ const int32_t *buf, int32_t size, int8_t bit) {
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+ const int64_t max_value = (1LL << (bit - 1)) - 1;
+ const int64_t min_value = -(1LL << (bit - 1));
int in_range = 1;
for (int i = 0; i < size; ++i) {
- if (buf[i] < minValue || buf[i] > maxValue) {
+ if (buf[i] < min_value || buf[i] > max_value) {
in_range = 0;
}
}
if (!in_range) {
fprintf(stderr, "Error: coeffs contain out-of-range values\n");
+ fprintf(stderr, "size: %d\n", size);
fprintf(stderr, "stage: %d\n", stage);
- fprintf(stderr, "allowed range: [%" PRId64 ";%" PRId64 "]\n", minValue,
- maxValue);
+ fprintf(stderr, "allowed range: [%" PRId64 ";%" PRId64 "]\n", min_value,
+ max_value);
fprintf(stderr, "coeffs: ");
@@ -53,81 +53,73 @@ void range_check_func(int32_t stage, const int32_t *input, const int32_t *buf,
}
assert(in_range);
-}
-
-#define range_check(stage, input, buf, size, bit) \
- range_check_func(stage, input, buf, size, bit)
#else
-#define range_check(stage, input, buf, size, bit) \
- { \
- (void)stage; \
- (void)input; \
- (void)buf; \
- (void)size; \
- (void)bit; \
- }
+ (void)stage;
+ (void)input;
+ (void)buf;
+ (void)size;
+ (void)bit;
#endif
+}
// TODO(angiebird): Make 1-d txfm functions static
-void av1_idct4_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
+//
+
+void av1_idct4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
const int8_t *stage_range) {
+ assert(output != input);
const int32_t size = 4;
- const int32_t *cospi;
+ const int32_t *cospi = cospi_arr(cos_bit);
int32_t stage = 0;
int32_t *bf0, *bf1;
int32_t step[4];
// stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
// stage 1;
stage++;
- assert(output != input);
bf1 = output;
bf1[0] = input[0];
bf1[1] = input[2];
bf1[2] = input[1];
bf1[3] = input[3];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 2
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
- bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 3
stage++;
bf0 = step;
bf1 = output;
- bf1[0] = bf0[0] + bf0[3];
- bf1[1] = bf0[1] + bf0[2];
- bf1[2] = bf0[1] - bf0[2];
- bf1[3] = bf0[0] - bf0[3];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
}
-void av1_idct8_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
+void av1_idct8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
const int8_t *stage_range) {
+ assert(output != input);
const int32_t size = 8;
- const int32_t *cospi;
+ const int32_t *cospi = cospi_arr(cos_bit);
int32_t stage = 0;
int32_t *bf0, *bf1;
int32_t step[8];
// stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
// stage 1;
stage++;
- assert(output != input);
bf1 = output;
bf1[0] = input[0];
bf1[1] = input[4];
@@ -137,83 +129,78 @@ void av1_idct8_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
bf1[5] = input[5];
bf1[6] = input[3];
bf1[7] = input[7];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 2
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
bf1[0] = bf0[0];
bf1[1] = bf0[1];
bf1[2] = bf0[2];
bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
- bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
- bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit);
+ bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit);
+ bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit);
+ bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 3
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = step;
bf1 = output;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
- bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
- bf1[4] = bf0[4] + bf0[5];
- bf1[5] = bf0[4] - bf0[5];
- bf1[6] = -bf0[6] + bf0[7];
- bf1[7] = bf0[6] + bf0[7];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
+ bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]);
+ bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 4
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
- bf1[0] = bf0[0] + bf0[3];
- bf1[1] = bf0[1] + bf0[2];
- bf1[2] = bf0[1] - bf0[2];
- bf1[3] = bf0[0] - bf0[3];
+ bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
+ bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
bf1[7] = bf0[7];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 5
stage++;
bf0 = step;
bf1 = output;
- bf1[0] = bf0[0] + bf0[7];
- bf1[1] = bf0[1] + bf0[6];
- bf1[2] = bf0[2] + bf0[5];
- bf1[3] = bf0[3] + bf0[4];
- bf1[4] = bf0[3] - bf0[4];
- bf1[5] = bf0[2] - bf0[5];
- bf1[6] = bf0[1] - bf0[6];
- bf1[7] = bf0[0] - bf0[7];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]);
+ bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]);
+ bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]);
}
-void av1_idct16_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
+void av1_idct16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range) {
+ assert(output != input);
const int32_t size = 16;
- const int32_t *cospi;
+ const int32_t *cospi = cospi_arr(cos_bit);
int32_t stage = 0;
int32_t *bf0, *bf1;
int32_t step[16];
// stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
// stage 1;
stage++;
- assert(output != input);
bf1 = output;
bf1[0] = input[0];
bf1[1] = input[8];
@@ -231,11 +218,10 @@ void av1_idct16_new(const int32_t *input, int32_t *output,
bf1[13] = input[11];
bf1[14] = input[7];
bf1[15] = input[15];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 2
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
bf1[0] = bf0[0];
@@ -246,146 +232,140 @@ void av1_idct16_new(const int32_t *input, int32_t *output,
bf1[5] = bf0[5];
bf1[6] = bf0[6];
bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]);
- bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]);
- bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]);
- bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]);
- bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]);
- bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]);
- bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]);
- bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit);
+ bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit);
+ bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit);
+ bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit);
+ bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit);
+ bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit);
+ bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit);
+ bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 3
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = step;
bf1 = output;
bf1[0] = bf0[0];
bf1[1] = bf0[1];
bf1[2] = bf0[2];
bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
- bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
- bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
- bf1[8] = bf0[8] + bf0[9];
- bf1[9] = bf0[8] - bf0[9];
- bf1[10] = -bf0[10] + bf0[11];
- bf1[11] = bf0[10] + bf0[11];
- bf1[12] = bf0[12] + bf0[13];
- bf1[13] = bf0[12] - bf0[13];
- bf1[14] = -bf0[14] + bf0[15];
- bf1[15] = bf0[14] + bf0[15];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit);
+ bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit);
+ bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit);
+ bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit);
+ bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]);
+ bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]);
+ bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]);
+ bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]);
+ bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]);
+ bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]);
+ bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]);
+ bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 4
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
- bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
- bf1[4] = bf0[4] + bf0[5];
- bf1[5] = bf0[4] - bf0[5];
- bf1[6] = -bf0[6] + bf0[7];
- bf1[7] = bf0[6] + bf0[7];
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
+ bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]);
+ bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]);
bf1[8] = bf0[8];
- bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
- bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
bf1[11] = bf0[11];
bf1[12] = bf0[12];
- bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]);
- bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]);
+ bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit);
+ bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit);
bf1[15] = bf0[15];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 5
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = step;
bf1 = output;
- bf1[0] = bf0[0] + bf0[3];
- bf1[1] = bf0[1] + bf0[2];
- bf1[2] = bf0[1] - bf0[2];
- bf1[3] = bf0[0] - bf0[3];
+ bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
+ bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
bf1[7] = bf0[7];
- bf1[8] = bf0[8] + bf0[11];
- bf1[9] = bf0[9] + bf0[10];
- bf1[10] = bf0[9] - bf0[10];
- bf1[11] = bf0[8] - bf0[11];
- bf1[12] = -bf0[12] + bf0[15];
- bf1[13] = -bf0[13] + bf0[14];
- bf1[14] = bf0[13] + bf0[14];
- bf1[15] = bf0[12] + bf0[15];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]);
+ bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]);
+ bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]);
+ bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]);
+ bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]);
+ bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]);
+ bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]);
+ bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 6
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
- bf1[0] = bf0[0] + bf0[7];
- bf1[1] = bf0[1] + bf0[6];
- bf1[2] = bf0[2] + bf0[5];
- bf1[3] = bf0[3] + bf0[4];
- bf1[4] = bf0[3] - bf0[4];
- bf1[5] = bf0[2] - bf0[5];
- bf1[6] = bf0[1] - bf0[6];
- bf1[7] = bf0[0] - bf0[7];
+ bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]);
+ bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]);
+ bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]);
bf1[8] = bf0[8];
bf1[9] = bf0[9];
- bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
- bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
- bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
- bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
+ bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
+ bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
bf1[14] = bf0[14];
bf1[15] = bf0[15];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 7
stage++;
bf0 = step;
bf1 = output;
- bf1[0] = bf0[0] + bf0[15];
- bf1[1] = bf0[1] + bf0[14];
- bf1[2] = bf0[2] + bf0[13];
- bf1[3] = bf0[3] + bf0[12];
- bf1[4] = bf0[4] + bf0[11];
- bf1[5] = bf0[5] + bf0[10];
- bf1[6] = bf0[6] + bf0[9];
- bf1[7] = bf0[7] + bf0[8];
- bf1[8] = bf0[7] - bf0[8];
- bf1[9] = bf0[6] - bf0[9];
- bf1[10] = bf0[5] - bf0[10];
- bf1[11] = bf0[4] - bf0[11];
- bf1[12] = bf0[3] - bf0[12];
- bf1[13] = bf0[2] - bf0[13];
- bf1[14] = bf0[1] - bf0[14];
- bf1[15] = bf0[0] - bf0[15];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]);
+ bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]);
+ bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]);
+ bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]);
+ bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]);
+ bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]);
+ bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]);
+ bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]);
+ bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]);
+ bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]);
+ bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]);
}
-void av1_idct32_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
+void av1_idct32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range) {
+ assert(output != input);
const int32_t size = 32;
- const int32_t *cospi;
+ const int32_t *cospi = cospi_arr(cos_bit);
int32_t stage = 0;
int32_t *bf0, *bf1;
int32_t step[32];
// stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
// stage 1;
stage++;
- assert(output != input);
bf1 = output;
bf1[0] = input[0];
bf1[1] = input[16];
@@ -419,11 +399,10 @@ void av1_idct32_new(const int32_t *input, int32_t *output,
bf1[29] = input[23];
bf1[30] = input[15];
bf1[31] = input[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 2
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
bf1[0] = bf0[0];
@@ -442,27 +421,26 @@ void av1_idct32_new(const int32_t *input, int32_t *output,
bf1[13] = bf0[13];
bf1[14] = bf0[14];
bf1[15] = bf0[15];
- bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit[stage]);
- bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit[stage]);
- bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit[stage]);
- bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit[stage]);
- bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit[stage]);
- bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit[stage]);
- bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit[stage]);
- bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit[stage]);
- bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit[stage]);
- bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit[stage]);
- bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit[stage]);
- bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit[stage]);
- bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit[stage]);
- bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit[stage]);
- bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit[stage]);
- bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit);
+ bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit);
+ bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit);
+ bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit);
+ bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit);
+ bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit);
+ bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit);
+ bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit);
+ bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit);
+ bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit);
+ bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit);
+ bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit);
+ bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit);
+ bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit);
+ bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit);
+ bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 3
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = step;
bf1 = output;
bf1[0] = bf0[0];
@@ -473,572 +451,506 @@ void av1_idct32_new(const int32_t *input, int32_t *output,
bf1[5] = bf0[5];
bf1[6] = bf0[6];
bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]);
- bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]);
- bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]);
- bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]);
- bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]);
- bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]);
- bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]);
- bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]);
- bf1[16] = bf0[16] + bf0[17];
- bf1[17] = bf0[16] - bf0[17];
- bf1[18] = -bf0[18] + bf0[19];
- bf1[19] = bf0[18] + bf0[19];
- bf1[20] = bf0[20] + bf0[21];
- bf1[21] = bf0[20] - bf0[21];
- bf1[22] = -bf0[22] + bf0[23];
- bf1[23] = bf0[22] + bf0[23];
- bf1[24] = bf0[24] + bf0[25];
- bf1[25] = bf0[24] - bf0[25];
- bf1[26] = -bf0[26] + bf0[27];
- bf1[27] = bf0[26] + bf0[27];
- bf1[28] = bf0[28] + bf0[29];
- bf1[29] = bf0[28] - bf0[29];
- bf1[30] = -bf0[30] + bf0[31];
- bf1[31] = bf0[30] + bf0[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit);
+ bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit);
+ bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit);
+ bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit);
+ bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit);
+ bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit);
+ bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit);
+ bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit);
+ bf1[16] = clamp_value(bf0[16] + bf0[17], stage_range[stage]);
+ bf1[17] = clamp_value(bf0[16] - bf0[17], stage_range[stage]);
+ bf1[18] = clamp_value(-bf0[18] + bf0[19], stage_range[stage]);
+ bf1[19] = clamp_value(bf0[18] + bf0[19], stage_range[stage]);
+ bf1[20] = clamp_value(bf0[20] + bf0[21], stage_range[stage]);
+ bf1[21] = clamp_value(bf0[20] - bf0[21], stage_range[stage]);
+ bf1[22] = clamp_value(-bf0[22] + bf0[23], stage_range[stage]);
+ bf1[23] = clamp_value(bf0[22] + bf0[23], stage_range[stage]);
+ bf1[24] = clamp_value(bf0[24] + bf0[25], stage_range[stage]);
+ bf1[25] = clamp_value(bf0[24] - bf0[25], stage_range[stage]);
+ bf1[26] = clamp_value(-bf0[26] + bf0[27], stage_range[stage]);
+ bf1[27] = clamp_value(bf0[26] + bf0[27], stage_range[stage]);
+ bf1[28] = clamp_value(bf0[28] + bf0[29], stage_range[stage]);
+ bf1[29] = clamp_value(bf0[28] - bf0[29], stage_range[stage]);
+ bf1[30] = clamp_value(-bf0[30] + bf0[31], stage_range[stage]);
+ bf1[31] = clamp_value(bf0[30] + bf0[31], stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 4
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
bf1[0] = bf0[0];
bf1[1] = bf0[1];
bf1[2] = bf0[2];
bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
- bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
- bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
- bf1[8] = bf0[8] + bf0[9];
- bf1[9] = bf0[8] - bf0[9];
- bf1[10] = -bf0[10] + bf0[11];
- bf1[11] = bf0[10] + bf0[11];
- bf1[12] = bf0[12] + bf0[13];
- bf1[13] = bf0[12] - bf0[13];
- bf1[14] = -bf0[14] + bf0[15];
- bf1[15] = bf0[14] + bf0[15];
+ bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit);
+ bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit);
+ bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit);
+ bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit);
+ bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]);
+ bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]);
+ bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]);
+ bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]);
+ bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]);
+ bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]);
+ bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]);
+ bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]);
bf1[16] = bf0[16];
- bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
- bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+ bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
+ bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
bf1[19] = bf0[19];
bf1[20] = bf0[20];
- bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
- bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
+ bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
bf1[23] = bf0[23];
bf1[24] = bf0[24];
- bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit[stage]);
- bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit[stage]);
+ bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit);
+ bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit);
bf1[27] = bf0[27];
bf1[28] = bf0[28];
- bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit[stage]);
- bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit[stage]);
+ bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit);
+ bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit);
bf1[31] = bf0[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 5
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = step;
bf1 = output;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
- bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
- bf1[4] = bf0[4] + bf0[5];
- bf1[5] = bf0[4] - bf0[5];
- bf1[6] = -bf0[6] + bf0[7];
- bf1[7] = bf0[6] + bf0[7];
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
+ bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]);
+ bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]);
bf1[8] = bf0[8];
- bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
- bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
bf1[11] = bf0[11];
bf1[12] = bf0[12];
- bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]);
- bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]);
+ bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit);
+ bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit);
bf1[15] = bf0[15];
- bf1[16] = bf0[16] + bf0[19];
- bf1[17] = bf0[17] + bf0[18];
- bf1[18] = bf0[17] - bf0[18];
- bf1[19] = bf0[16] - bf0[19];
- bf1[20] = -bf0[20] + bf0[23];
- bf1[21] = -bf0[21] + bf0[22];
- bf1[22] = bf0[21] + bf0[22];
- bf1[23] = bf0[20] + bf0[23];
- bf1[24] = bf0[24] + bf0[27];
- bf1[25] = bf0[25] + bf0[26];
- bf1[26] = bf0[25] - bf0[26];
- bf1[27] = bf0[24] - bf0[27];
- bf1[28] = -bf0[28] + bf0[31];
- bf1[29] = -bf0[29] + bf0[30];
- bf1[30] = bf0[29] + bf0[30];
- bf1[31] = bf0[28] + bf0[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[16] = clamp_value(bf0[16] + bf0[19], stage_range[stage]);
+ bf1[17] = clamp_value(bf0[17] + bf0[18], stage_range[stage]);
+ bf1[18] = clamp_value(bf0[17] - bf0[18], stage_range[stage]);
+ bf1[19] = clamp_value(bf0[16] - bf0[19], stage_range[stage]);
+ bf1[20] = clamp_value(-bf0[20] + bf0[23], stage_range[stage]);
+ bf1[21] = clamp_value(-bf0[21] + bf0[22], stage_range[stage]);
+ bf1[22] = clamp_value(bf0[21] + bf0[22], stage_range[stage]);
+ bf1[23] = clamp_value(bf0[20] + bf0[23], stage_range[stage]);
+ bf1[24] = clamp_value(bf0[24] + bf0[27], stage_range[stage]);
+ bf1[25] = clamp_value(bf0[25] + bf0[26], stage_range[stage]);
+ bf1[26] = clamp_value(bf0[25] - bf0[26], stage_range[stage]);
+ bf1[27] = clamp_value(bf0[24] - bf0[27], stage_range[stage]);
+ bf1[28] = clamp_value(-bf0[28] + bf0[31], stage_range[stage]);
+ bf1[29] = clamp_value(-bf0[29] + bf0[30], stage_range[stage]);
+ bf1[30] = clamp_value(bf0[29] + bf0[30], stage_range[stage]);
+ bf1[31] = clamp_value(bf0[28] + bf0[31], stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 6
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
- bf1[0] = bf0[0] + bf0[3];
- bf1[1] = bf0[1] + bf0[2];
- bf1[2] = bf0[1] - bf0[2];
- bf1[3] = bf0[0] - bf0[3];
+ bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
+ bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
bf1[7] = bf0[7];
- bf1[8] = bf0[8] + bf0[11];
- bf1[9] = bf0[9] + bf0[10];
- bf1[10] = bf0[9] - bf0[10];
- bf1[11] = bf0[8] - bf0[11];
- bf1[12] = -bf0[12] + bf0[15];
- bf1[13] = -bf0[13] + bf0[14];
- bf1[14] = bf0[13] + bf0[14];
- bf1[15] = bf0[12] + bf0[15];
+ bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]);
+ bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]);
+ bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]);
+ bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]);
+ bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]);
+ bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]);
+ bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]);
+ bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]);
bf1[16] = bf0[16];
bf1[17] = bf0[17];
- bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
- bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
- bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
- bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+ bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
+ bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
+ bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
+ bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
bf1[22] = bf0[22];
bf1[23] = bf0[23];
bf1[24] = bf0[24];
bf1[25] = bf0[25];
- bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit[stage]);
- bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit[stage]);
- bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit[stage]);
- bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit[stage]);
+ bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit);
+ bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit);
+ bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit);
+ bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit);
bf1[30] = bf0[30];
bf1[31] = bf0[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 7
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = step;
bf1 = output;
- bf1[0] = bf0[0] + bf0[7];
- bf1[1] = bf0[1] + bf0[6];
- bf1[2] = bf0[2] + bf0[5];
- bf1[3] = bf0[3] + bf0[4];
- bf1[4] = bf0[3] - bf0[4];
- bf1[5] = bf0[2] - bf0[5];
- bf1[6] = bf0[1] - bf0[6];
- bf1[7] = bf0[0] - bf0[7];
+ bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]);
+ bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]);
+ bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]);
bf1[8] = bf0[8];
bf1[9] = bf0[9];
- bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
- bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
- bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
- bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
+ bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
+ bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
bf1[14] = bf0[14];
bf1[15] = bf0[15];
- bf1[16] = bf0[16] + bf0[23];
- bf1[17] = bf0[17] + bf0[22];
- bf1[18] = bf0[18] + bf0[21];
- bf1[19] = bf0[19] + bf0[20];
- bf1[20] = bf0[19] - bf0[20];
- bf1[21] = bf0[18] - bf0[21];
- bf1[22] = bf0[17] - bf0[22];
- bf1[23] = bf0[16] - bf0[23];
- bf1[24] = -bf0[24] + bf0[31];
- bf1[25] = -bf0[25] + bf0[30];
- bf1[26] = -bf0[26] + bf0[29];
- bf1[27] = -bf0[27] + bf0[28];
- bf1[28] = bf0[27] + bf0[28];
- bf1[29] = bf0[26] + bf0[29];
- bf1[30] = bf0[25] + bf0[30];
- bf1[31] = bf0[24] + bf0[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[16] = clamp_value(bf0[16] + bf0[23], stage_range[stage]);
+ bf1[17] = clamp_value(bf0[17] + bf0[22], stage_range[stage]);
+ bf1[18] = clamp_value(bf0[18] + bf0[21], stage_range[stage]);
+ bf1[19] = clamp_value(bf0[19] + bf0[20], stage_range[stage]);
+ bf1[20] = clamp_value(bf0[19] - bf0[20], stage_range[stage]);
+ bf1[21] = clamp_value(bf0[18] - bf0[21], stage_range[stage]);
+ bf1[22] = clamp_value(bf0[17] - bf0[22], stage_range[stage]);
+ bf1[23] = clamp_value(bf0[16] - bf0[23], stage_range[stage]);
+ bf1[24] = clamp_value(-bf0[24] + bf0[31], stage_range[stage]);
+ bf1[25] = clamp_value(-bf0[25] + bf0[30], stage_range[stage]);
+ bf1[26] = clamp_value(-bf0[26] + bf0[29], stage_range[stage]);
+ bf1[27] = clamp_value(-bf0[27] + bf0[28], stage_range[stage]);
+ bf1[28] = clamp_value(bf0[27] + bf0[28], stage_range[stage]);
+ bf1[29] = clamp_value(bf0[26] + bf0[29], stage_range[stage]);
+ bf1[30] = clamp_value(bf0[25] + bf0[30], stage_range[stage]);
+ bf1[31] = clamp_value(bf0[24] + bf0[31], stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 8
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
- bf1[0] = bf0[0] + bf0[15];
- bf1[1] = bf0[1] + bf0[14];
- bf1[2] = bf0[2] + bf0[13];
- bf1[3] = bf0[3] + bf0[12];
- bf1[4] = bf0[4] + bf0[11];
- bf1[5] = bf0[5] + bf0[10];
- bf1[6] = bf0[6] + bf0[9];
- bf1[7] = bf0[7] + bf0[8];
- bf1[8] = bf0[7] - bf0[8];
- bf1[9] = bf0[6] - bf0[9];
- bf1[10] = bf0[5] - bf0[10];
- bf1[11] = bf0[4] - bf0[11];
- bf1[12] = bf0[3] - bf0[12];
- bf1[13] = bf0[2] - bf0[13];
- bf1[14] = bf0[1] - bf0[14];
- bf1[15] = bf0[0] - bf0[15];
+ bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]);
+ bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]);
+ bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]);
+ bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]);
+ bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]);
+ bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]);
+ bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]);
+ bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]);
+ bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]);
+ bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]);
+ bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]);
bf1[16] = bf0[16];
bf1[17] = bf0[17];
bf1[18] = bf0[18];
bf1[19] = bf0[19];
- bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
- bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
- bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
- bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
- bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
- bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
- bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
- bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
+ bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
+ bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
+ bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
+ bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
+ bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
+ bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
bf1[28] = bf0[28];
bf1[29] = bf0[29];
bf1[30] = bf0[30];
bf1[31] = bf0[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 9
stage++;
bf0 = step;
bf1 = output;
- bf1[0] = bf0[0] + bf0[31];
- bf1[1] = bf0[1] + bf0[30];
- bf1[2] = bf0[2] + bf0[29];
- bf1[3] = bf0[3] + bf0[28];
- bf1[4] = bf0[4] + bf0[27];
- bf1[5] = bf0[5] + bf0[26];
- bf1[6] = bf0[6] + bf0[25];
- bf1[7] = bf0[7] + bf0[24];
- bf1[8] = bf0[8] + bf0[23];
- bf1[9] = bf0[9] + bf0[22];
- bf1[10] = bf0[10] + bf0[21];
- bf1[11] = bf0[11] + bf0[20];
- bf1[12] = bf0[12] + bf0[19];
- bf1[13] = bf0[13] + bf0[18];
- bf1[14] = bf0[14] + bf0[17];
- bf1[15] = bf0[15] + bf0[16];
- bf1[16] = bf0[15] - bf0[16];
- bf1[17] = bf0[14] - bf0[17];
- bf1[18] = bf0[13] - bf0[18];
- bf1[19] = bf0[12] - bf0[19];
- bf1[20] = bf0[11] - bf0[20];
- bf1[21] = bf0[10] - bf0[21];
- bf1[22] = bf0[9] - bf0[22];
- bf1[23] = bf0[8] - bf0[23];
- bf1[24] = bf0[7] - bf0[24];
- bf1[25] = bf0[6] - bf0[25];
- bf1[26] = bf0[5] - bf0[26];
- bf1[27] = bf0[4] - bf0[27];
- bf1[28] = bf0[3] - bf0[28];
- bf1[29] = bf0[2] - bf0[29];
- bf1[30] = bf0[1] - bf0[30];
- bf1[31] = bf0[0] - bf0[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = clamp_value(bf0[0] + bf0[31], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[30], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[2] + bf0[29], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[3] + bf0[28], stage_range[stage]);
+ bf1[4] = clamp_value(bf0[4] + bf0[27], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[5] + bf0[26], stage_range[stage]);
+ bf1[6] = clamp_value(bf0[6] + bf0[25], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[7] + bf0[24], stage_range[stage]);
+ bf1[8] = clamp_value(bf0[8] + bf0[23], stage_range[stage]);
+ bf1[9] = clamp_value(bf0[9] + bf0[22], stage_range[stage]);
+ bf1[10] = clamp_value(bf0[10] + bf0[21], stage_range[stage]);
+ bf1[11] = clamp_value(bf0[11] + bf0[20], stage_range[stage]);
+ bf1[12] = clamp_value(bf0[12] + bf0[19], stage_range[stage]);
+ bf1[13] = clamp_value(bf0[13] + bf0[18], stage_range[stage]);
+ bf1[14] = clamp_value(bf0[14] + bf0[17], stage_range[stage]);
+ bf1[15] = clamp_value(bf0[15] + bf0[16], stage_range[stage]);
+ bf1[16] = clamp_value(bf0[15] - bf0[16], stage_range[stage]);
+ bf1[17] = clamp_value(bf0[14] - bf0[17], stage_range[stage]);
+ bf1[18] = clamp_value(bf0[13] - bf0[18], stage_range[stage]);
+ bf1[19] = clamp_value(bf0[12] - bf0[19], stage_range[stage]);
+ bf1[20] = clamp_value(bf0[11] - bf0[20], stage_range[stage]);
+ bf1[21] = clamp_value(bf0[10] - bf0[21], stage_range[stage]);
+ bf1[22] = clamp_value(bf0[9] - bf0[22], stage_range[stage]);
+ bf1[23] = clamp_value(bf0[8] - bf0[23], stage_range[stage]);
+ bf1[24] = clamp_value(bf0[7] - bf0[24], stage_range[stage]);
+ bf1[25] = clamp_value(bf0[6] - bf0[25], stage_range[stage]);
+ bf1[26] = clamp_value(bf0[5] - bf0[26], stage_range[stage]);
+ bf1[27] = clamp_value(bf0[4] - bf0[27], stage_range[stage]);
+ bf1[28] = clamp_value(bf0[3] - bf0[28], stage_range[stage]);
+ bf1[29] = clamp_value(bf0[2] - bf0[29], stage_range[stage]);
+ bf1[30] = clamp_value(bf0[1] - bf0[30], stage_range[stage]);
+ bf1[31] = clamp_value(bf0[0] - bf0[31], stage_range[stage]);
}
-void av1_iadst4_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- const int32_t size = 4;
- const int32_t *cospi;
+void av1_iadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range) {
+ int bit = cos_bit;
+ const int32_t *sinpi = sinpi_arr(bit);
+ int32_t s0, s1, s2, s3, s4, s5, s6, s7;
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[4];
+ int32_t x0 = input[0];
+ int32_t x1 = input[1];
+ int32_t x2 = input[2];
+ int32_t x3 = input[3];
- // stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
+ if (!(x0 | x1 | x2 | x3)) {
+ output[0] = output[1] = output[2] = output[3] = 0;
+ return;
+ }
- // stage 1;
- stage++;
- assert(output != input);
- bf1 = output;
- bf1[0] = input[0];
- bf1[1] = -input[3];
- bf1[2] = -input[1];
- bf1[3] = input[2];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ assert(sinpi[1] + sinpi[2] == sinpi[4]);
+
+ // stage 1
+ s0 = range_check_value(sinpi[1] * x0, stage_range[1] + bit);
+ s1 = range_check_value(sinpi[2] * x0, stage_range[1] + bit);
+ s2 = range_check_value(sinpi[3] * x1, stage_range[1] + bit);
+ s3 = range_check_value(sinpi[4] * x2, stage_range[1] + bit);
+ s4 = range_check_value(sinpi[1] * x2, stage_range[1] + bit);
+ s5 = range_check_value(sinpi[2] * x3, stage_range[1] + bit);
+ s6 = range_check_value(sinpi[4] * x3, stage_range[1] + bit);
// stage 2
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
+ // NOTICE: (x0 - x2) here may use one extra bit compared to the
+ // opt_range_row/col specified in av1_gen_inv_stage_range()
+ s7 = range_check_value((x0 - x2) + x3, stage_range[2]);
// stage 3
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[2];
- bf1[1] = bf0[1] + bf0[3];
- bf1[2] = bf0[0] - bf0[2];
- bf1[3] = bf0[1] - bf0[3];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ s0 = range_check_value(s0 + s3, stage_range[3] + bit);
+ s1 = range_check_value(s1 - s4, stage_range[3] + bit);
+ s3 = range_check_value(s2, stage_range[3] + bit);
+ s2 = range_check_value(sinpi[3] * s7, stage_range[3] + bit);
// stage 4
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[8], bf0[0], cospi[56], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(cospi[56], bf0[0], -cospi[8], bf0[1], cos_bit[stage]);
- bf1[2] = half_btf(cospi[40], bf0[2], cospi[24], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[24], bf0[2], -cospi[40], bf0[3], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
+ s0 = range_check_value(s0 + s5, stage_range[4] + bit);
+ s1 = range_check_value(s1 - s6, stage_range[4] + bit);
// stage 5
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[1];
- bf1[1] = bf0[2];
- bf1[2] = bf0[3];
- bf1[3] = bf0[0];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ x0 = range_check_value(s0 + s3, stage_range[5] + bit);
+ x1 = range_check_value(s1 + s3, stage_range[5] + bit);
+ x2 = range_check_value(s2, stage_range[5] + bit);
+ x3 = range_check_value(s0 + s1, stage_range[5] + bit);
+
+ // stage 6
+ x3 = range_check_value(x3 - s3, stage_range[6] + bit);
+
+ output[0] = round_shift(x0, bit);
+ output[1] = round_shift(x1, bit);
+ output[2] = round_shift(x2, bit);
+ output[3] = round_shift(x3, bit);
+ range_check_buf(6, input, output, 4, stage_range[6]);
}
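/* Illustrative note, not part of the patch: the 4-point inverse ADST is
 * rewritten from the generic butterfly ladder into the direct sinpi
 * formulation. The sinpi[i] * x products carry cos_bit extra fractional bits
 * (hence the "+ bit" in the range_check_value() calls), and a single rounding
 * shift at the end drops them again. A minimal sketch of that final shift,
 * assuming the usual round-to-nearest behaviour:
 *
 *   static inline int32_t round_shift(int64_t value, int bit) {
 *     return (int32_t)((value + (1LL << (bit - 1))) >> bit);
 *   }
 *
 * Tracing the stages above, output[0] is the rounded shift of
 * sinpi[1]*x0 + sinpi[3]*x1 + sinpi[4]*x2 + sinpi[2]*x3
 * (s0 + s3 + s5 accumulated in stages 3-4, plus s2 folded in at stage 5).
 */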
-void av1_iadst8_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
+void av1_iadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range) {
+ assert(output != input);
const int32_t size = 8;
- const int32_t *cospi;
+ const int32_t *cospi = cospi_arr(cos_bit);
int32_t stage = 0;
int32_t *bf0, *bf1;
int32_t step[8];
// stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
// stage 1;
stage++;
- assert(output != input);
bf1 = output;
- bf1[0] = input[0];
- bf1[1] = -input[7];
- bf1[2] = -input[3];
- bf1[3] = input[4];
- bf1[4] = -input[1];
- bf1[5] = input[6];
- bf1[6] = input[2];
- bf1[7] = -input[5];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = input[7];
+ bf1[1] = input[0];
+ bf1[2] = input[5];
+ bf1[3] = input[2];
+ bf1[4] = input[3];
+ bf1[5] = input[4];
+ bf1[6] = input[1];
+ bf1[7] = input[6];
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 2
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit);
+ bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit);
+ bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit);
+ bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit);
+ bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit);
+ bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], cos_bit);
+ bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit);
+ bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 3
stage++;
bf0 = step;
bf1 = output;
- bf1[0] = bf0[0] + bf0[2];
- bf1[1] = bf0[1] + bf0[3];
- bf1[2] = bf0[0] - bf0[2];
- bf1[3] = bf0[1] - bf0[3];
- bf1[4] = bf0[4] + bf0[6];
- bf1[5] = bf0[5] + bf0[7];
- bf1[6] = bf0[4] - bf0[6];
- bf1[7] = bf0[5] - bf0[7];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = clamp_value(bf0[0] + bf0[4], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[5], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[2] + bf0[6], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[3] + bf0[7], stage_range[stage]);
+ bf1[4] = clamp_value(bf0[0] - bf0[4], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[1] - bf0[5], stage_range[stage]);
+ bf1[6] = clamp_value(bf0[2] - bf0[6], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[3] - bf0[7], stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 4
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
bf1[0] = bf0[0];
bf1[1] = bf0[1];
bf1[2] = bf0[2];
bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
- bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
- bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
+ bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
+ bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 5
stage++;
bf0 = step;
bf1 = output;
- bf1[0] = bf0[0] + bf0[4];
- bf1[1] = bf0[1] + bf0[5];
- bf1[2] = bf0[2] + bf0[6];
- bf1[3] = bf0[3] + bf0[7];
- bf1[4] = bf0[0] - bf0[4];
- bf1[5] = bf0[1] - bf0[5];
- bf1[6] = bf0[2] - bf0[6];
- bf1[7] = bf0[3] - bf0[7];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = clamp_value(bf0[0] + bf0[2], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[3], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[0] - bf0[2], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[1] - bf0[3], stage_range[stage]);
+ bf1[4] = clamp_value(bf0[4] + bf0[6], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[5] + bf0[7], stage_range[stage]);
+ bf1[6] = clamp_value(bf0[4] - bf0[6], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[5] - bf0[7], stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 6
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
- bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit[stage]);
- bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit[stage]);
- bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit[stage]);
- bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], cos_bit[stage]);
- bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
+ bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
+ bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 7
stage++;
bf0 = step;
bf1 = output;
- bf1[0] = bf0[1];
- bf1[1] = bf0[6];
- bf1[2] = bf0[3];
- bf1[3] = bf0[4];
- bf1[4] = bf0[5];
- bf1[5] = bf0[2];
- bf1[6] = bf0[7];
- bf1[7] = bf0[0];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = bf0[0];
+ bf1[1] = -bf0[4];
+ bf1[2] = bf0[6];
+ bf1[3] = -bf0[2];
+ bf1[4] = bf0[3];
+ bf1[5] = -bf0[7];
+ bf1[6] = bf0[5];
+ bf1[7] = -bf0[1];
}
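/* Illustrative note, not part of the patch: the 8-point (and, below, the
 * 16-point) inverse ADST keeps the ladder structure but reverses it relative
 * to the old code. The odd-cosine butterflies (cospi[4]/cospi[60], ...) now
 * run in stage 2 and the cospi[32] pair runs last, the input negations of the
 * old stage 1 become sign flips in the final output stage, and the add/sub
 * stages pick up the same clamp_value() saturation as the DCT kernels. */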
-void av1_iadst16_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
+void av1_iadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range) {
+ assert(output != input);
const int32_t size = 16;
- const int32_t *cospi;
+ const int32_t *cospi = cospi_arr(cos_bit);
int32_t stage = 0;
int32_t *bf0, *bf1;
int32_t step[16];
// stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
// stage 1;
stage++;
- assert(output != input);
bf1 = output;
- bf1[0] = input[0];
- bf1[1] = -input[15];
- bf1[2] = -input[7];
- bf1[3] = input[8];
- bf1[4] = -input[3];
- bf1[5] = input[12];
- bf1[6] = input[4];
- bf1[7] = -input[11];
- bf1[8] = -input[1];
- bf1[9] = input[14];
- bf1[10] = input[6];
- bf1[11] = -input[9];
- bf1[12] = input[2];
- bf1[13] = -input[13];
- bf1[14] = -input[5];
- bf1[15] = input[10];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = input[15];
+ bf1[1] = input[0];
+ bf1[2] = input[13];
+ bf1[3] = input[2];
+ bf1[4] = input[11];
+ bf1[5] = input[4];
+ bf1[6] = input[9];
+ bf1[7] = input[6];
+ bf1[8] = input[7];
+ bf1[9] = input[8];
+ bf1[10] = input[5];
+ bf1[11] = input[10];
+ bf1[12] = input[3];
+ bf1[13] = input[12];
+ bf1[14] = input[1];
+ bf1[15] = input[14];
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 2
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
- bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit[stage]);
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit);
+ bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit);
+ bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit);
+ bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit);
+ bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit);
+ bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit);
+ bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit);
+ bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit);
+ bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit);
+ bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit);
+ bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit);
+ bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit);
+ bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit);
+ bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit);
+ bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit);
+ bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 3
stage++;
bf0 = step;
bf1 = output;
- bf1[0] = bf0[0] + bf0[2];
- bf1[1] = bf0[1] + bf0[3];
- bf1[2] = bf0[0] - bf0[2];
- bf1[3] = bf0[1] - bf0[3];
- bf1[4] = bf0[4] + bf0[6];
- bf1[5] = bf0[5] + bf0[7];
- bf1[6] = bf0[4] - bf0[6];
- bf1[7] = bf0[5] - bf0[7];
- bf1[8] = bf0[8] + bf0[10];
- bf1[9] = bf0[9] + bf0[11];
- bf1[10] = bf0[8] - bf0[10];
- bf1[11] = bf0[9] - bf0[11];
- bf1[12] = bf0[12] + bf0[14];
- bf1[13] = bf0[13] + bf0[15];
- bf1[14] = bf0[12] - bf0[14];
- bf1[15] = bf0[13] - bf0[15];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = clamp_value(bf0[0] + bf0[8], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[9], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[2] + bf0[10], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[3] + bf0[11], stage_range[stage]);
+ bf1[4] = clamp_value(bf0[4] + bf0[12], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[5] + bf0[13], stage_range[stage]);
+ bf1[6] = clamp_value(bf0[6] + bf0[14], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[7] + bf0[15], stage_range[stage]);
+ bf1[8] = clamp_value(bf0[0] - bf0[8], stage_range[stage]);
+ bf1[9] = clamp_value(bf0[1] - bf0[9], stage_range[stage]);
+ bf1[10] = clamp_value(bf0[2] - bf0[10], stage_range[stage]);
+ bf1[11] = clamp_value(bf0[3] - bf0[11], stage_range[stage]);
+ bf1[12] = clamp_value(bf0[4] - bf0[12], stage_range[stage]);
+ bf1[13] = clamp_value(bf0[5] - bf0[13], stage_range[stage]);
+ bf1[14] = clamp_value(bf0[6] - bf0[14], stage_range[stage]);
+ bf1[15] = clamp_value(bf0[7] - bf0[15], stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 4
stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
- bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
- bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
- bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
- bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit[stage]);
- bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[4];
- bf1[1] = bf0[1] + bf0[5];
- bf1[2] = bf0[2] + bf0[6];
- bf1[3] = bf0[3] + bf0[7];
- bf1[4] = bf0[0] - bf0[4];
- bf1[5] = bf0[1] - bf0[5];
- bf1[6] = bf0[2] - bf0[6];
- bf1[7] = bf0[3] - bf0[7];
- bf1[8] = bf0[8] + bf0[12];
- bf1[9] = bf0[9] + bf0[13];
- bf1[10] = bf0[10] + bf0[14];
- bf1[11] = bf0[11] + bf0[15];
- bf1[12] = bf0[8] - bf0[12];
- bf1[13] = bf0[9] - bf0[13];
- bf1[14] = bf0[10] - bf0[14];
- bf1[15] = bf0[11] - bf0[15];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
bf1[0] = bf0[0];
@@ -1049,579 +961,173 @@ void av1_iadst16_new(const int32_t *input, int32_t *output,
bf1[5] = bf0[5];
bf1[6] = bf0[6];
bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
- bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit[stage]);
- bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
- bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit[stage]);
- bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
- bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit[stage]);
- bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[8];
- bf1[1] = bf0[1] + bf0[9];
- bf1[2] = bf0[2] + bf0[10];
- bf1[3] = bf0[3] + bf0[11];
- bf1[4] = bf0[4] + bf0[12];
- bf1[5] = bf0[5] + bf0[13];
- bf1[6] = bf0[6] + bf0[14];
- bf1[7] = bf0[7] + bf0[15];
- bf1[8] = bf0[0] - bf0[8];
- bf1[9] = bf0[1] - bf0[9];
- bf1[10] = bf0[2] - bf0[10];
- bf1[11] = bf0[3] - bf0[11];
- bf1[12] = bf0[4] - bf0[12];
- bf1[13] = bf0[5] - bf0[13];
- bf1[14] = bf0[6] - bf0[14];
- bf1[15] = bf0[7] - bf0[15];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 8
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit[stage]);
- bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit[stage]);
- bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit[stage]);
- bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit[stage]);
- bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit[stage]);
- bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit[stage]);
- bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit[stage]);
- bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit[stage]);
- bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit[stage]);
- bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit[stage]);
- bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit[stage]);
- bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 9
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[1];
- bf1[1] = bf0[14];
- bf1[2] = bf0[3];
- bf1[3] = bf0[12];
- bf1[4] = bf0[5];
- bf1[5] = bf0[10];
- bf1[6] = bf0[7];
- bf1[7] = bf0[8];
- bf1[8] = bf0[9];
- bf1[9] = bf0[6];
- bf1[10] = bf0[11];
- bf1[11] = bf0[4];
- bf1[12] = bf0[13];
- bf1[13] = bf0[2];
- bf1[14] = bf0[15];
- bf1[15] = bf0[0];
- range_check(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_iadst32_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- const int32_t size = 32;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[32];
-
- // stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
-
- // stage 1;
- stage++;
- assert(output != input);
- bf1 = output;
- bf1[0] = input[0];
- bf1[1] = -input[31];
- bf1[2] = -input[15];
- bf1[3] = input[16];
- bf1[4] = -input[7];
- bf1[5] = input[24];
- bf1[6] = input[8];
- bf1[7] = -input[23];
- bf1[8] = -input[3];
- bf1[9] = input[28];
- bf1[10] = input[12];
- bf1[11] = -input[19];
- bf1[12] = input[4];
- bf1[13] = -input[27];
- bf1[14] = -input[11];
- bf1[15] = input[20];
- bf1[16] = -input[1];
- bf1[17] = input[30];
- bf1[18] = input[14];
- bf1[19] = -input[17];
- bf1[20] = input[6];
- bf1[21] = -input[25];
- bf1[22] = -input[9];
- bf1[23] = input[22];
- bf1[24] = input[2];
- bf1[25] = -input[29];
- bf1[26] = -input[13];
- bf1[27] = input[18];
- bf1[28] = -input[5];
- bf1[29] = input[26];
- bf1[30] = input[10];
- bf1[31] = -input[21];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit);
+ bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit);
+ bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit);
+ bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit);
+ bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit);
+ bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit);
+ bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit);
+ bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
- // stage 2
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
- bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit[stage]);
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit[stage]);
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit[stage]);
- bf1[19] = half_btf(cospi[32], bf0[18], -cospi[32], bf0[19], cos_bit[stage]);
- bf1[20] = bf0[20];
- bf1[21] = bf0[21];
- bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit[stage]);
- bf1[23] = half_btf(cospi[32], bf0[22], -cospi[32], bf0[23], cos_bit[stage]);
- bf1[24] = bf0[24];
- bf1[25] = bf0[25];
- bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit[stage]);
- bf1[27] = half_btf(cospi[32], bf0[26], -cospi[32], bf0[27], cos_bit[stage]);
- bf1[28] = bf0[28];
- bf1[29] = bf0[29];
- bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit[stage]);
- bf1[31] = half_btf(cospi[32], bf0[30], -cospi[32], bf0[31], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
+ // stage 5
stage++;
bf0 = step;
bf1 = output;
- bf1[0] = bf0[0] + bf0[2];
- bf1[1] = bf0[1] + bf0[3];
- bf1[2] = bf0[0] - bf0[2];
- bf1[3] = bf0[1] - bf0[3];
- bf1[4] = bf0[4] + bf0[6];
- bf1[5] = bf0[5] + bf0[7];
- bf1[6] = bf0[4] - bf0[6];
- bf1[7] = bf0[5] - bf0[7];
- bf1[8] = bf0[8] + bf0[10];
- bf1[9] = bf0[9] + bf0[11];
- bf1[10] = bf0[8] - bf0[10];
- bf1[11] = bf0[9] - bf0[11];
- bf1[12] = bf0[12] + bf0[14];
- bf1[13] = bf0[13] + bf0[15];
- bf1[14] = bf0[12] - bf0[14];
- bf1[15] = bf0[13] - bf0[15];
- bf1[16] = bf0[16] + bf0[18];
- bf1[17] = bf0[17] + bf0[19];
- bf1[18] = bf0[16] - bf0[18];
- bf1[19] = bf0[17] - bf0[19];
- bf1[20] = bf0[20] + bf0[22];
- bf1[21] = bf0[21] + bf0[23];
- bf1[22] = bf0[20] - bf0[22];
- bf1[23] = bf0[21] - bf0[23];
- bf1[24] = bf0[24] + bf0[26];
- bf1[25] = bf0[25] + bf0[27];
- bf1[26] = bf0[24] - bf0[26];
- bf1[27] = bf0[25] - bf0[27];
- bf1[28] = bf0[28] + bf0[30];
- bf1[29] = bf0[29] + bf0[31];
- bf1[30] = bf0[28] - bf0[30];
- bf1[31] = bf0[29] - bf0[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = clamp_value(bf0[0] + bf0[4], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[5], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[2] + bf0[6], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[3] + bf0[7], stage_range[stage]);
+ bf1[4] = clamp_value(bf0[0] - bf0[4], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[1] - bf0[5], stage_range[stage]);
+ bf1[6] = clamp_value(bf0[2] - bf0[6], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[3] - bf0[7], stage_range[stage]);
+ bf1[8] = clamp_value(bf0[8] + bf0[12], stage_range[stage]);
+ bf1[9] = clamp_value(bf0[9] + bf0[13], stage_range[stage]);
+ bf1[10] = clamp_value(bf0[10] + bf0[14], stage_range[stage]);
+ bf1[11] = clamp_value(bf0[11] + bf0[15], stage_range[stage]);
+ bf1[12] = clamp_value(bf0[8] - bf0[12], stage_range[stage]);
+ bf1[13] = clamp_value(bf0[9] - bf0[13], stage_range[stage]);
+ bf1[14] = clamp_value(bf0[10] - bf0[14], stage_range[stage]);
+ bf1[15] = clamp_value(bf0[11] - bf0[15], stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
- // stage 4
+ // stage 6
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
bf1[0] = bf0[0];
bf1[1] = bf0[1];
bf1[2] = bf0[2];
bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
- bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
- bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
+ bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
+ bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
bf1[8] = bf0[8];
bf1[9] = bf0[9];
bf1[10] = bf0[10];
bf1[11] = bf0[11];
- bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
- bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit[stage]);
- bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit[stage]);
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = bf0[18];
- bf1[19] = bf0[19];
- bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit[stage]);
- bf1[21] = half_btf(cospi[48], bf0[20], -cospi[16], bf0[21], cos_bit[stage]);
- bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit[stage]);
- bf1[23] = half_btf(cospi[16], bf0[22], cospi[48], bf0[23], cos_bit[stage]);
- bf1[24] = bf0[24];
- bf1[25] = bf0[25];
- bf1[26] = bf0[26];
- bf1[27] = bf0[27];
- bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit[stage]);
- bf1[29] = half_btf(cospi[48], bf0[28], -cospi[16], bf0[29], cos_bit[stage]);
- bf1[30] = half_btf(-cospi[48], bf0[30], cospi[16], bf0[31], cos_bit[stage]);
- bf1[31] = half_btf(cospi[16], bf0[30], cospi[48], bf0[31], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[4];
- bf1[1] = bf0[1] + bf0[5];
- bf1[2] = bf0[2] + bf0[6];
- bf1[3] = bf0[3] + bf0[7];
- bf1[4] = bf0[0] - bf0[4];
- bf1[5] = bf0[1] - bf0[5];
- bf1[6] = bf0[2] - bf0[6];
- bf1[7] = bf0[3] - bf0[7];
- bf1[8] = bf0[8] + bf0[12];
- bf1[9] = bf0[9] + bf0[13];
- bf1[10] = bf0[10] + bf0[14];
- bf1[11] = bf0[11] + bf0[15];
- bf1[12] = bf0[8] - bf0[12];
- bf1[13] = bf0[9] - bf0[13];
- bf1[14] = bf0[10] - bf0[14];
- bf1[15] = bf0[11] - bf0[15];
- bf1[16] = bf0[16] + bf0[20];
- bf1[17] = bf0[17] + bf0[21];
- bf1[18] = bf0[18] + bf0[22];
- bf1[19] = bf0[19] + bf0[23];
- bf1[20] = bf0[16] - bf0[20];
- bf1[21] = bf0[17] - bf0[21];
- bf1[22] = bf0[18] - bf0[22];
- bf1[23] = bf0[19] - bf0[23];
- bf1[24] = bf0[24] + bf0[28];
- bf1[25] = bf0[25] + bf0[29];
- bf1[26] = bf0[26] + bf0[30];
- bf1[27] = bf0[27] + bf0[31];
- bf1[28] = bf0[24] - bf0[28];
- bf1[29] = bf0[25] - bf0[29];
- bf1[30] = bf0[26] - bf0[30];
- bf1[31] = bf0[27] - bf0[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
- bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit[stage]);
- bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
- bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit[stage]);
- bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
- bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit[stage]);
- bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit[stage]);
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = bf0[18];
- bf1[19] = bf0[19];
- bf1[20] = bf0[20];
- bf1[21] = bf0[21];
- bf1[22] = bf0[22];
- bf1[23] = bf0[23];
- bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit[stage]);
- bf1[25] = half_btf(cospi[56], bf0[24], -cospi[8], bf0[25], cos_bit[stage]);
- bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit[stage]);
- bf1[27] = half_btf(cospi[24], bf0[26], -cospi[40], bf0[27], cos_bit[stage]);
- bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit[stage]);
- bf1[29] = half_btf(cospi[8], bf0[28], cospi[56], bf0[29], cos_bit[stage]);
- bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit[stage]);
- bf1[31] = half_btf(cospi[40], bf0[30], cospi[24], bf0[31], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit);
+ bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit);
+ bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit);
+ bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 7
stage++;
bf0 = step;
bf1 = output;
- bf1[0] = bf0[0] + bf0[8];
- bf1[1] = bf0[1] + bf0[9];
- bf1[2] = bf0[2] + bf0[10];
- bf1[3] = bf0[3] + bf0[11];
- bf1[4] = bf0[4] + bf0[12];
- bf1[5] = bf0[5] + bf0[13];
- bf1[6] = bf0[6] + bf0[14];
- bf1[7] = bf0[7] + bf0[15];
- bf1[8] = bf0[0] - bf0[8];
- bf1[9] = bf0[1] - bf0[9];
- bf1[10] = bf0[2] - bf0[10];
- bf1[11] = bf0[3] - bf0[11];
- bf1[12] = bf0[4] - bf0[12];
- bf1[13] = bf0[5] - bf0[13];
- bf1[14] = bf0[6] - bf0[14];
- bf1[15] = bf0[7] - bf0[15];
- bf1[16] = bf0[16] + bf0[24];
- bf1[17] = bf0[17] + bf0[25];
- bf1[18] = bf0[18] + bf0[26];
- bf1[19] = bf0[19] + bf0[27];
- bf1[20] = bf0[20] + bf0[28];
- bf1[21] = bf0[21] + bf0[29];
- bf1[22] = bf0[22] + bf0[30];
- bf1[23] = bf0[23] + bf0[31];
- bf1[24] = bf0[16] - bf0[24];
- bf1[25] = bf0[17] - bf0[25];
- bf1[26] = bf0[18] - bf0[26];
- bf1[27] = bf0[19] - bf0[27];
- bf1[28] = bf0[20] - bf0[28];
- bf1[29] = bf0[21] - bf0[29];
- bf1[30] = bf0[22] - bf0[30];
- bf1[31] = bf0[23] - bf0[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = clamp_value(bf0[0] + bf0[2], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[3], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[0] - bf0[2], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[1] - bf0[3], stage_range[stage]);
+ bf1[4] = clamp_value(bf0[4] + bf0[6], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[5] + bf0[7], stage_range[stage]);
+ bf1[6] = clamp_value(bf0[4] - bf0[6], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[5] - bf0[7], stage_range[stage]);
+ bf1[8] = clamp_value(bf0[8] + bf0[10], stage_range[stage]);
+ bf1[9] = clamp_value(bf0[9] + bf0[11], stage_range[stage]);
+ bf1[10] = clamp_value(bf0[8] - bf0[10], stage_range[stage]);
+ bf1[11] = clamp_value(bf0[9] - bf0[11], stage_range[stage]);
+ bf1[12] = clamp_value(bf0[12] + bf0[14], stage_range[stage]);
+ bf1[13] = clamp_value(bf0[13] + bf0[15], stage_range[stage]);
+ bf1[14] = clamp_value(bf0[12] - bf0[14], stage_range[stage]);
+ bf1[15] = clamp_value(bf0[13] - bf0[15], stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 8
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
bf1[0] = bf0[0];
bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
+ bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
bf1[4] = bf0[4];
bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
+ bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
bf1[8] = bf0[8];
bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
+ bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit);
+ bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit);
bf1[12] = bf0[12];
bf1[13] = bf0[13];
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit[stage]);
- bf1[17] = half_btf(cospi[60], bf0[16], -cospi[4], bf0[17], cos_bit[stage]);
- bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit[stage]);
- bf1[19] = half_btf(cospi[44], bf0[18], -cospi[20], bf0[19], cos_bit[stage]);
- bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit[stage]);
- bf1[21] = half_btf(cospi[28], bf0[20], -cospi[36], bf0[21], cos_bit[stage]);
- bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit[stage]);
- bf1[23] = half_btf(cospi[12], bf0[22], -cospi[52], bf0[23], cos_bit[stage]);
- bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit[stage]);
- bf1[25] = half_btf(cospi[4], bf0[24], cospi[60], bf0[25], cos_bit[stage]);
- bf1[26] = half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit[stage]);
- bf1[27] = half_btf(cospi[20], bf0[26], cospi[44], bf0[27], cos_bit[stage]);
- bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit[stage]);
- bf1[29] = half_btf(cospi[36], bf0[28], cospi[28], bf0[29], cos_bit[stage]);
- bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit[stage]);
- bf1[31] = half_btf(cospi[52], bf0[30], cospi[12], bf0[31], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit);
+ bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 9
stage++;
bf0 = step;
bf1 = output;
- bf1[0] = bf0[0] + bf0[16];
- bf1[1] = bf0[1] + bf0[17];
- bf1[2] = bf0[2] + bf0[18];
- bf1[3] = bf0[3] + bf0[19];
- bf1[4] = bf0[4] + bf0[20];
- bf1[5] = bf0[5] + bf0[21];
- bf1[6] = bf0[6] + bf0[22];
- bf1[7] = bf0[7] + bf0[23];
- bf1[8] = bf0[8] + bf0[24];
- bf1[9] = bf0[9] + bf0[25];
- bf1[10] = bf0[10] + bf0[26];
- bf1[11] = bf0[11] + bf0[27];
- bf1[12] = bf0[12] + bf0[28];
- bf1[13] = bf0[13] + bf0[29];
- bf1[14] = bf0[14] + bf0[30];
- bf1[15] = bf0[15] + bf0[31];
- bf1[16] = bf0[0] - bf0[16];
- bf1[17] = bf0[1] - bf0[17];
- bf1[18] = bf0[2] - bf0[18];
- bf1[19] = bf0[3] - bf0[19];
- bf1[20] = bf0[4] - bf0[20];
- bf1[21] = bf0[5] - bf0[21];
- bf1[22] = bf0[6] - bf0[22];
- bf1[23] = bf0[7] - bf0[23];
- bf1[24] = bf0[8] - bf0[24];
- bf1[25] = bf0[9] - bf0[25];
- bf1[26] = bf0[10] - bf0[26];
- bf1[27] = bf0[11] - bf0[27];
- bf1[28] = bf0[12] - bf0[28];
- bf1[29] = bf0[13] - bf0[29];
- bf1[30] = bf0[14] - bf0[30];
- bf1[31] = bf0[15] - bf0[31];
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 10
- stage++;
- cospi = cospi_arr(cos_bit[stage]);
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(cospi[63], bf0[0], -cospi[1], bf0[1], cos_bit[stage]);
- bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[59], bf0[2], -cospi[5], bf0[3], cos_bit[stage]);
- bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit[stage]);
- bf1[5] = half_btf(cospi[55], bf0[4], -cospi[9], bf0[5], cos_bit[stage]);
- bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit[stage]);
- bf1[7] = half_btf(cospi[51], bf0[6], -cospi[13], bf0[7], cos_bit[stage]);
- bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit[stage]);
- bf1[9] = half_btf(cospi[47], bf0[8], -cospi[17], bf0[9], cos_bit[stage]);
- bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit[stage]);
- bf1[11] = half_btf(cospi[43], bf0[10], -cospi[21], bf0[11], cos_bit[stage]);
- bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit[stage]);
- bf1[13] = half_btf(cospi[39], bf0[12], -cospi[25], bf0[13], cos_bit[stage]);
- bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit[stage]);
- bf1[15] = half_btf(cospi[35], bf0[14], -cospi[29], bf0[15], cos_bit[stage]);
- bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit[stage]);
- bf1[17] = half_btf(cospi[31], bf0[16], -cospi[33], bf0[17], cos_bit[stage]);
- bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], cos_bit[stage]);
- bf1[19] = half_btf(cospi[27], bf0[18], -cospi[37], bf0[19], cos_bit[stage]);
- bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit[stage]);
- bf1[21] = half_btf(cospi[23], bf0[20], -cospi[41], bf0[21], cos_bit[stage]);
- bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit[stage]);
- bf1[23] = half_btf(cospi[19], bf0[22], -cospi[45], bf0[23], cos_bit[stage]);
- bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit[stage]);
- bf1[25] = half_btf(cospi[15], bf0[24], -cospi[49], bf0[25], cos_bit[stage]);
- bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit[stage]);
- bf1[27] = half_btf(cospi[11], bf0[26], -cospi[53], bf0[27], cos_bit[stage]);
- bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit[stage]);
- bf1[29] = half_btf(cospi[7], bf0[28], -cospi[57], bf0[29], cos_bit[stage]);
- bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit[stage]);
- bf1[31] = half_btf(cospi[3], bf0[30], -cospi[61], bf0[31], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
-
- // stage 11
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[1];
- bf1[1] = bf0[30];
- bf1[2] = bf0[3];
- bf1[3] = bf0[28];
- bf1[4] = bf0[5];
- bf1[5] = bf0[26];
- bf1[6] = bf0[7];
- bf1[7] = bf0[24];
- bf1[8] = bf0[9];
- bf1[9] = bf0[22];
- bf1[10] = bf0[11];
- bf1[11] = bf0[20];
- bf1[12] = bf0[13];
- bf1[13] = bf0[18];
- bf1[14] = bf0[15];
- bf1[15] = bf0[16];
- bf1[16] = bf0[17];
- bf1[17] = bf0[14];
- bf1[18] = bf0[19];
- bf1[19] = bf0[12];
- bf1[20] = bf0[21];
- bf1[21] = bf0[10];
- bf1[22] = bf0[23];
- bf1[23] = bf0[8];
- bf1[24] = bf0[25];
- bf1[25] = bf0[6];
- bf1[26] = bf0[27];
- bf1[27] = bf0[4];
- bf1[28] = bf0[29];
- bf1[29] = bf0[2];
- bf1[30] = bf0[31];
- bf1[31] = bf0[0];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = bf0[0];
+ bf1[1] = -bf0[8];
+ bf1[2] = bf0[12];
+ bf1[3] = -bf0[4];
+ bf1[4] = bf0[6];
+ bf1[5] = -bf0[14];
+ bf1[6] = bf0[10];
+ bf1[7] = -bf0[2];
+ bf1[8] = bf0[3];
+ bf1[9] = -bf0[11];
+ bf1[10] = bf0[15];
+ bf1[11] = -bf0[7];
+ bf1[12] = bf0[5];
+ bf1[13] = -bf0[13];
+ bf1[14] = bf0[9];
+ bf1[15] = -bf0[1];
}
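
The restructured av1_iadst16_new above drops the per-stage range_check() calls and instead saturates every add/subtract butterfly through clamp_value(..., stage_range[stage]). Below is a minimal standalone sketch of that saturation, assuming the usual semantics (clamp to a signed bit-wide range, pass the value through when the bit count is non-positive); the helper name clamp_to_range and the exact bounds are illustrative assumptions, not lines from this diff.

#include <stdint.h>

// Sketch: keep an intermediate transform value inside a signed 'bit'-wide
// range so the following stage cannot overflow 32-bit arithmetic.
static int32_t clamp_to_range(int32_t value, int8_t bit) {
  if (bit <= 0) return value;  // No clamping requested for this stage.
  const int64_t hi = (1LL << (bit - 1)) - 1;  // assumed upper bound
  const int64_t lo = -(1LL << (bit - 1));     // assumed lower bound
  if (value > hi) return (int32_t)hi;
  if (value < lo) return (int32_t)lo;
  return value;
}
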
-#if CONFIG_EXT_TX
-void av1_iidentity4_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
+void av1_iidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range) {
(void)cos_bit;
- for (int i = 0; i < 4; ++i)
- output[i] = (int32_t)dct_const_round_shift(input[i] * Sqrt2);
- range_check(0, input, output, 4, stage_range[0]);
+ (void)stage_range;
+ for (int i = 0; i < 4; ++i) {
+ output[i] = round_shift((int64_t)NewSqrt2 * input[i], NewSqrt2Bits);
+ }
+ assert(stage_range[0] + NewSqrt2Bits <= 32);
}
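
The updated identity transforms scale through round_shift() with the fixed-point constant NewSqrt2 rather than dct_const_round_shift()/Sqrt2. A small sketch of that scaling follows, assuming NewSqrt2 = 5793 (about sqrt(2) * 2^12), NewSqrt2Bits = 12, and a round-to-nearest right shift; the constants and helper names below are stated assumptions, not copied from this diff.

#include <stdint.h>

// Assumed fixed-point constants: round(sqrt(2) * 2^12) and the shift width.
#define SQRT2_FIXED 5793
#define SQRT2_BITS 12

// Round-to-nearest right shift, as used for the identity scaling.
static int32_t round_shift_sketch(int64_t value, int bit) {
  return (int32_t)((value + (1LL << (bit - 1))) >> bit);
}

static int32_t scale_by_sqrt2(int32_t in) {
  return round_shift_sketch((int64_t)SQRT2_FIXED * in, SQRT2_BITS);
}

// Example: scale_by_sqrt2(1024) == 1448, close to 1024 * 1.4142.
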
-void av1_iidentity8_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
+void av1_iidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range) {
(void)cos_bit;
- for (int i = 0; i < 8; ++i) output[i] = input[i] * 2;
- range_check(0, input, output, 8, stage_range[0]);
+ (void)stage_range;
+ for (int i = 0; i < 8; ++i) output[i] = (int32_t)((int64_t)input[i] * 2);
}
-void av1_iidentity16_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
+void av1_iidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range) {
(void)cos_bit;
+ (void)stage_range;
for (int i = 0; i < 16; ++i)
- output[i] = (int32_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
- range_check(0, input, output, 16, stage_range[0]);
-}
-
-void av1_iidentity32_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- (void)cos_bit;
- for (int i = 0; i < 32; ++i) output[i] = input[i] * 4;
- range_check(0, input, output, 32, stage_range[0]);
+ output[i] = round_shift((int64_t)NewSqrt2 * 2 * input[i], NewSqrt2Bits);
+ assert(stage_range[0] + NewSqrt2Bits <= 32);
}
-#if CONFIG_TX64X64
-void av1_iidentity64_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
+void av1_iidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range) {
(void)cos_bit;
- for (int i = 0; i < 64; ++i)
- output[i] = (int32_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
- range_check(0, input, output, 64, stage_range[0]);
+ (void)stage_range;
+ for (int i = 0; i < 32; ++i) output[i] = (int32_t)((int64_t)input[i] * 4);
}
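
Every rotation in the av1_idct64_new hunks below goes through half_btf(w0, in0, w1, in1, cos_bit), which combines two inputs with cosine weights taken from cospi_arr(cos_bit) and shifts the result back down. The sketch below shows that step under the assumption that the weights are cosines pre-scaled by 2^cos_bit and that the shift rounds to nearest; the example weight 2896 (about cos(pi/4) * 2^12) is an assumed table value, not taken from this diff.

#include <stdint.h>

// Sketch of the half-butterfly: out = round((w0*in0 + w1*in1) / 2^bit).
// w0 and w1 are cosine weights pre-scaled by 2^bit (cospi_arr(cos_bit)).
static int32_t half_btf_sketch(int32_t w0, int32_t in0, int32_t w1,
                               int32_t in1, int bit) {
  const int64_t sum = (int64_t)w0 * in0 + (int64_t)w1 * in1;
  return (int32_t)((sum + (1LL << (bit - 1))) >> bit);
}

// Example (assumed 12-bit table): half_btf_sketch(2896, a, 2896, b, 12)
// is approximately (a + b) / sqrt(2).
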
-#endif // CONFIG_TX64X64
-#endif // CONFIG_EXT_TX
-#if CONFIG_TX64X64
-void av1_idct64_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
+void av1_idct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range) {
+ assert(output != input);
const int32_t size = 64;
- const int32_t *cospi;
+ const int32_t *cospi = cospi_arr(cos_bit);
int32_t stage = 0;
int32_t *bf0, *bf1;
int32_t step[64];
// stage 0;
- range_check(stage, input, input, size, stage_range[stage]);
// stage 1;
stage++;
- cospi = cospi_arr(cos_bit[stage]);
- assert(output != input);
bf1 = output;
bf1[0] = input[0];
bf1[1] = input[32];
@@ -1687,11 +1193,10 @@ void av1_idct64_new(const int32_t *input, int32_t *output,
bf1[61] = input[47];
bf1[62] = input[31];
bf1[63] = input[63];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 2
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
bf1[0] = bf0[0];
@@ -1726,43 +1231,42 @@ void av1_idct64_new(const int32_t *input, int32_t *output,
bf1[29] = bf0[29];
bf1[30] = bf0[30];
bf1[31] = bf0[31];
- bf1[32] = half_btf(cospi[63], bf0[32], -cospi[1], bf0[63], cos_bit[stage]);
- bf1[33] = half_btf(cospi[31], bf0[33], -cospi[33], bf0[62], cos_bit[stage]);
- bf1[34] = half_btf(cospi[47], bf0[34], -cospi[17], bf0[61], cos_bit[stage]);
- bf1[35] = half_btf(cospi[15], bf0[35], -cospi[49], bf0[60], cos_bit[stage]);
- bf1[36] = half_btf(cospi[55], bf0[36], -cospi[9], bf0[59], cos_bit[stage]);
- bf1[37] = half_btf(cospi[23], bf0[37], -cospi[41], bf0[58], cos_bit[stage]);
- bf1[38] = half_btf(cospi[39], bf0[38], -cospi[25], bf0[57], cos_bit[stage]);
- bf1[39] = half_btf(cospi[7], bf0[39], -cospi[57], bf0[56], cos_bit[stage]);
- bf1[40] = half_btf(cospi[59], bf0[40], -cospi[5], bf0[55], cos_bit[stage]);
- bf1[41] = half_btf(cospi[27], bf0[41], -cospi[37], bf0[54], cos_bit[stage]);
- bf1[42] = half_btf(cospi[43], bf0[42], -cospi[21], bf0[53], cos_bit[stage]);
- bf1[43] = half_btf(cospi[11], bf0[43], -cospi[53], bf0[52], cos_bit[stage]);
- bf1[44] = half_btf(cospi[51], bf0[44], -cospi[13], bf0[51], cos_bit[stage]);
- bf1[45] = half_btf(cospi[19], bf0[45], -cospi[45], bf0[50], cos_bit[stage]);
- bf1[46] = half_btf(cospi[35], bf0[46], -cospi[29], bf0[49], cos_bit[stage]);
- bf1[47] = half_btf(cospi[3], bf0[47], -cospi[61], bf0[48], cos_bit[stage]);
- bf1[48] = half_btf(cospi[61], bf0[47], cospi[3], bf0[48], cos_bit[stage]);
- bf1[49] = half_btf(cospi[29], bf0[46], cospi[35], bf0[49], cos_bit[stage]);
- bf1[50] = half_btf(cospi[45], bf0[45], cospi[19], bf0[50], cos_bit[stage]);
- bf1[51] = half_btf(cospi[13], bf0[44], cospi[51], bf0[51], cos_bit[stage]);
- bf1[52] = half_btf(cospi[53], bf0[43], cospi[11], bf0[52], cos_bit[stage]);
- bf1[53] = half_btf(cospi[21], bf0[42], cospi[43], bf0[53], cos_bit[stage]);
- bf1[54] = half_btf(cospi[37], bf0[41], cospi[27], bf0[54], cos_bit[stage]);
- bf1[55] = half_btf(cospi[5], bf0[40], cospi[59], bf0[55], cos_bit[stage]);
- bf1[56] = half_btf(cospi[57], bf0[39], cospi[7], bf0[56], cos_bit[stage]);
- bf1[57] = half_btf(cospi[25], bf0[38], cospi[39], bf0[57], cos_bit[stage]);
- bf1[58] = half_btf(cospi[41], bf0[37], cospi[23], bf0[58], cos_bit[stage]);
- bf1[59] = half_btf(cospi[9], bf0[36], cospi[55], bf0[59], cos_bit[stage]);
- bf1[60] = half_btf(cospi[49], bf0[35], cospi[15], bf0[60], cos_bit[stage]);
- bf1[61] = half_btf(cospi[17], bf0[34], cospi[47], bf0[61], cos_bit[stage]);
- bf1[62] = half_btf(cospi[33], bf0[33], cospi[31], bf0[62], cos_bit[stage]);
- bf1[63] = half_btf(cospi[1], bf0[32], cospi[63], bf0[63], cos_bit[stage]);
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[32] = half_btf(cospi[63], bf0[32], -cospi[1], bf0[63], cos_bit);
+ bf1[33] = half_btf(cospi[31], bf0[33], -cospi[33], bf0[62], cos_bit);
+ bf1[34] = half_btf(cospi[47], bf0[34], -cospi[17], bf0[61], cos_bit);
+ bf1[35] = half_btf(cospi[15], bf0[35], -cospi[49], bf0[60], cos_bit);
+ bf1[36] = half_btf(cospi[55], bf0[36], -cospi[9], bf0[59], cos_bit);
+ bf1[37] = half_btf(cospi[23], bf0[37], -cospi[41], bf0[58], cos_bit);
+ bf1[38] = half_btf(cospi[39], bf0[38], -cospi[25], bf0[57], cos_bit);
+ bf1[39] = half_btf(cospi[7], bf0[39], -cospi[57], bf0[56], cos_bit);
+ bf1[40] = half_btf(cospi[59], bf0[40], -cospi[5], bf0[55], cos_bit);
+ bf1[41] = half_btf(cospi[27], bf0[41], -cospi[37], bf0[54], cos_bit);
+ bf1[42] = half_btf(cospi[43], bf0[42], -cospi[21], bf0[53], cos_bit);
+ bf1[43] = half_btf(cospi[11], bf0[43], -cospi[53], bf0[52], cos_bit);
+ bf1[44] = half_btf(cospi[51], bf0[44], -cospi[13], bf0[51], cos_bit);
+ bf1[45] = half_btf(cospi[19], bf0[45], -cospi[45], bf0[50], cos_bit);
+ bf1[46] = half_btf(cospi[35], bf0[46], -cospi[29], bf0[49], cos_bit);
+ bf1[47] = half_btf(cospi[3], bf0[47], -cospi[61], bf0[48], cos_bit);
+ bf1[48] = half_btf(cospi[61], bf0[47], cospi[3], bf0[48], cos_bit);
+ bf1[49] = half_btf(cospi[29], bf0[46], cospi[35], bf0[49], cos_bit);
+ bf1[50] = half_btf(cospi[45], bf0[45], cospi[19], bf0[50], cos_bit);
+ bf1[51] = half_btf(cospi[13], bf0[44], cospi[51], bf0[51], cos_bit);
+ bf1[52] = half_btf(cospi[53], bf0[43], cospi[11], bf0[52], cos_bit);
+ bf1[53] = half_btf(cospi[21], bf0[42], cospi[43], bf0[53], cos_bit);
+ bf1[54] = half_btf(cospi[37], bf0[41], cospi[27], bf0[54], cos_bit);
+ bf1[55] = half_btf(cospi[5], bf0[40], cospi[59], bf0[55], cos_bit);
+ bf1[56] = half_btf(cospi[57], bf0[39], cospi[7], bf0[56], cos_bit);
+ bf1[57] = half_btf(cospi[25], bf0[38], cospi[39], bf0[57], cos_bit);
+ bf1[58] = half_btf(cospi[41], bf0[37], cospi[23], bf0[58], cos_bit);
+ bf1[59] = half_btf(cospi[9], bf0[36], cospi[55], bf0[59], cos_bit);
+ bf1[60] = half_btf(cospi[49], bf0[35], cospi[15], bf0[60], cos_bit);
+ bf1[61] = half_btf(cospi[17], bf0[34], cospi[47], bf0[61], cos_bit);
+ bf1[62] = half_btf(cospi[33], bf0[33], cospi[31], bf0[62], cos_bit);
+ bf1[63] = half_btf(cospi[1], bf0[32], cospi[63], bf0[63], cos_bit);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 3
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = step;
bf1 = output;
bf1[0] = bf0[0];
@@ -1781,59 +1285,58 @@ void av1_idct64_new(const int32_t *input, int32_t *output,
bf1[13] = bf0[13];
bf1[14] = bf0[14];
bf1[15] = bf0[15];
- bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit[stage]);
- bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit[stage]);
- bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit[stage]);
- bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit[stage]);
- bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit[stage]);
- bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit[stage]);
- bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit[stage]);
- bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit[stage]);
- bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit[stage]);
- bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit[stage]);
- bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit[stage]);
- bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit[stage]);
- bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit[stage]);
- bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit[stage]);
- bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit[stage]);
- bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit[stage]);
- bf1[32] = bf0[32] + bf0[33];
- bf1[33] = bf0[32] - bf0[33];
- bf1[34] = -bf0[34] + bf0[35];
- bf1[35] = bf0[34] + bf0[35];
- bf1[36] = bf0[36] + bf0[37];
- bf1[37] = bf0[36] - bf0[37];
- bf1[38] = -bf0[38] + bf0[39];
- bf1[39] = bf0[38] + bf0[39];
- bf1[40] = bf0[40] + bf0[41];
- bf1[41] = bf0[40] - bf0[41];
- bf1[42] = -bf0[42] + bf0[43];
- bf1[43] = bf0[42] + bf0[43];
- bf1[44] = bf0[44] + bf0[45];
- bf1[45] = bf0[44] - bf0[45];
- bf1[46] = -bf0[46] + bf0[47];
- bf1[47] = bf0[46] + bf0[47];
- bf1[48] = bf0[48] + bf0[49];
- bf1[49] = bf0[48] - bf0[49];
- bf1[50] = -bf0[50] + bf0[51];
- bf1[51] = bf0[50] + bf0[51];
- bf1[52] = bf0[52] + bf0[53];
- bf1[53] = bf0[52] - bf0[53];
- bf1[54] = -bf0[54] + bf0[55];
- bf1[55] = bf0[54] + bf0[55];
- bf1[56] = bf0[56] + bf0[57];
- bf1[57] = bf0[56] - bf0[57];
- bf1[58] = -bf0[58] + bf0[59];
- bf1[59] = bf0[58] + bf0[59];
- bf1[60] = bf0[60] + bf0[61];
- bf1[61] = bf0[60] - bf0[61];
- bf1[62] = -bf0[62] + bf0[63];
- bf1[63] = bf0[62] + bf0[63];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit);
+ bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit);
+ bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit);
+ bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit);
+ bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit);
+ bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit);
+ bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit);
+ bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit);
+ bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit);
+ bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit);
+ bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit);
+ bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit);
+ bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit);
+ bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit);
+ bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit);
+ bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit);
+ bf1[32] = clamp_value(bf0[32] + bf0[33], stage_range[stage]);
+ bf1[33] = clamp_value(bf0[32] - bf0[33], stage_range[stage]);
+ bf1[34] = clamp_value(-bf0[34] + bf0[35], stage_range[stage]);
+ bf1[35] = clamp_value(bf0[34] + bf0[35], stage_range[stage]);
+ bf1[36] = clamp_value(bf0[36] + bf0[37], stage_range[stage]);
+ bf1[37] = clamp_value(bf0[36] - bf0[37], stage_range[stage]);
+ bf1[38] = clamp_value(-bf0[38] + bf0[39], stage_range[stage]);
+ bf1[39] = clamp_value(bf0[38] + bf0[39], stage_range[stage]);
+ bf1[40] = clamp_value(bf0[40] + bf0[41], stage_range[stage]);
+ bf1[41] = clamp_value(bf0[40] - bf0[41], stage_range[stage]);
+ bf1[42] = clamp_value(-bf0[42] + bf0[43], stage_range[stage]);
+ bf1[43] = clamp_value(bf0[42] + bf0[43], stage_range[stage]);
+ bf1[44] = clamp_value(bf0[44] + bf0[45], stage_range[stage]);
+ bf1[45] = clamp_value(bf0[44] - bf0[45], stage_range[stage]);
+ bf1[46] = clamp_value(-bf0[46] + bf0[47], stage_range[stage]);
+ bf1[47] = clamp_value(bf0[46] + bf0[47], stage_range[stage]);
+ bf1[48] = clamp_value(bf0[48] + bf0[49], stage_range[stage]);
+ bf1[49] = clamp_value(bf0[48] - bf0[49], stage_range[stage]);
+ bf1[50] = clamp_value(-bf0[50] + bf0[51], stage_range[stage]);
+ bf1[51] = clamp_value(bf0[50] + bf0[51], stage_range[stage]);
+ bf1[52] = clamp_value(bf0[52] + bf0[53], stage_range[stage]);
+ bf1[53] = clamp_value(bf0[52] - bf0[53], stage_range[stage]);
+ bf1[54] = clamp_value(-bf0[54] + bf0[55], stage_range[stage]);
+ bf1[55] = clamp_value(bf0[54] + bf0[55], stage_range[stage]);
+ bf1[56] = clamp_value(bf0[56] + bf0[57], stage_range[stage]);
+ bf1[57] = clamp_value(bf0[56] - bf0[57], stage_range[stage]);
+ bf1[58] = clamp_value(-bf0[58] + bf0[59], stage_range[stage]);
+ bf1[59] = clamp_value(bf0[58] + bf0[59], stage_range[stage]);
+ bf1[60] = clamp_value(bf0[60] + bf0[61], stage_range[stage]);
+ bf1[61] = clamp_value(bf0[60] - bf0[61], stage_range[stage]);
+ bf1[62] = clamp_value(-bf0[62] + bf0[63], stage_range[stage]);
+ bf1[63] = clamp_value(bf0[62] + bf0[63], stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 4
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
bf1[0] = bf0[0];
@@ -1844,326 +1347,322 @@ void av1_idct64_new(const int32_t *input, int32_t *output,
bf1[5] = bf0[5];
bf1[6] = bf0[6];
bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]);
- bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]);
- bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]);
- bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]);
- bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]);
- bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]);
- bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]);
- bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]);
- bf1[16] = bf0[16] + bf0[17];
- bf1[17] = bf0[16] - bf0[17];
- bf1[18] = -bf0[18] + bf0[19];
- bf1[19] = bf0[18] + bf0[19];
- bf1[20] = bf0[20] + bf0[21];
- bf1[21] = bf0[20] - bf0[21];
- bf1[22] = -bf0[22] + bf0[23];
- bf1[23] = bf0[22] + bf0[23];
- bf1[24] = bf0[24] + bf0[25];
- bf1[25] = bf0[24] - bf0[25];
- bf1[26] = -bf0[26] + bf0[27];
- bf1[27] = bf0[26] + bf0[27];
- bf1[28] = bf0[28] + bf0[29];
- bf1[29] = bf0[28] - bf0[29];
- bf1[30] = -bf0[30] + bf0[31];
- bf1[31] = bf0[30] + bf0[31];
+ bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit);
+ bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit);
+ bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit);
+ bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit);
+ bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit);
+ bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit);
+ bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit);
+ bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit);
+ bf1[16] = clamp_value(bf0[16] + bf0[17], stage_range[stage]);
+ bf1[17] = clamp_value(bf0[16] - bf0[17], stage_range[stage]);
+ bf1[18] = clamp_value(-bf0[18] + bf0[19], stage_range[stage]);
+ bf1[19] = clamp_value(bf0[18] + bf0[19], stage_range[stage]);
+ bf1[20] = clamp_value(bf0[20] + bf0[21], stage_range[stage]);
+ bf1[21] = clamp_value(bf0[20] - bf0[21], stage_range[stage]);
+ bf1[22] = clamp_value(-bf0[22] + bf0[23], stage_range[stage]);
+ bf1[23] = clamp_value(bf0[22] + bf0[23], stage_range[stage]);
+ bf1[24] = clamp_value(bf0[24] + bf0[25], stage_range[stage]);
+ bf1[25] = clamp_value(bf0[24] - bf0[25], stage_range[stage]);
+ bf1[26] = clamp_value(-bf0[26] + bf0[27], stage_range[stage]);
+ bf1[27] = clamp_value(bf0[26] + bf0[27], stage_range[stage]);
+ bf1[28] = clamp_value(bf0[28] + bf0[29], stage_range[stage]);
+ bf1[29] = clamp_value(bf0[28] - bf0[29], stage_range[stage]);
+ bf1[30] = clamp_value(-bf0[30] + bf0[31], stage_range[stage]);
+ bf1[31] = clamp_value(bf0[30] + bf0[31], stage_range[stage]);
bf1[32] = bf0[32];
- bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit[stage]);
- bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit[stage]);
+ bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit);
+ bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit);
bf1[35] = bf0[35];
bf1[36] = bf0[36];
- bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit[stage]);
- bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit[stage]);
+ bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit);
+ bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit);
bf1[39] = bf0[39];
bf1[40] = bf0[40];
- bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit[stage]);
- bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit[stage]);
+ bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit);
+ bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit);
bf1[43] = bf0[43];
bf1[44] = bf0[44];
- bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit[stage]);
- bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit[stage]);
+ bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit);
+ bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit);
bf1[47] = bf0[47];
bf1[48] = bf0[48];
- bf1[49] = half_btf(-cospi[52], bf0[46], cospi[12], bf0[49], cos_bit[stage]);
- bf1[50] = half_btf(cospi[12], bf0[45], cospi[52], bf0[50], cos_bit[stage]);
+ bf1[49] = half_btf(-cospi[52], bf0[46], cospi[12], bf0[49], cos_bit);
+ bf1[50] = half_btf(cospi[12], bf0[45], cospi[52], bf0[50], cos_bit);
bf1[51] = bf0[51];
bf1[52] = bf0[52];
- bf1[53] = half_btf(-cospi[20], bf0[42], cospi[44], bf0[53], cos_bit[stage]);
- bf1[54] = half_btf(cospi[44], bf0[41], cospi[20], bf0[54], cos_bit[stage]);
+ bf1[53] = half_btf(-cospi[20], bf0[42], cospi[44], bf0[53], cos_bit);
+ bf1[54] = half_btf(cospi[44], bf0[41], cospi[20], bf0[54], cos_bit);
bf1[55] = bf0[55];
bf1[56] = bf0[56];
- bf1[57] = half_btf(-cospi[36], bf0[38], cospi[28], bf0[57], cos_bit[stage]);
- bf1[58] = half_btf(cospi[28], bf0[37], cospi[36], bf0[58], cos_bit[stage]);
+ bf1[57] = half_btf(-cospi[36], bf0[38], cospi[28], bf0[57], cos_bit);
+ bf1[58] = half_btf(cospi[28], bf0[37], cospi[36], bf0[58], cos_bit);
bf1[59] = bf0[59];
bf1[60] = bf0[60];
- bf1[61] = half_btf(-cospi[4], bf0[34], cospi[60], bf0[61], cos_bit[stage]);
- bf1[62] = half_btf(cospi[60], bf0[33], cospi[4], bf0[62], cos_bit[stage]);
+ bf1[61] = half_btf(-cospi[4], bf0[34], cospi[60], bf0[61], cos_bit);
+ bf1[62] = half_btf(cospi[60], bf0[33], cospi[4], bf0[62], cos_bit);
bf1[63] = bf0[63];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 5
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = step;
bf1 = output;
bf1[0] = bf0[0];
bf1[1] = bf0[1];
bf1[2] = bf0[2];
bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
- bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
- bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
- bf1[8] = bf0[8] + bf0[9];
- bf1[9] = bf0[8] - bf0[9];
- bf1[10] = -bf0[10] + bf0[11];
- bf1[11] = bf0[10] + bf0[11];
- bf1[12] = bf0[12] + bf0[13];
- bf1[13] = bf0[12] - bf0[13];
- bf1[14] = -bf0[14] + bf0[15];
- bf1[15] = bf0[14] + bf0[15];
+ bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit);
+ bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit);
+ bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit);
+ bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit);
+ bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]);
+ bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]);
+ bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]);
+ bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]);
+ bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]);
+ bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]);
+ bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]);
+ bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]);
bf1[16] = bf0[16];
- bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
- bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+ bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
+ bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
bf1[19] = bf0[19];
bf1[20] = bf0[20];
- bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
- bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
+ bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
bf1[23] = bf0[23];
bf1[24] = bf0[24];
- bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit[stage]);
- bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit[stage]);
+ bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit);
+ bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit);
bf1[27] = bf0[27];
bf1[28] = bf0[28];
- bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit[stage]);
- bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit[stage]);
+ bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit);
+ bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit);
bf1[31] = bf0[31];
- bf1[32] = bf0[32] + bf0[35];
- bf1[33] = bf0[33] + bf0[34];
- bf1[34] = bf0[33] - bf0[34];
- bf1[35] = bf0[32] - bf0[35];
- bf1[36] = -bf0[36] + bf0[39];
- bf1[37] = -bf0[37] + bf0[38];
- bf1[38] = bf0[37] + bf0[38];
- bf1[39] = bf0[36] + bf0[39];
- bf1[40] = bf0[40] + bf0[43];
- bf1[41] = bf0[41] + bf0[42];
- bf1[42] = bf0[41] - bf0[42];
- bf1[43] = bf0[40] - bf0[43];
- bf1[44] = -bf0[44] + bf0[47];
- bf1[45] = -bf0[45] + bf0[46];
- bf1[46] = bf0[45] + bf0[46];
- bf1[47] = bf0[44] + bf0[47];
- bf1[48] = bf0[48] + bf0[51];
- bf1[49] = bf0[49] + bf0[50];
- bf1[50] = bf0[49] - bf0[50];
- bf1[51] = bf0[48] - bf0[51];
- bf1[52] = -bf0[52] + bf0[55];
- bf1[53] = -bf0[53] + bf0[54];
- bf1[54] = bf0[53] + bf0[54];
- bf1[55] = bf0[52] + bf0[55];
- bf1[56] = bf0[56] + bf0[59];
- bf1[57] = bf0[57] + bf0[58];
- bf1[58] = bf0[57] - bf0[58];
- bf1[59] = bf0[56] - bf0[59];
- bf1[60] = -bf0[60] + bf0[63];
- bf1[61] = -bf0[61] + bf0[62];
- bf1[62] = bf0[61] + bf0[62];
- bf1[63] = bf0[60] + bf0[63];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[32] = clamp_value(bf0[32] + bf0[35], stage_range[stage]);
+ bf1[33] = clamp_value(bf0[33] + bf0[34], stage_range[stage]);
+ bf1[34] = clamp_value(bf0[33] - bf0[34], stage_range[stage]);
+ bf1[35] = clamp_value(bf0[32] - bf0[35], stage_range[stage]);
+ bf1[36] = clamp_value(-bf0[36] + bf0[39], stage_range[stage]);
+ bf1[37] = clamp_value(-bf0[37] + bf0[38], stage_range[stage]);
+ bf1[38] = clamp_value(bf0[37] + bf0[38], stage_range[stage]);
+ bf1[39] = clamp_value(bf0[36] + bf0[39], stage_range[stage]);
+ bf1[40] = clamp_value(bf0[40] + bf0[43], stage_range[stage]);
+ bf1[41] = clamp_value(bf0[41] + bf0[42], stage_range[stage]);
+ bf1[42] = clamp_value(bf0[41] - bf0[42], stage_range[stage]);
+ bf1[43] = clamp_value(bf0[40] - bf0[43], stage_range[stage]);
+ bf1[44] = clamp_value(-bf0[44] + bf0[47], stage_range[stage]);
+ bf1[45] = clamp_value(-bf0[45] + bf0[46], stage_range[stage]);
+ bf1[46] = clamp_value(bf0[45] + bf0[46], stage_range[stage]);
+ bf1[47] = clamp_value(bf0[44] + bf0[47], stage_range[stage]);
+ bf1[48] = clamp_value(bf0[48] + bf0[51], stage_range[stage]);
+ bf1[49] = clamp_value(bf0[49] + bf0[50], stage_range[stage]);
+ bf1[50] = clamp_value(bf0[49] - bf0[50], stage_range[stage]);
+ bf1[51] = clamp_value(bf0[48] - bf0[51], stage_range[stage]);
+ bf1[52] = clamp_value(-bf0[52] + bf0[55], stage_range[stage]);
+ bf1[53] = clamp_value(-bf0[53] + bf0[54], stage_range[stage]);
+ bf1[54] = clamp_value(bf0[53] + bf0[54], stage_range[stage]);
+ bf1[55] = clamp_value(bf0[52] + bf0[55], stage_range[stage]);
+ bf1[56] = clamp_value(bf0[56] + bf0[59], stage_range[stage]);
+ bf1[57] = clamp_value(bf0[57] + bf0[58], stage_range[stage]);
+ bf1[58] = clamp_value(bf0[57] - bf0[58], stage_range[stage]);
+ bf1[59] = clamp_value(bf0[56] - bf0[59], stage_range[stage]);
+ bf1[60] = clamp_value(-bf0[60] + bf0[63], stage_range[stage]);
+ bf1[61] = clamp_value(-bf0[61] + bf0[62], stage_range[stage]);
+ bf1[62] = clamp_value(bf0[61] + bf0[62], stage_range[stage]);
+ bf1[63] = clamp_value(bf0[60] + bf0[63], stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 6
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
- bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
- bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
- bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
- bf1[4] = bf0[4] + bf0[5];
- bf1[5] = bf0[4] - bf0[5];
- bf1[6] = -bf0[6] + bf0[7];
- bf1[7] = bf0[6] + bf0[7];
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
+ bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]);
+ bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]);
bf1[8] = bf0[8];
- bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
- bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
bf1[11] = bf0[11];
bf1[12] = bf0[12];
- bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]);
- bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]);
+ bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit);
+ bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit);
bf1[15] = bf0[15];
- bf1[16] = bf0[16] + bf0[19];
- bf1[17] = bf0[17] + bf0[18];
- bf1[18] = bf0[17] - bf0[18];
- bf1[19] = bf0[16] - bf0[19];
- bf1[20] = -bf0[20] + bf0[23];
- bf1[21] = -bf0[21] + bf0[22];
- bf1[22] = bf0[21] + bf0[22];
- bf1[23] = bf0[20] + bf0[23];
- bf1[24] = bf0[24] + bf0[27];
- bf1[25] = bf0[25] + bf0[26];
- bf1[26] = bf0[25] - bf0[26];
- bf1[27] = bf0[24] - bf0[27];
- bf1[28] = -bf0[28] + bf0[31];
- bf1[29] = -bf0[29] + bf0[30];
- bf1[30] = bf0[29] + bf0[30];
- bf1[31] = bf0[28] + bf0[31];
+ bf1[16] = clamp_value(bf0[16] + bf0[19], stage_range[stage]);
+ bf1[17] = clamp_value(bf0[17] + bf0[18], stage_range[stage]);
+ bf1[18] = clamp_value(bf0[17] - bf0[18], stage_range[stage]);
+ bf1[19] = clamp_value(bf0[16] - bf0[19], stage_range[stage]);
+ bf1[20] = clamp_value(-bf0[20] + bf0[23], stage_range[stage]);
+ bf1[21] = clamp_value(-bf0[21] + bf0[22], stage_range[stage]);
+ bf1[22] = clamp_value(bf0[21] + bf0[22], stage_range[stage]);
+ bf1[23] = clamp_value(bf0[20] + bf0[23], stage_range[stage]);
+ bf1[24] = clamp_value(bf0[24] + bf0[27], stage_range[stage]);
+ bf1[25] = clamp_value(bf0[25] + bf0[26], stage_range[stage]);
+ bf1[26] = clamp_value(bf0[25] - bf0[26], stage_range[stage]);
+ bf1[27] = clamp_value(bf0[24] - bf0[27], stage_range[stage]);
+ bf1[28] = clamp_value(-bf0[28] + bf0[31], stage_range[stage]);
+ bf1[29] = clamp_value(-bf0[29] + bf0[30], stage_range[stage]);
+ bf1[30] = clamp_value(bf0[29] + bf0[30], stage_range[stage]);
+ bf1[31] = clamp_value(bf0[28] + bf0[31], stage_range[stage]);
bf1[32] = bf0[32];
bf1[33] = bf0[33];
- bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit[stage]);
- bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit[stage]);
- bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit[stage]);
- bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit[stage]);
+ bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit);
+ bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit);
+ bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit);
+ bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit);
bf1[38] = bf0[38];
bf1[39] = bf0[39];
bf1[40] = bf0[40];
bf1[41] = bf0[41];
- bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit[stage]);
- bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit[stage]);
- bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit[stage]);
- bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit[stage]);
+ bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit);
+ bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit);
+ bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit);
+ bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit);
bf1[46] = bf0[46];
bf1[47] = bf0[47];
bf1[48] = bf0[48];
bf1[49] = bf0[49];
- bf1[50] = half_btf(-cospi[40], bf0[45], cospi[24], bf0[50], cos_bit[stage]);
- bf1[51] = half_btf(-cospi[40], bf0[44], cospi[24], bf0[51], cos_bit[stage]);
- bf1[52] = half_btf(cospi[24], bf0[43], cospi[40], bf0[52], cos_bit[stage]);
- bf1[53] = half_btf(cospi[24], bf0[42], cospi[40], bf0[53], cos_bit[stage]);
+ bf1[50] = half_btf(-cospi[40], bf0[45], cospi[24], bf0[50], cos_bit);
+ bf1[51] = half_btf(-cospi[40], bf0[44], cospi[24], bf0[51], cos_bit);
+ bf1[52] = half_btf(cospi[24], bf0[43], cospi[40], bf0[52], cos_bit);
+ bf1[53] = half_btf(cospi[24], bf0[42], cospi[40], bf0[53], cos_bit);
bf1[54] = bf0[54];
bf1[55] = bf0[55];
bf1[56] = bf0[56];
bf1[57] = bf0[57];
- bf1[58] = half_btf(-cospi[8], bf0[37], cospi[56], bf0[58], cos_bit[stage]);
- bf1[59] = half_btf(-cospi[8], bf0[36], cospi[56], bf0[59], cos_bit[stage]);
- bf1[60] = half_btf(cospi[56], bf0[35], cospi[8], bf0[60], cos_bit[stage]);
- bf1[61] = half_btf(cospi[56], bf0[34], cospi[8], bf0[61], cos_bit[stage]);
+ bf1[58] = half_btf(-cospi[8], bf0[37], cospi[56], bf0[58], cos_bit);
+ bf1[59] = half_btf(-cospi[8], bf0[36], cospi[56], bf0[59], cos_bit);
+ bf1[60] = half_btf(cospi[56], bf0[35], cospi[8], bf0[60], cos_bit);
+ bf1[61] = half_btf(cospi[56], bf0[34], cospi[8], bf0[61], cos_bit);
bf1[62] = bf0[62];
bf1[63] = bf0[63];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 7
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = step;
bf1 = output;
- bf1[0] = bf0[0] + bf0[3];
- bf1[1] = bf0[1] + bf0[2];
- bf1[2] = bf0[1] - bf0[2];
- bf1[3] = bf0[0] - bf0[3];
+ bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
- bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
+ bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
bf1[7] = bf0[7];
- bf1[8] = bf0[8] + bf0[11];
- bf1[9] = bf0[9] + bf0[10];
- bf1[10] = bf0[9] - bf0[10];
- bf1[11] = bf0[8] - bf0[11];
- bf1[12] = -bf0[12] + bf0[15];
- bf1[13] = -bf0[13] + bf0[14];
- bf1[14] = bf0[13] + bf0[14];
- bf1[15] = bf0[12] + bf0[15];
+ bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]);
+ bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]);
+ bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]);
+ bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]);
+ bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]);
+ bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]);
+ bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]);
+ bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]);
bf1[16] = bf0[16];
bf1[17] = bf0[17];
- bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
- bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
- bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
- bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+ bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
+ bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
+ bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
+ bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
bf1[22] = bf0[22];
bf1[23] = bf0[23];
bf1[24] = bf0[24];
bf1[25] = bf0[25];
- bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit[stage]);
- bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit[stage]);
- bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit[stage]);
- bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit[stage]);
+ bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit);
+ bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit);
+ bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit);
+ bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit);
bf1[30] = bf0[30];
bf1[31] = bf0[31];
- bf1[32] = bf0[32] + bf0[39];
- bf1[33] = bf0[33] + bf0[38];
- bf1[34] = bf0[34] + bf0[37];
- bf1[35] = bf0[35] + bf0[36];
- bf1[36] = bf0[35] - bf0[36];
- bf1[37] = bf0[34] - bf0[37];
- bf1[38] = bf0[33] - bf0[38];
- bf1[39] = bf0[32] - bf0[39];
- bf1[40] = -bf0[40] + bf0[47];
- bf1[41] = -bf0[41] + bf0[46];
- bf1[42] = -bf0[42] + bf0[45];
- bf1[43] = -bf0[43] + bf0[44];
- bf1[44] = bf0[43] + bf0[44];
- bf1[45] = bf0[42] + bf0[45];
- bf1[46] = bf0[41] + bf0[46];
- bf1[47] = bf0[40] + bf0[47];
- bf1[48] = bf0[48] + bf0[55];
- bf1[49] = bf0[49] + bf0[54];
- bf1[50] = bf0[50] + bf0[53];
- bf1[51] = bf0[51] + bf0[52];
- bf1[52] = bf0[51] - bf0[52];
- bf1[53] = bf0[50] - bf0[53];
- bf1[54] = bf0[49] - bf0[54];
- bf1[55] = bf0[48] - bf0[55];
- bf1[56] = -bf0[56] + bf0[63];
- bf1[57] = -bf0[57] + bf0[62];
- bf1[58] = -bf0[58] + bf0[61];
- bf1[59] = -bf0[59] + bf0[60];
- bf1[60] = bf0[59] + bf0[60];
- bf1[61] = bf0[58] + bf0[61];
- bf1[62] = bf0[57] + bf0[62];
- bf1[63] = bf0[56] + bf0[63];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[32] = clamp_value(bf0[32] + bf0[39], stage_range[stage]);
+ bf1[33] = clamp_value(bf0[33] + bf0[38], stage_range[stage]);
+ bf1[34] = clamp_value(bf0[34] + bf0[37], stage_range[stage]);
+ bf1[35] = clamp_value(bf0[35] + bf0[36], stage_range[stage]);
+ bf1[36] = clamp_value(bf0[35] - bf0[36], stage_range[stage]);
+ bf1[37] = clamp_value(bf0[34] - bf0[37], stage_range[stage]);
+ bf1[38] = clamp_value(bf0[33] - bf0[38], stage_range[stage]);
+ bf1[39] = clamp_value(bf0[32] - bf0[39], stage_range[stage]);
+ bf1[40] = clamp_value(-bf0[40] + bf0[47], stage_range[stage]);
+ bf1[41] = clamp_value(-bf0[41] + bf0[46], stage_range[stage]);
+ bf1[42] = clamp_value(-bf0[42] + bf0[45], stage_range[stage]);
+ bf1[43] = clamp_value(-bf0[43] + bf0[44], stage_range[stage]);
+ bf1[44] = clamp_value(bf0[43] + bf0[44], stage_range[stage]);
+ bf1[45] = clamp_value(bf0[42] + bf0[45], stage_range[stage]);
+ bf1[46] = clamp_value(bf0[41] + bf0[46], stage_range[stage]);
+ bf1[47] = clamp_value(bf0[40] + bf0[47], stage_range[stage]);
+ bf1[48] = clamp_value(bf0[48] + bf0[55], stage_range[stage]);
+ bf1[49] = clamp_value(bf0[49] + bf0[54], stage_range[stage]);
+ bf1[50] = clamp_value(bf0[50] + bf0[53], stage_range[stage]);
+ bf1[51] = clamp_value(bf0[51] + bf0[52], stage_range[stage]);
+ bf1[52] = clamp_value(bf0[51] - bf0[52], stage_range[stage]);
+ bf1[53] = clamp_value(bf0[50] - bf0[53], stage_range[stage]);
+ bf1[54] = clamp_value(bf0[49] - bf0[54], stage_range[stage]);
+ bf1[55] = clamp_value(bf0[48] - bf0[55], stage_range[stage]);
+ bf1[56] = clamp_value(-bf0[56] + bf0[63], stage_range[stage]);
+ bf1[57] = clamp_value(-bf0[57] + bf0[62], stage_range[stage]);
+ bf1[58] = clamp_value(-bf0[58] + bf0[61], stage_range[stage]);
+ bf1[59] = clamp_value(-bf0[59] + bf0[60], stage_range[stage]);
+ bf1[60] = clamp_value(bf0[59] + bf0[60], stage_range[stage]);
+ bf1[61] = clamp_value(bf0[58] + bf0[61], stage_range[stage]);
+ bf1[62] = clamp_value(bf0[57] + bf0[62], stage_range[stage]);
+ bf1[63] = clamp_value(bf0[56] + bf0[63], stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 8
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
- bf1[0] = bf0[0] + bf0[7];
- bf1[1] = bf0[1] + bf0[6];
- bf1[2] = bf0[2] + bf0[5];
- bf1[3] = bf0[3] + bf0[4];
- bf1[4] = bf0[3] - bf0[4];
- bf1[5] = bf0[2] - bf0[5];
- bf1[6] = bf0[1] - bf0[6];
- bf1[7] = bf0[0] - bf0[7];
+ bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]);
+ bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]);
+ bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]);
bf1[8] = bf0[8];
bf1[9] = bf0[9];
- bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
- bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
- bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
- bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
+ bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
+ bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
bf1[14] = bf0[14];
bf1[15] = bf0[15];
- bf1[16] = bf0[16] + bf0[23];
- bf1[17] = bf0[17] + bf0[22];
- bf1[18] = bf0[18] + bf0[21];
- bf1[19] = bf0[19] + bf0[20];
- bf1[20] = bf0[19] - bf0[20];
- bf1[21] = bf0[18] - bf0[21];
- bf1[22] = bf0[17] - bf0[22];
- bf1[23] = bf0[16] - bf0[23];
- bf1[24] = -bf0[24] + bf0[31];
- bf1[25] = -bf0[25] + bf0[30];
- bf1[26] = -bf0[26] + bf0[29];
- bf1[27] = -bf0[27] + bf0[28];
- bf1[28] = bf0[27] + bf0[28];
- bf1[29] = bf0[26] + bf0[29];
- bf1[30] = bf0[25] + bf0[30];
- bf1[31] = bf0[24] + bf0[31];
+ bf1[16] = clamp_value(bf0[16] + bf0[23], stage_range[stage]);
+ bf1[17] = clamp_value(bf0[17] + bf0[22], stage_range[stage]);
+ bf1[18] = clamp_value(bf0[18] + bf0[21], stage_range[stage]);
+ bf1[19] = clamp_value(bf0[19] + bf0[20], stage_range[stage]);
+ bf1[20] = clamp_value(bf0[19] - bf0[20], stage_range[stage]);
+ bf1[21] = clamp_value(bf0[18] - bf0[21], stage_range[stage]);
+ bf1[22] = clamp_value(bf0[17] - bf0[22], stage_range[stage]);
+ bf1[23] = clamp_value(bf0[16] - bf0[23], stage_range[stage]);
+ bf1[24] = clamp_value(-bf0[24] + bf0[31], stage_range[stage]);
+ bf1[25] = clamp_value(-bf0[25] + bf0[30], stage_range[stage]);
+ bf1[26] = clamp_value(-bf0[26] + bf0[29], stage_range[stage]);
+ bf1[27] = clamp_value(-bf0[27] + bf0[28], stage_range[stage]);
+ bf1[28] = clamp_value(bf0[27] + bf0[28], stage_range[stage]);
+ bf1[29] = clamp_value(bf0[26] + bf0[29], stage_range[stage]);
+ bf1[30] = clamp_value(bf0[25] + bf0[30], stage_range[stage]);
+ bf1[31] = clamp_value(bf0[24] + bf0[31], stage_range[stage]);
bf1[32] = bf0[32];
bf1[33] = bf0[33];
bf1[34] = bf0[34];
bf1[35] = bf0[35];
- bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit[stage]);
- bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit[stage]);
- bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit[stage]);
- bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit[stage]);
- bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit[stage]);
- bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit[stage]);
- bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit[stage]);
- bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit[stage]);
+ bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit);
+ bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit);
+ bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit);
+ bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit);
+ bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit);
+ bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit);
+ bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit);
+ bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit);
bf1[44] = bf0[44];
bf1[45] = bf0[45];
bf1[46] = bf0[46];
@@ -2172,128 +1671,126 @@ void av1_idct64_new(const int32_t *input, int32_t *output,
bf1[49] = bf0[49];
bf1[50] = bf0[50];
bf1[51] = bf0[51];
- bf1[52] = half_btf(-cospi[16], bf0[43], cospi[48], bf0[52], cos_bit[stage]);
- bf1[53] = half_btf(-cospi[16], bf0[42], cospi[48], bf0[53], cos_bit[stage]);
- bf1[54] = half_btf(-cospi[16], bf0[41], cospi[48], bf0[54], cos_bit[stage]);
- bf1[55] = half_btf(-cospi[16], bf0[40], cospi[48], bf0[55], cos_bit[stage]);
- bf1[56] = half_btf(cospi[48], bf0[39], cospi[16], bf0[56], cos_bit[stage]);
- bf1[57] = half_btf(cospi[48], bf0[38], cospi[16], bf0[57], cos_bit[stage]);
- bf1[58] = half_btf(cospi[48], bf0[37], cospi[16], bf0[58], cos_bit[stage]);
- bf1[59] = half_btf(cospi[48], bf0[36], cospi[16], bf0[59], cos_bit[stage]);
+ bf1[52] = half_btf(-cospi[16], bf0[43], cospi[48], bf0[52], cos_bit);
+ bf1[53] = half_btf(-cospi[16], bf0[42], cospi[48], bf0[53], cos_bit);
+ bf1[54] = half_btf(-cospi[16], bf0[41], cospi[48], bf0[54], cos_bit);
+ bf1[55] = half_btf(-cospi[16], bf0[40], cospi[48], bf0[55], cos_bit);
+ bf1[56] = half_btf(cospi[48], bf0[39], cospi[16], bf0[56], cos_bit);
+ bf1[57] = half_btf(cospi[48], bf0[38], cospi[16], bf0[57], cos_bit);
+ bf1[58] = half_btf(cospi[48], bf0[37], cospi[16], bf0[58], cos_bit);
+ bf1[59] = half_btf(cospi[48], bf0[36], cospi[16], bf0[59], cos_bit);
bf1[60] = bf0[60];
bf1[61] = bf0[61];
bf1[62] = bf0[62];
bf1[63] = bf0[63];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 9
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = step;
bf1 = output;
- bf1[0] = bf0[0] + bf0[15];
- bf1[1] = bf0[1] + bf0[14];
- bf1[2] = bf0[2] + bf0[13];
- bf1[3] = bf0[3] + bf0[12];
- bf1[4] = bf0[4] + bf0[11];
- bf1[5] = bf0[5] + bf0[10];
- bf1[6] = bf0[6] + bf0[9];
- bf1[7] = bf0[7] + bf0[8];
- bf1[8] = bf0[7] - bf0[8];
- bf1[9] = bf0[6] - bf0[9];
- bf1[10] = bf0[5] - bf0[10];
- bf1[11] = bf0[4] - bf0[11];
- bf1[12] = bf0[3] - bf0[12];
- bf1[13] = bf0[2] - bf0[13];
- bf1[14] = bf0[1] - bf0[14];
- bf1[15] = bf0[0] - bf0[15];
+ bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]);
+ bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]);
+ bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]);
+ bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]);
+ bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]);
+ bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]);
+ bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]);
+ bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]);
+ bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]);
+ bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]);
+ bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]);
bf1[16] = bf0[16];
bf1[17] = bf0[17];
bf1[18] = bf0[18];
bf1[19] = bf0[19];
- bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
- bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
- bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
- bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
- bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
- bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
- bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
- bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
+ bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
+ bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
+ bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
+ bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
+ bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
+ bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
bf1[28] = bf0[28];
bf1[29] = bf0[29];
bf1[30] = bf0[30];
bf1[31] = bf0[31];
- bf1[32] = bf0[32] + bf0[47];
- bf1[33] = bf0[33] + bf0[46];
- bf1[34] = bf0[34] + bf0[45];
- bf1[35] = bf0[35] + bf0[44];
- bf1[36] = bf0[36] + bf0[43];
- bf1[37] = bf0[37] + bf0[42];
- bf1[38] = bf0[38] + bf0[41];
- bf1[39] = bf0[39] + bf0[40];
- bf1[40] = bf0[39] - bf0[40];
- bf1[41] = bf0[38] - bf0[41];
- bf1[42] = bf0[37] - bf0[42];
- bf1[43] = bf0[36] - bf0[43];
- bf1[44] = bf0[35] - bf0[44];
- bf1[45] = bf0[34] - bf0[45];
- bf1[46] = bf0[33] - bf0[46];
- bf1[47] = bf0[32] - bf0[47];
- bf1[48] = -bf0[48] + bf0[63];
- bf1[49] = -bf0[49] + bf0[62];
- bf1[50] = -bf0[50] + bf0[61];
- bf1[51] = -bf0[51] + bf0[60];
- bf1[52] = -bf0[52] + bf0[59];
- bf1[53] = -bf0[53] + bf0[58];
- bf1[54] = -bf0[54] + bf0[57];
- bf1[55] = -bf0[55] + bf0[56];
- bf1[56] = bf0[55] + bf0[56];
- bf1[57] = bf0[54] + bf0[57];
- bf1[58] = bf0[53] + bf0[58];
- bf1[59] = bf0[52] + bf0[59];
- bf1[60] = bf0[51] + bf0[60];
- bf1[61] = bf0[50] + bf0[61];
- bf1[62] = bf0[49] + bf0[62];
- bf1[63] = bf0[48] + bf0[63];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[32] = clamp_value(bf0[32] + bf0[47], stage_range[stage]);
+ bf1[33] = clamp_value(bf0[33] + bf0[46], stage_range[stage]);
+ bf1[34] = clamp_value(bf0[34] + bf0[45], stage_range[stage]);
+ bf1[35] = clamp_value(bf0[35] + bf0[44], stage_range[stage]);
+ bf1[36] = clamp_value(bf0[36] + bf0[43], stage_range[stage]);
+ bf1[37] = clamp_value(bf0[37] + bf0[42], stage_range[stage]);
+ bf1[38] = clamp_value(bf0[38] + bf0[41], stage_range[stage]);
+ bf1[39] = clamp_value(bf0[39] + bf0[40], stage_range[stage]);
+ bf1[40] = clamp_value(bf0[39] - bf0[40], stage_range[stage]);
+ bf1[41] = clamp_value(bf0[38] - bf0[41], stage_range[stage]);
+ bf1[42] = clamp_value(bf0[37] - bf0[42], stage_range[stage]);
+ bf1[43] = clamp_value(bf0[36] - bf0[43], stage_range[stage]);
+ bf1[44] = clamp_value(bf0[35] - bf0[44], stage_range[stage]);
+ bf1[45] = clamp_value(bf0[34] - bf0[45], stage_range[stage]);
+ bf1[46] = clamp_value(bf0[33] - bf0[46], stage_range[stage]);
+ bf1[47] = clamp_value(bf0[32] - bf0[47], stage_range[stage]);
+ bf1[48] = clamp_value(-bf0[48] + bf0[63], stage_range[stage]);
+ bf1[49] = clamp_value(-bf0[49] + bf0[62], stage_range[stage]);
+ bf1[50] = clamp_value(-bf0[50] + bf0[61], stage_range[stage]);
+ bf1[51] = clamp_value(-bf0[51] + bf0[60], stage_range[stage]);
+ bf1[52] = clamp_value(-bf0[52] + bf0[59], stage_range[stage]);
+ bf1[53] = clamp_value(-bf0[53] + bf0[58], stage_range[stage]);
+ bf1[54] = clamp_value(-bf0[54] + bf0[57], stage_range[stage]);
+ bf1[55] = clamp_value(-bf0[55] + bf0[56], stage_range[stage]);
+ bf1[56] = clamp_value(bf0[55] + bf0[56], stage_range[stage]);
+ bf1[57] = clamp_value(bf0[54] + bf0[57], stage_range[stage]);
+ bf1[58] = clamp_value(bf0[53] + bf0[58], stage_range[stage]);
+ bf1[59] = clamp_value(bf0[52] + bf0[59], stage_range[stage]);
+ bf1[60] = clamp_value(bf0[51] + bf0[60], stage_range[stage]);
+ bf1[61] = clamp_value(bf0[50] + bf0[61], stage_range[stage]);
+ bf1[62] = clamp_value(bf0[49] + bf0[62], stage_range[stage]);
+ bf1[63] = clamp_value(bf0[48] + bf0[63], stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 10
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = output;
bf1 = step;
- bf1[0] = bf0[0] + bf0[31];
- bf1[1] = bf0[1] + bf0[30];
- bf1[2] = bf0[2] + bf0[29];
- bf1[3] = bf0[3] + bf0[28];
- bf1[4] = bf0[4] + bf0[27];
- bf1[5] = bf0[5] + bf0[26];
- bf1[6] = bf0[6] + bf0[25];
- bf1[7] = bf0[7] + bf0[24];
- bf1[8] = bf0[8] + bf0[23];
- bf1[9] = bf0[9] + bf0[22];
- bf1[10] = bf0[10] + bf0[21];
- bf1[11] = bf0[11] + bf0[20];
- bf1[12] = bf0[12] + bf0[19];
- bf1[13] = bf0[13] + bf0[18];
- bf1[14] = bf0[14] + bf0[17];
- bf1[15] = bf0[15] + bf0[16];
- bf1[16] = bf0[15] - bf0[16];
- bf1[17] = bf0[14] - bf0[17];
- bf1[18] = bf0[13] - bf0[18];
- bf1[19] = bf0[12] - bf0[19];
- bf1[20] = bf0[11] - bf0[20];
- bf1[21] = bf0[10] - bf0[21];
- bf1[22] = bf0[9] - bf0[22];
- bf1[23] = bf0[8] - bf0[23];
- bf1[24] = bf0[7] - bf0[24];
- bf1[25] = bf0[6] - bf0[25];
- bf1[26] = bf0[5] - bf0[26];
- bf1[27] = bf0[4] - bf0[27];
- bf1[28] = bf0[3] - bf0[28];
- bf1[29] = bf0[2] - bf0[29];
- bf1[30] = bf0[1] - bf0[30];
- bf1[31] = bf0[0] - bf0[31];
+ bf1[0] = clamp_value(bf0[0] + bf0[31], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[30], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[2] + bf0[29], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[3] + bf0[28], stage_range[stage]);
+ bf1[4] = clamp_value(bf0[4] + bf0[27], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[5] + bf0[26], stage_range[stage]);
+ bf1[6] = clamp_value(bf0[6] + bf0[25], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[7] + bf0[24], stage_range[stage]);
+ bf1[8] = clamp_value(bf0[8] + bf0[23], stage_range[stage]);
+ bf1[9] = clamp_value(bf0[9] + bf0[22], stage_range[stage]);
+ bf1[10] = clamp_value(bf0[10] + bf0[21], stage_range[stage]);
+ bf1[11] = clamp_value(bf0[11] + bf0[20], stage_range[stage]);
+ bf1[12] = clamp_value(bf0[12] + bf0[19], stage_range[stage]);
+ bf1[13] = clamp_value(bf0[13] + bf0[18], stage_range[stage]);
+ bf1[14] = clamp_value(bf0[14] + bf0[17], stage_range[stage]);
+ bf1[15] = clamp_value(bf0[15] + bf0[16], stage_range[stage]);
+ bf1[16] = clamp_value(bf0[15] - bf0[16], stage_range[stage]);
+ bf1[17] = clamp_value(bf0[14] - bf0[17], stage_range[stage]);
+ bf1[18] = clamp_value(bf0[13] - bf0[18], stage_range[stage]);
+ bf1[19] = clamp_value(bf0[12] - bf0[19], stage_range[stage]);
+ bf1[20] = clamp_value(bf0[11] - bf0[20], stage_range[stage]);
+ bf1[21] = clamp_value(bf0[10] - bf0[21], stage_range[stage]);
+ bf1[22] = clamp_value(bf0[9] - bf0[22], stage_range[stage]);
+ bf1[23] = clamp_value(bf0[8] - bf0[23], stage_range[stage]);
+ bf1[24] = clamp_value(bf0[7] - bf0[24], stage_range[stage]);
+ bf1[25] = clamp_value(bf0[6] - bf0[25], stage_range[stage]);
+ bf1[26] = clamp_value(bf0[5] - bf0[26], stage_range[stage]);
+ bf1[27] = clamp_value(bf0[4] - bf0[27], stage_range[stage]);
+ bf1[28] = clamp_value(bf0[3] - bf0[28], stage_range[stage]);
+ bf1[29] = clamp_value(bf0[2] - bf0[29], stage_range[stage]);
+ bf1[30] = clamp_value(bf0[1] - bf0[30], stage_range[stage]);
+ bf1[31] = clamp_value(bf0[0] - bf0[31], stage_range[stage]);
bf1[32] = bf0[32];
bf1[33] = bf0[33];
bf1[34] = bf0[34];
@@ -2302,22 +1799,22 @@ void av1_idct64_new(const int32_t *input, int32_t *output,
bf1[37] = bf0[37];
bf1[38] = bf0[38];
bf1[39] = bf0[39];
- bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
- bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
- bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
- bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
- bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
- bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
- bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
- bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
- bf1[48] = half_btf(cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
- bf1[49] = half_btf(cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
- bf1[50] = half_btf(cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
- bf1[51] = half_btf(cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
- bf1[52] = half_btf(cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
- bf1[53] = half_btf(cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
- bf1[54] = half_btf(cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
- bf1[55] = half_btf(cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
+ bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit);
+ bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit);
+ bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit);
+ bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit);
+ bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit);
+ bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit);
+ bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit);
+ bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit);
+ bf1[48] = half_btf(cospi[32], bf0[47], cospi[32], bf0[48], cos_bit);
+ bf1[49] = half_btf(cospi[32], bf0[46], cospi[32], bf0[49], cos_bit);
+ bf1[50] = half_btf(cospi[32], bf0[45], cospi[32], bf0[50], cos_bit);
+ bf1[51] = half_btf(cospi[32], bf0[44], cospi[32], bf0[51], cos_bit);
+ bf1[52] = half_btf(cospi[32], bf0[43], cospi[32], bf0[52], cos_bit);
+ bf1[53] = half_btf(cospi[32], bf0[42], cospi[32], bf0[53], cos_bit);
+ bf1[54] = half_btf(cospi[32], bf0[41], cospi[32], bf0[54], cos_bit);
+ bf1[55] = half_btf(cospi[32], bf0[40], cospi[32], bf0[55], cos_bit);
bf1[56] = bf0[56];
bf1[57] = bf0[57];
bf1[58] = bf0[58];
@@ -2326,77 +1823,74 @@ void av1_idct64_new(const int32_t *input, int32_t *output,
bf1[61] = bf0[61];
bf1[62] = bf0[62];
bf1[63] = bf0[63];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ range_check_buf(stage, input, bf1, size, stage_range[stage]);
// stage 11
stage++;
- cospi = cospi_arr(cos_bit[stage]);
bf0 = step;
bf1 = output;
- bf1[0] = bf0[0] + bf0[63];
- bf1[1] = bf0[1] + bf0[62];
- bf1[2] = bf0[2] + bf0[61];
- bf1[3] = bf0[3] + bf0[60];
- bf1[4] = bf0[4] + bf0[59];
- bf1[5] = bf0[5] + bf0[58];
- bf1[6] = bf0[6] + bf0[57];
- bf1[7] = bf0[7] + bf0[56];
- bf1[8] = bf0[8] + bf0[55];
- bf1[9] = bf0[9] + bf0[54];
- bf1[10] = bf0[10] + bf0[53];
- bf1[11] = bf0[11] + bf0[52];
- bf1[12] = bf0[12] + bf0[51];
- bf1[13] = bf0[13] + bf0[50];
- bf1[14] = bf0[14] + bf0[49];
- bf1[15] = bf0[15] + bf0[48];
- bf1[16] = bf0[16] + bf0[47];
- bf1[17] = bf0[17] + bf0[46];
- bf1[18] = bf0[18] + bf0[45];
- bf1[19] = bf0[19] + bf0[44];
- bf1[20] = bf0[20] + bf0[43];
- bf1[21] = bf0[21] + bf0[42];
- bf1[22] = bf0[22] + bf0[41];
- bf1[23] = bf0[23] + bf0[40];
- bf1[24] = bf0[24] + bf0[39];
- bf1[25] = bf0[25] + bf0[38];
- bf1[26] = bf0[26] + bf0[37];
- bf1[27] = bf0[27] + bf0[36];
- bf1[28] = bf0[28] + bf0[35];
- bf1[29] = bf0[29] + bf0[34];
- bf1[30] = bf0[30] + bf0[33];
- bf1[31] = bf0[31] + bf0[32];
- bf1[32] = bf0[31] - bf0[32];
- bf1[33] = bf0[30] - bf0[33];
- bf1[34] = bf0[29] - bf0[34];
- bf1[35] = bf0[28] - bf0[35];
- bf1[36] = bf0[27] - bf0[36];
- bf1[37] = bf0[26] - bf0[37];
- bf1[38] = bf0[25] - bf0[38];
- bf1[39] = bf0[24] - bf0[39];
- bf1[40] = bf0[23] - bf0[40];
- bf1[41] = bf0[22] - bf0[41];
- bf1[42] = bf0[21] - bf0[42];
- bf1[43] = bf0[20] - bf0[43];
- bf1[44] = bf0[19] - bf0[44];
- bf1[45] = bf0[18] - bf0[45];
- bf1[46] = bf0[17] - bf0[46];
- bf1[47] = bf0[16] - bf0[47];
- bf1[48] = bf0[15] - bf0[48];
- bf1[49] = bf0[14] - bf0[49];
- bf1[50] = bf0[13] - bf0[50];
- bf1[51] = bf0[12] - bf0[51];
- bf1[52] = bf0[11] - bf0[52];
- bf1[53] = bf0[10] - bf0[53];
- bf1[54] = bf0[9] - bf0[54];
- bf1[55] = bf0[8] - bf0[55];
- bf1[56] = bf0[7] - bf0[56];
- bf1[57] = bf0[6] - bf0[57];
- bf1[58] = bf0[5] - bf0[58];
- bf1[59] = bf0[4] - bf0[59];
- bf1[60] = bf0[3] - bf0[60];
- bf1[61] = bf0[2] - bf0[61];
- bf1[62] = bf0[1] - bf0[62];
- bf1[63] = bf0[0] - bf0[63];
- range_check(stage, input, bf1, size, stage_range[stage]);
+ bf1[0] = clamp_value(bf0[0] + bf0[63], stage_range[stage]);
+ bf1[1] = clamp_value(bf0[1] + bf0[62], stage_range[stage]);
+ bf1[2] = clamp_value(bf0[2] + bf0[61], stage_range[stage]);
+ bf1[3] = clamp_value(bf0[3] + bf0[60], stage_range[stage]);
+ bf1[4] = clamp_value(bf0[4] + bf0[59], stage_range[stage]);
+ bf1[5] = clamp_value(bf0[5] + bf0[58], stage_range[stage]);
+ bf1[6] = clamp_value(bf0[6] + bf0[57], stage_range[stage]);
+ bf1[7] = clamp_value(bf0[7] + bf0[56], stage_range[stage]);
+ bf1[8] = clamp_value(bf0[8] + bf0[55], stage_range[stage]);
+ bf1[9] = clamp_value(bf0[9] + bf0[54], stage_range[stage]);
+ bf1[10] = clamp_value(bf0[10] + bf0[53], stage_range[stage]);
+ bf1[11] = clamp_value(bf0[11] + bf0[52], stage_range[stage]);
+ bf1[12] = clamp_value(bf0[12] + bf0[51], stage_range[stage]);
+ bf1[13] = clamp_value(bf0[13] + bf0[50], stage_range[stage]);
+ bf1[14] = clamp_value(bf0[14] + bf0[49], stage_range[stage]);
+ bf1[15] = clamp_value(bf0[15] + bf0[48], stage_range[stage]);
+ bf1[16] = clamp_value(bf0[16] + bf0[47], stage_range[stage]);
+ bf1[17] = clamp_value(bf0[17] + bf0[46], stage_range[stage]);
+ bf1[18] = clamp_value(bf0[18] + bf0[45], stage_range[stage]);
+ bf1[19] = clamp_value(bf0[19] + bf0[44], stage_range[stage]);
+ bf1[20] = clamp_value(bf0[20] + bf0[43], stage_range[stage]);
+ bf1[21] = clamp_value(bf0[21] + bf0[42], stage_range[stage]);
+ bf1[22] = clamp_value(bf0[22] + bf0[41], stage_range[stage]);
+ bf1[23] = clamp_value(bf0[23] + bf0[40], stage_range[stage]);
+ bf1[24] = clamp_value(bf0[24] + bf0[39], stage_range[stage]);
+ bf1[25] = clamp_value(bf0[25] + bf0[38], stage_range[stage]);
+ bf1[26] = clamp_value(bf0[26] + bf0[37], stage_range[stage]);
+ bf1[27] = clamp_value(bf0[27] + bf0[36], stage_range[stage]);
+ bf1[28] = clamp_value(bf0[28] + bf0[35], stage_range[stage]);
+ bf1[29] = clamp_value(bf0[29] + bf0[34], stage_range[stage]);
+ bf1[30] = clamp_value(bf0[30] + bf0[33], stage_range[stage]);
+ bf1[31] = clamp_value(bf0[31] + bf0[32], stage_range[stage]);
+ bf1[32] = clamp_value(bf0[31] - bf0[32], stage_range[stage]);
+ bf1[33] = clamp_value(bf0[30] - bf0[33], stage_range[stage]);
+ bf1[34] = clamp_value(bf0[29] - bf0[34], stage_range[stage]);
+ bf1[35] = clamp_value(bf0[28] - bf0[35], stage_range[stage]);
+ bf1[36] = clamp_value(bf0[27] - bf0[36], stage_range[stage]);
+ bf1[37] = clamp_value(bf0[26] - bf0[37], stage_range[stage]);
+ bf1[38] = clamp_value(bf0[25] - bf0[38], stage_range[stage]);
+ bf1[39] = clamp_value(bf0[24] - bf0[39], stage_range[stage]);
+ bf1[40] = clamp_value(bf0[23] - bf0[40], stage_range[stage]);
+ bf1[41] = clamp_value(bf0[22] - bf0[41], stage_range[stage]);
+ bf1[42] = clamp_value(bf0[21] - bf0[42], stage_range[stage]);
+ bf1[43] = clamp_value(bf0[20] - bf0[43], stage_range[stage]);
+ bf1[44] = clamp_value(bf0[19] - bf0[44], stage_range[stage]);
+ bf1[45] = clamp_value(bf0[18] - bf0[45], stage_range[stage]);
+ bf1[46] = clamp_value(bf0[17] - bf0[46], stage_range[stage]);
+ bf1[47] = clamp_value(bf0[16] - bf0[47], stage_range[stage]);
+ bf1[48] = clamp_value(bf0[15] - bf0[48], stage_range[stage]);
+ bf1[49] = clamp_value(bf0[14] - bf0[49], stage_range[stage]);
+ bf1[50] = clamp_value(bf0[13] - bf0[50], stage_range[stage]);
+ bf1[51] = clamp_value(bf0[12] - bf0[51], stage_range[stage]);
+ bf1[52] = clamp_value(bf0[11] - bf0[52], stage_range[stage]);
+ bf1[53] = clamp_value(bf0[10] - bf0[53], stage_range[stage]);
+ bf1[54] = clamp_value(bf0[9] - bf0[54], stage_range[stage]);
+ bf1[55] = clamp_value(bf0[8] - bf0[55], stage_range[stage]);
+ bf1[56] = clamp_value(bf0[7] - bf0[56], stage_range[stage]);
+ bf1[57] = clamp_value(bf0[6] - bf0[57], stage_range[stage]);
+ bf1[58] = clamp_value(bf0[5] - bf0[58], stage_range[stage]);
+ bf1[59] = clamp_value(bf0[4] - bf0[59], stage_range[stage]);
+ bf1[60] = clamp_value(bf0[3] - bf0[60], stage_range[stage]);
+ bf1[61] = clamp_value(bf0[2] - bf0[61], stage_range[stage]);
+ bf1[62] = clamp_value(bf0[1] - bf0[62], stage_range[stage]);
+ bf1[63] = clamp_value(bf0[0] - bf0[63], stage_range[stage]);
}
-#endif // CONFIG_TX64X64
diff --git a/third_party/aom/av1/common/av1_inv_txfm1d.h b/third_party/aom/av1/common/av1_inv_txfm1d.h
index 8996f7c9d..64a1a921c 100644
--- a/third_party/aom/av1/common/av1_inv_txfm1d.h
+++ b/third_party/aom/av1/common/av1_inv_txfm1d.h
@@ -18,41 +18,41 @@
extern "C" {
#endif
-void av1_idct4_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
+static INLINE int32_t clamp_value(int32_t value, int8_t bit) {
+ if (bit <= 0) return value; // Do nothing for invalid clamp bit.
+ const int64_t max_value = (1LL << (bit - 1)) - 1;
+ const int64_t min_value = -(1LL << (bit - 1));
+ return (int32_t)clamp64(value, min_value, max_value);
+}
+
+static INLINE void clamp_buf(int32_t *buf, int32_t size, int8_t bit) {
+ for (int i = 0; i < size; ++i) buf[i] = clamp_value(buf[i], bit);
+}
+
+void av1_idct4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
const int8_t *stage_range);
-void av1_idct8_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
+void av1_idct8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
const int8_t *stage_range);
-void av1_idct16_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_idct32_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-#if CONFIG_TX64X64
-void av1_idct64_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-#endif // CONFIG_TX64X64
-
-void av1_iadst4_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_iadst8_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_iadst16_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_iadst32_new(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-#if CONFIG_EXT_TX
-void av1_iidentity4_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_iidentity8_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_iidentity16_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_iidentity32_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-#if CONFIG_TX64X64
-void av1_iidentity64_c(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-#endif // CONFIG_TX64X64
-#endif // CONFIG_EXT_TX
+void av1_idct16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range);
+void av1_idct32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range);
+void av1_idct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range);
+void av1_iadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range);
+void av1_iadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range);
+void av1_iadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range);
+void av1_iidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range);
+void av1_iidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range);
+void av1_iidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range);
+void av1_iidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range);
#ifdef __cplusplus
}
diff --git a/third_party/aom/av1/common/av1_inv_txfm1d_cfg.h b/third_party/aom/av1/common/av1_inv_txfm1d_cfg.h
index 8bcf84e05..4c600f756 100644
--- a/third_party/aom/av1/common/av1_inv_txfm1d_cfg.h
+++ b/third_party/aom/av1/common/av1_inv_txfm1d_cfg.h
@@ -14,358 +14,34 @@
#include "av1/common/av1_inv_txfm1d.h"
// sum of fwd_shift_##
-#if CONFIG_CHROMA_2X2
-#if CONFIG_TX64X64
-static const int8_t fwd_shift_sum[TX_SIZES] = { 3, 2, 1, 0, -2, -4 };
-#else // CONFIG_TX64X64
-static const int8_t fwd_shift_sum[TX_SIZES] = { 3, 2, 1, 0, -2 };
-#endif // CONFIG_TX64X64
-#else // CONFIG_CHROMA_2X2
-#if CONFIG_TX64X64
-static const int8_t fwd_shift_sum[TX_SIZES] = { 2, 1, 0, -2, -4 };
-#else // CONFIG_TX64X64
-static const int8_t fwd_shift_sum[TX_SIZES] = { 2, 1, 0, -2 };
-#endif // CONFIG_TX64X64
-#endif // CONFIG_CHROMA_2X2
+static const int8_t inv_start_range[TX_SIZES_ALL] = {
+ 5, // 4x4 transform
+ 6, // 8x8 transform
+ 7, // 16x16 transform
+ 7, // 32x32 transform
+ 7, // 64x64 transform
+ 5, // 4x8 transform
+ 5, // 8x4 transform
+ 6, // 8x16 transform
+ 6, // 16x8 transform
+ 6, // 16x32 transform
+ 6, // 32x16 transform
+ 6, // 32x64 transform
+ 6, // 64x32 transform
+ 6, // 4x16 transform
+ 6, // 16x4 transform
+ 7, // 8x32 transform
+ 7, // 32x8 transform
+ 7, // 16x64 transform
+ 7, // 64x16 transform
+};
+
+extern const int8_t *inv_txfm_shift_ls[TX_SIZES_ALL];
+
+// Values in both inv_cos_bit_col and inv_cos_bit_row are always 12
+// for each valid row and col combination
+#define INV_COS_BIT 12
+extern const int8_t inv_cos_bit_col[5 /*row*/][5 /*col*/];
+extern const int8_t inv_cos_bit_row[5 /*row*/][5 /*col*/];
-// ---------------- 4x4 1D config -----------------------
-// shift
-static const int8_t inv_shift_4[2] = { 0, -4 };
-
-// stage range
-static const int8_t inv_stage_range_col_dct_4[4] = { 3, 3, 2, 2 };
-static const int8_t inv_stage_range_row_dct_4[4] = { 3, 3, 3, 3 };
-static const int8_t inv_stage_range_col_adst_4[6] = { 3, 3, 3, 3, 2, 2 };
-static const int8_t inv_stage_range_row_adst_4[6] = { 3, 3, 3, 3, 3, 3 };
-static const int8_t inv_stage_range_idx_4[1] = { 0 };
-
-// cos bit
-static const int8_t inv_cos_bit_col_dct_4[4] = { 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_dct_4[4] = { 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_col_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
-
-// ---------------- 8x8 1D constants -----------------------
-// shift
-static const int8_t inv_shift_8[2] = { 0, -5 };
-
-// stage range
-static const int8_t inv_stage_range_col_dct_8[6] = { 5, 5, 5, 5, 4, 4 };
-static const int8_t inv_stage_range_row_dct_8[6] = { 5, 5, 5, 5, 5, 5 };
-static const int8_t inv_stage_range_col_adst_8[8] = { 5, 5, 5, 5, 5, 5, 4, 4 };
-static const int8_t inv_stage_range_row_adst_8[8] = { 5, 5, 5, 5, 5, 5, 5, 5 };
-static const int8_t inv_stage_range_idx_8[1] = { 0 };
-
-// cos bit
-static const int8_t inv_cos_bit_col_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_col_adst_8[8] = {
- 13, 13, 13, 13, 13, 13, 13, 13
-};
-static const int8_t inv_cos_bit_row_adst_8[8] = {
- 13, 13, 13, 13, 13, 13, 13, 13
-};
-
-// ---------------- 16x16 1D constants -----------------------
-// shift
-static const int8_t inv_shift_16[2] = { -1, -5 };
-
-// stage range
-static const int8_t inv_stage_range_col_dct_16[8] = { 7, 7, 7, 7, 7, 7, 6, 6 };
-static const int8_t inv_stage_range_row_dct_16[8] = { 7, 7, 7, 7, 7, 7, 7, 7 };
-static const int8_t inv_stage_range_col_adst_16[10] = { 7, 7, 7, 7, 7,
- 7, 7, 7, 6, 6 };
-static const int8_t inv_stage_range_row_adst_16[10] = { 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7 };
-static const int8_t inv_stage_range_idx_16[1] = { 0 };
-
-// cos bit
-static const int8_t inv_cos_bit_col_dct_16[8] = {
- 13, 13, 13, 13, 13, 13, 13, 13
-};
-static const int8_t inv_cos_bit_row_dct_16[8] = {
- 12, 12, 12, 12, 12, 12, 12, 12
-};
-static const int8_t inv_cos_bit_col_adst_16[10] = { 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_adst_16[10] = { 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12 };
-
-// ---------------- 32x32 1D constants -----------------------
-// shift
-static const int8_t inv_shift_32[2] = { -1, -5 };
-
-// stage range
-static const int8_t inv_stage_range_col_dct_32[10] = { 9, 9, 9, 9, 9,
- 9, 9, 9, 8, 8 };
-static const int8_t inv_stage_range_row_dct_32[10] = { 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9 };
-static const int8_t inv_stage_range_col_adst_32[12] = { 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 8, 8 };
-static const int8_t inv_stage_range_row_adst_32[12] = { 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9 };
-static const int8_t inv_stage_range_idx_32[1] = { 0 };
-
-// cos bit
-static const int8_t inv_cos_bit_col_dct_32[10] = { 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_dct_32[10] = { 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12 };
-static const int8_t inv_cos_bit_col_adst_32[12] = { 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_adst_32[12] = { 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 12 };
-
-// ---------------- 64x64 1D constants -----------------------
-// shift
-static const int8_t inv_shift_64[2] = { -1, -5 };
-
-// stage range
-static const int8_t inv_stage_range_col_dct_64[12] = { 11, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 10, 10 };
-static const int8_t inv_stage_range_row_dct_64[12] = { 11, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 11 };
-
-static const int8_t inv_stage_range_idx_64[1] = { 0 };
-
-// cos bit
-static const int8_t inv_cos_bit_col_dct_64[12] = { 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13, 13 };
-static const int8_t inv_cos_bit_row_dct_64[12] = { 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 12 };
-
-// ---------------- row config inv_dct_4 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_4 = {
- 4, // .txfm_size
- 4, // .stage_num
- inv_shift_4, // .shift
- inv_stage_range_row_dct_4, // .stage_range
- inv_cos_bit_row_dct_4, // .cos_bit
- TXFM_TYPE_DCT4 // .txfm_type
-};
-
-// ---------------- row config inv_dct_8 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_8 = {
- 8, // .txfm_size
- 6, // .stage_num
- inv_shift_8, // .shift
- inv_stage_range_row_dct_8, // .stage_range
- inv_cos_bit_row_dct_8, // .cos_bit_
- TXFM_TYPE_DCT8 // .txfm_type
-};
-// ---------------- row config inv_dct_16 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_16 = {
- 16, // .txfm_size
- 8, // .stage_num
- inv_shift_16, // .shift
- inv_stage_range_row_dct_16, // .stage_range
- inv_cos_bit_row_dct_16, // .cos_bit
- TXFM_TYPE_DCT16 // .txfm_type
-};
-
-// ---------------- row config inv_dct_32 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_32 = {
- 32, // .txfm_size
- 10, // .stage_num
- inv_shift_32, // .shift
- inv_stage_range_row_dct_32, // .stage_range
- inv_cos_bit_row_dct_32, // .cos_bit_row
- TXFM_TYPE_DCT32 // .txfm_type
-};
-
-#if CONFIG_TX64X64
-// ---------------- row config inv_dct_64 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_64 = {
- 64, // .txfm_size
- 12, // .stage_num
- inv_shift_64, // .shift
- inv_stage_range_row_dct_64, // .stage_range
- inv_cos_bit_row_dct_64, // .cos_bit
- TXFM_TYPE_DCT64, // .txfm_type_col
-};
-#endif // CONFIG_TX64X64
-
-// ---------------- row config inv_adst_4 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_4 = {
- 4, // .txfm_size
- 6, // .stage_num
- inv_shift_4, // .shift
- inv_stage_range_row_adst_4, // .stage_range
- inv_cos_bit_row_adst_4, // .cos_bit
- TXFM_TYPE_ADST4, // .txfm_type
-};
-
-// ---------------- row config inv_adst_8 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_8 = {
- 8, // .txfm_size
- 8, // .stage_num
- inv_shift_8, // .shift
- inv_stage_range_row_adst_8, // .stage_range
- inv_cos_bit_row_adst_8, // .cos_bit
- TXFM_TYPE_ADST8, // .txfm_type_col
-};
-
-// ---------------- row config inv_adst_16 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_16 = {
- 16, // .txfm_size
- 10, // .stage_num
- inv_shift_16, // .shift
- inv_stage_range_row_adst_16, // .stage_range
- inv_cos_bit_row_adst_16, // .cos_bit
- TXFM_TYPE_ADST16, // .txfm_type
-};
-
-// ---------------- row config inv_adst_32 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_32 = {
- 32, // .txfm_size
- 12, // .stage_num
- inv_shift_32, // .shift
- inv_stage_range_row_adst_32, // .stage_range
- inv_cos_bit_row_adst_32, // .cos_bit
- TXFM_TYPE_ADST32, // .txfm_type
-};
-
-// ---------------- col config inv_dct_4 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_4 = {
- 4, // .txfm_size
- 4, // .stage_num
- inv_shift_4, // .shift
- inv_stage_range_col_dct_4, // .stage_range
- inv_cos_bit_col_dct_4, // .cos_bit
- TXFM_TYPE_DCT4 // .txfm_type
-};
-
-// ---------------- col config inv_dct_8 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_8 = {
- 8, // .txfm_size
- 6, // .stage_num
- inv_shift_8, // .shift
- inv_stage_range_col_dct_8, // .stage_range
- inv_cos_bit_col_dct_8, // .cos_bit_
- TXFM_TYPE_DCT8 // .txfm_type
-};
-// ---------------- col config inv_dct_16 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_16 = {
- 16, // .txfm_size
- 8, // .stage_num
- inv_shift_16, // .shift
- inv_stage_range_col_dct_16, // .stage_range
- inv_cos_bit_col_dct_16, // .cos_bit
- TXFM_TYPE_DCT16 // .txfm_type
-};
-
-// ---------------- col config inv_dct_32 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_32 = {
- 32, // .txfm_size
- 10, // .stage_num
- inv_shift_32, // .shift
- inv_stage_range_col_dct_32, // .stage_range
- inv_cos_bit_col_dct_32, // .cos_bit_col
- TXFM_TYPE_DCT32 // .txfm_type
-};
-
-// ---------------- col config inv_dct_64 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_64 = {
- 64, // .txfm_size
- 12, // .stage_num
- inv_shift_64, // .shift
- inv_stage_range_col_dct_64, // .stage_range
- inv_cos_bit_col_dct_64, // .cos_bit
- TXFM_TYPE_DCT64, // .txfm_type_col
-};
-
-// ---------------- col config inv_adst_4 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_4 = {
- 4, // .txfm_size
- 6, // .stage_num
- inv_shift_4, // .shift
- inv_stage_range_col_adst_4, // .stage_range
- inv_cos_bit_col_adst_4, // .cos_bit
- TXFM_TYPE_ADST4, // .txfm_type
-};
-
-// ---------------- col config inv_adst_8 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_8 = {
- 8, // .txfm_size
- 8, // .stage_num
- inv_shift_8, // .shift
- inv_stage_range_col_adst_8, // .stage_range
- inv_cos_bit_col_adst_8, // .cos_bit
- TXFM_TYPE_ADST8, // .txfm_type_col
-};
-
-// ---------------- col config inv_adst_16 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_16 = {
- 16, // .txfm_size
- 10, // .stage_num
- inv_shift_16, // .shift
- inv_stage_range_col_adst_16, // .stage_range
- inv_cos_bit_col_adst_16, // .cos_bit
- TXFM_TYPE_ADST16, // .txfm_type
-};
-
-// ---------------- col config inv_adst_32 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_32 = {
- 32, // .txfm_size
- 12, // .stage_num
- inv_shift_32, // .shift
- inv_stage_range_col_adst_32, // .stage_range
- inv_cos_bit_col_adst_32, // .cos_bit
- TXFM_TYPE_ADST32, // .txfm_type
-};
-
-#if CONFIG_EXT_TX
-// identity does not need to differentiate between row and col
-// ---------------- row/col config inv_identity_4 ----------
-static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_4 = {
- 4, // .txfm_size
- 1, // .stage_num
- inv_shift_4, // .shift
- inv_stage_range_idx_4, // .stage_range
- NULL, // .cos_bit
- TXFM_TYPE_IDENTITY4, // .txfm_type
-};
-
-// ---------------- row/col config inv_identity_8 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_8 = {
- 8, // .txfm_size
- 1, // .stage_num
- inv_shift_8, // .shift
- inv_stage_range_idx_8, // .stage_range
- NULL, // .cos_bit
- TXFM_TYPE_IDENTITY8, // .txfm_type
-};
-
-// ---------------- row/col config inv_identity_16 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_16 = {
- 16, // .txfm_size
- 1, // .stage_num
- inv_shift_16, // .shift
- inv_stage_range_idx_16, // .stage_range
- NULL, // .cos_bit
- TXFM_TYPE_IDENTITY16, // .txfm_type
-};
-
-// ---------------- row/col config inv_identity_32 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_32 = {
- 32, // .txfm_size
- 1, // .stage_num
- inv_shift_32, // .shift
- inv_stage_range_idx_32, // .stage_range
- NULL, // .cos_bit
- TXFM_TYPE_IDENTITY32, // .txfm_type
-};
-
-#if CONFIG_TX64X64
-// ---------------- row/col config inv_identity_32 ----------------
-static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_64 = {
- 64, // .txfm_size
- 1, // .stage_num
- inv_shift_64, // .shift
- inv_stage_range_idx_64, // .stage_range
- NULL, // .cos_bit
- TXFM_TYPE_IDENTITY64, // .txfm_type
-};
-#endif // CONFIG_TX64X64
-#endif // CONFIG_EXT_TX
#endif // AV1_INV_TXFM2D_CFG_H_
diff --git a/third_party/aom/av1/common/av1_inv_txfm2d.c b/third_party/aom/av1/common/av1_inv_txfm2d.c
index 031d11b40..4e6944314 100644
--- a/third_party/aom/av1/common/av1_inv_txfm2d.c
+++ b/third_party/aom/av1/common/av1_inv_txfm2d.c
@@ -9,218 +9,252 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#include "./av1_rtcd.h"
-#include "aom_dsp/inv_txfm.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
#include "av1/common/enums.h"
#include "av1/common/av1_txfm.h"
#include "av1/common/av1_inv_txfm1d.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
+void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int bd) {
+ /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
+ 0.5 shifts per pixel. */
+ int i;
+ tran_low_t output[16];
+ tran_low_t a1, b1, c1, d1, e1;
+ const tran_low_t *ip = input;
+ tran_low_t *op = output;
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ for (i = 0; i < 4; i++) {
+ a1 = ip[0] >> UNIT_QUANT_SHIFT;
+ c1 = ip[1] >> UNIT_QUANT_SHIFT;
+ d1 = ip[2] >> UNIT_QUANT_SHIFT;
+ b1 = ip[3] >> UNIT_QUANT_SHIFT;
+ a1 += c1;
+ d1 -= b1;
+ e1 = (a1 - d1) >> 1;
+ b1 = e1 - b1;
+ c1 = e1 - c1;
+ a1 -= b1;
+ d1 += c1;
+
+ op[0] = a1;
+ op[1] = b1;
+ op[2] = c1;
+ op[3] = d1;
+ ip += 4;
+ op += 4;
+ }
+
+ ip = output;
+ for (i = 0; i < 4; i++) {
+ a1 = ip[4 * 0];
+ c1 = ip[4 * 1];
+ d1 = ip[4 * 2];
+ b1 = ip[4 * 3];
+ a1 += c1;
+ d1 -= b1;
+ e1 = (a1 - d1) >> 1;
+ b1 = e1 - b1;
+ c1 = e1 - c1;
+ a1 -= b1;
+ d1 += c1;
+
+ range_check_value(a1, bd + 1);
+ range_check_value(b1, bd + 1);
+ range_check_value(c1, bd + 1);
+ range_check_value(d1, bd + 1);
+
+ dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd);
+ dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd);
+ dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd);
+ dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd);
+
+ ip++;
+ dest++;
+ }
+}
+
+void av1_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
+ int dest_stride, int bd) {
+ int i;
+ tran_low_t a1, e1;
+ tran_low_t tmp[4];
+ const tran_low_t *ip = in;
+ tran_low_t *op = tmp;
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ (void)bd;
+
+ a1 = ip[0] >> UNIT_QUANT_SHIFT;
+ e1 = a1 >> 1;
+ a1 -= e1;
+ op[0] = a1;
+ op[1] = op[2] = op[3] = e1;
+
+ ip = tmp;
+ for (i = 0; i < 4; i++) {
+ e1 = ip[0] >> 1;
+ a1 = ip[0] - e1;
+ dest[dest_stride * 0] =
+ highbd_clip_pixel_add(dest[dest_stride * 0], a1, bd);
+ dest[dest_stride * 1] =
+ highbd_clip_pixel_add(dest[dest_stride * 1], e1, bd);
+ dest[dest_stride * 2] =
+ highbd_clip_pixel_add(dest[dest_stride * 2], e1, bd);
+ dest[dest_stride * 3] =
+ highbd_clip_pixel_add(dest[dest_stride * 3], e1, bd);
+ ip++;
+ dest++;
+ }
+}
+
static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
switch (txfm_type) {
case TXFM_TYPE_DCT4: return av1_idct4_new;
case TXFM_TYPE_DCT8: return av1_idct8_new;
case TXFM_TYPE_DCT16: return av1_idct16_new;
case TXFM_TYPE_DCT32: return av1_idct32_new;
-#if CONFIG_TX64X64
case TXFM_TYPE_DCT64: return av1_idct64_new;
-#endif // CONFIG_TX64X64
case TXFM_TYPE_ADST4: return av1_iadst4_new;
case TXFM_TYPE_ADST8: return av1_iadst8_new;
case TXFM_TYPE_ADST16: return av1_iadst16_new;
- case TXFM_TYPE_ADST32: return av1_iadst32_new;
-#if CONFIG_EXT_TX
case TXFM_TYPE_IDENTITY4: return av1_iidentity4_c;
case TXFM_TYPE_IDENTITY8: return av1_iidentity8_c;
case TXFM_TYPE_IDENTITY16: return av1_iidentity16_c;
case TXFM_TYPE_IDENTITY32: return av1_iidentity32_c;
-#if CONFIG_TX64X64
- case TXFM_TYPE_IDENTITY64: return av1_iidentity64_c;
-#endif // CONFIG_TX64X64
-#endif // CONFIG_EXT_TX
default: assert(0); return NULL;
}
}
-static const TXFM_1D_CFG *inv_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
- // DCT
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &inv_txfm_1d_col_cfg_dct_4, &inv_txfm_1d_col_cfg_dct_8,
- &inv_txfm_1d_col_cfg_dct_16, &inv_txfm_1d_col_cfg_dct_32,
-#if CONFIG_TX64X64
- &inv_txfm_1d_col_cfg_dct_64
-#endif // CONFIG_TX64X64
- },
- // ADST
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
- &inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32,
-#if CONFIG_TX64X64
- NULL
-#endif // CONFIG_TX64X64
- },
-#if CONFIG_EXT_TX
- // FLIPADST
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
- &inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32,
-#if CONFIG_TX64X64
- NULL
-#endif // CONFIG_TX64X64
- },
- // IDENTITY
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &inv_txfm_1d_cfg_identity_4, &inv_txfm_1d_cfg_identity_8,
- &inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_32,
-#if CONFIG_TX64X64
- &inv_txfm_1d_cfg_identity_64
-#endif // CONFIG_TX64X64
- },
-#endif // CONFIG_EXT_TX
-};
+static const int8_t inv_shift_4x4[2] = { 0, -4 };
+static const int8_t inv_shift_8x8[2] = { -1, -4 };
+static const int8_t inv_shift_16x16[2] = { -2, -4 };
+static const int8_t inv_shift_32x32[2] = { -2, -4 };
+static const int8_t inv_shift_64x64[2] = { -2, -4 };
+static const int8_t inv_shift_4x8[2] = { 0, -4 };
+static const int8_t inv_shift_8x4[2] = { 0, -4 };
+static const int8_t inv_shift_8x16[2] = { -1, -4 };
+static const int8_t inv_shift_16x8[2] = { -1, -4 };
+static const int8_t inv_shift_16x32[2] = { -1, -4 };
+static const int8_t inv_shift_32x16[2] = { -1, -4 };
+static const int8_t inv_shift_32x64[2] = { -1, -4 };
+static const int8_t inv_shift_64x32[2] = { -1, -4 };
+static const int8_t inv_shift_4x16[2] = { -1, -4 };
+static const int8_t inv_shift_16x4[2] = { -1, -4 };
+static const int8_t inv_shift_8x32[2] = { -2, -4 };
+static const int8_t inv_shift_32x8[2] = { -2, -4 };
+static const int8_t inv_shift_16x64[2] = { -2, -4 };
+static const int8_t inv_shift_64x16[2] = { -2, -4 };
-static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
- // DCT
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &inv_txfm_1d_row_cfg_dct_4, &inv_txfm_1d_row_cfg_dct_8,
- &inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_32,
-#if CONFIG_TX64X64
- &inv_txfm_1d_row_cfg_dct_64,
-#endif // CONFIG_TX64X64
- },
- // ADST
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
- &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32,
-#if CONFIG_TX64X64
- NULL
-#endif // CONFIG_TX64X64
- },
-#if CONFIG_EXT_TX
- // FLIPADST
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
- &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32,
-#if CONFIG_TX64X64
- NULL
-#endif // CONFIG_TX64X64
- },
- // IDENTITY
- {
-#if CONFIG_CHROMA_2X2
- NULL,
-#endif
- &inv_txfm_1d_cfg_identity_4, &inv_txfm_1d_cfg_identity_8,
- &inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_32,
-#if CONFIG_TX64X64
- &inv_txfm_1d_cfg_identity_64
-#endif // CONFIG_TX64X64
- },
-#endif // CONFIG_EXT_TX
+const int8_t *inv_txfm_shift_ls[TX_SIZES_ALL] = {
+ inv_shift_4x4, inv_shift_8x8, inv_shift_16x16, inv_shift_32x32,
+ inv_shift_64x64, inv_shift_4x8, inv_shift_8x4, inv_shift_8x16,
+ inv_shift_16x8, inv_shift_16x32, inv_shift_32x16, inv_shift_32x64,
+ inv_shift_64x32, inv_shift_4x16, inv_shift_16x4, inv_shift_8x32,
+ inv_shift_32x8, inv_shift_16x64, inv_shift_64x16,
};
-TXFM_2D_FLIP_CFG av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size) {
- TXFM_2D_FLIP_CFG cfg;
- set_flip_cfg(tx_type, &cfg);
- const TX_TYPE_1D tx_type_col = vtx_tab[tx_type];
- const TX_TYPE_1D tx_type_row = htx_tab[tx_type];
- const TX_SIZE tx_size_col = txsize_vert_map[tx_size];
- const TX_SIZE tx_size_row = txsize_horz_map[tx_size];
- cfg.col_cfg = inv_txfm_col_cfg_ls[tx_type_col][tx_size_col];
- cfg.row_cfg = inv_txfm_row_cfg_ls[tx_type_row][tx_size_row];
- return cfg;
-}
+/* clang-format off */
+const int8_t inv_cos_bit_col[MAX_TXWH_IDX] // txw_idx
+ [MAX_TXWH_IDX] = { // txh_idx
+ { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0, 0 },
+ { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0 },
+ { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
+ { 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
+ { 0, 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }
+ };
-#if CONFIG_TX64X64
-TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x64_cfg(TX_TYPE tx_type) {
- TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
- switch (tx_type) {
- case DCT_DCT:
- cfg.col_cfg = &inv_txfm_1d_col_cfg_dct_64;
- cfg.row_cfg = &inv_txfm_1d_row_cfg_dct_64;
- set_flip_cfg(tx_type, &cfg);
- break;
- default: assert(0);
- }
- return cfg;
-}
+const int8_t inv_cos_bit_row[MAX_TXWH_IDX] // txw_idx
+ [MAX_TXWH_IDX] = { // txh_idx
+ { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0, 0 },
+ { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0 },
+ { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
+ { 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
+ { 0, 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }
+ };
+/* clang-format on */
-TXFM_2D_FLIP_CFG av1_get_inv_txfm_32x64_cfg(int tx_type) {
- TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
- switch (tx_type) {
- case DCT_DCT:
- cfg.col_cfg = &inv_txfm_1d_col_cfg_dct_64;
- cfg.row_cfg = &inv_txfm_1d_row_cfg_dct_32;
- set_flip_cfg(tx_type, &cfg);
- break;
- default: assert(0);
- }
- return cfg;
-}
+const int8_t iadst4_range[7] = { 0, 1, 0, 0, 0, 0, 0 };
-TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x32_cfg(int tx_type) {
- TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
- switch (tx_type) {
- case DCT_DCT:
- cfg.col_cfg = &inv_txfm_1d_col_cfg_dct_32;
- cfg.row_cfg = &inv_txfm_1d_row_cfg_dct_64;
- set_flip_cfg(tx_type, &cfg);
- break;
- default: assert(0);
+void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
+ TXFM_2D_FLIP_CFG *cfg) {
+ assert(cfg != NULL);
+ cfg->tx_size = tx_size;
+ set_flip_cfg(tx_type, cfg);
+ av1_zero(cfg->stage_range_col);
+ av1_zero(cfg->stage_range_row);
+ set_flip_cfg(tx_type, cfg);
+ const TX_TYPE_1D tx_type_1d_col = vtx_tab[tx_type];
+ const TX_TYPE_1D tx_type_1d_row = htx_tab[tx_type];
+ cfg->shift = inv_txfm_shift_ls[tx_size];
+ const int txw_idx = get_txw_idx(tx_size);
+ const int txh_idx = get_txh_idx(tx_size);
+ cfg->cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
+ cfg->cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
+ cfg->txfm_type_col = av1_txfm_type_ls[txh_idx][tx_type_1d_col];
+ if (cfg->txfm_type_col == TXFM_TYPE_ADST4) {
+ memcpy(cfg->stage_range_col, iadst4_range, sizeof(iadst4_range));
+ }
+ cfg->txfm_type_row = av1_txfm_type_ls[txw_idx][tx_type_1d_row];
+ if (cfg->txfm_type_row == TXFM_TYPE_ADST4) {
+ memcpy(cfg->stage_range_row, iadst4_range, sizeof(iadst4_range));
}
- return cfg;
+ cfg->stage_num_col = av1_txfm_stage_num_list[cfg->txfm_type_col];
+ cfg->stage_num_row = av1_txfm_stage_num_list[cfg->txfm_type_row];
}
-#endif // CONFIG_TX64X64
void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
- const TXFM_2D_FLIP_CFG *cfg, int8_t fwd_shift,
+ const TXFM_2D_FLIP_CFG *cfg, TX_SIZE tx_size,
int bd) {
- // Note when assigning txfm_size_col, we use the txfm_size from the
- // row configuration and vice versa. This is intentionally done to
- // accurately perform rectangular transforms. When the transform is
- // rectangular, the number of columns will be the same as the
- // txfm_size stored in the row cfg struct. It will make no difference
- // for square transforms.
- const int txfm_size_col = cfg->row_cfg->txfm_size;
- const int txfm_size_row = cfg->col_cfg->txfm_size;
- // Take the shift from the larger dimension in the rectangular case.
- const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
- : cfg->col_cfg->shift;
+ const int fwd_shift = inv_start_range[tx_size];
+ const int8_t *shift = cfg->shift;
+ int8_t opt_range_row, opt_range_col;
+ if (bd == 8) {
+ opt_range_row = 16;
+ opt_range_col = 16;
+ } else if (bd == 10) {
+ opt_range_row = 18;
+ opt_range_col = 16;
+ } else {
+ assert(bd == 12);
+ opt_range_row = 20;
+ opt_range_col = 18;
+ }
// i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
- for (int i = 0; i < cfg->row_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
- stage_range_row[i] = cfg->row_cfg->stage_range[i] + fwd_shift + bd + 1;
+ for (int i = 0; i < cfg->stage_num_row && i < MAX_TXFM_STAGE_NUM; ++i) {
+ int real_range_row = cfg->stage_range_row[i] + fwd_shift + bd + 1;
+ (void)real_range_row;
+ if (cfg->txfm_type_row == TXFM_TYPE_ADST4 && i == 1) {
+      // The adst4 may use 1 extra bit on top of opt_range_row at stage 1,
+      // so opt_range_row >= real_range_row will not hold there.
+ stage_range_row[i] = opt_range_row;
+ } else {
+ assert(opt_range_row >= real_range_row);
+ stage_range_row[i] = opt_range_row;
+ }
}
// i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
- for (int i = 0; i < cfg->col_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
- stage_range_col[i] =
- cfg->col_cfg->stage_range[i] + fwd_shift + shift[0] + bd + 1;
+ for (int i = 0; i < cfg->stage_num_col && i < MAX_TXFM_STAGE_NUM; ++i) {
+ int real_range_col =
+ cfg->stage_range_col[i] + fwd_shift + shift[0] + bd + 1;
+ (void)real_range_col;
+ if (cfg->txfm_type_col == TXFM_TYPE_ADST4 && i == 1) {
+      // The adst4 may use 1 extra bit on top of opt_range_col at stage 1,
+      // so opt_range_col >= real_range_col will not hold there.
+ stage_range_col[i] = opt_range_col;
+ } else {
+ assert(opt_range_col >= real_range_col);
+ stage_range_col[i] = opt_range_col;
+ }
}
}
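
A usage sketch of the two functions above (all names are taken from this file; the values follow the bd == 10 branch, so every row stage ends up at 18 bits and every column stage at 16):

    TXFM_2D_FLIP_CFG cfg;
    av1_get_inv_txfm_cfg(DCT_DCT, TX_8X8, &cfg);
    int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
    int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
    // For DCT_DCT the ADST4 special case does not apply, so each entry is
    // simply the optimized range for the given bit depth.
    av1_gen_inv_stage_range(stage_range_col, stage_range_row, &cfg, TX_8X8, 10);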
static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
int stride, TXFM_2D_FLIP_CFG *cfg,
- int32_t *txfm_buf, int8_t fwd_shift,
+ int32_t *txfm_buf, TX_SIZE tx_size,
int bd) {
// Note when assigning txfm_size_col, we use the txfm_size from the
// row configuration and vice versa. This is intentionally done to
@@ -228,39 +262,48 @@ static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
// rectangular, the number of columns will be the same as the
// txfm_size stored in the row cfg struct. It will make no difference
// for square transforms.
- const int txfm_size_col = cfg->row_cfg->txfm_size;
- const int txfm_size_row = cfg->col_cfg->txfm_size;
+ const int txfm_size_col = tx_size_wide[cfg->tx_size];
+ const int txfm_size_row = tx_size_high[cfg->tx_size];
// Take the shift from the larger dimension in the rectangular case.
- const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
- : cfg->col_cfg->shift;
+ const int8_t *shift = cfg->shift;
+ const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
- assert(cfg->row_cfg->stage_num <= MAX_TXFM_STAGE_NUM);
- assert(cfg->col_cfg->stage_num <= MAX_TXFM_STAGE_NUM);
- av1_gen_inv_stage_range(stage_range_col, stage_range_row, cfg, fwd_shift, bd);
+ assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM);
+ assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM);
+ av1_gen_inv_stage_range(stage_range_col, stage_range_row, cfg, tx_size, bd);
- const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
- const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
- const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->col_cfg->txfm_type);
- const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->row_cfg->txfm_type);
+ const int8_t cos_bit_col = cfg->cos_bit_col;
+ const int8_t cos_bit_row = cfg->cos_bit_row;
+ const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->txfm_type_col);
+ const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->txfm_type_row);
- // txfm_buf's length is txfm_size_row * txfm_size_col + 2 * txfm_size_row
+ // txfm_buf's length is txfm_size_row * txfm_size_col + 2 *
+ // AOMMAX(txfm_size_row, txfm_size_col)
// it is used for intermediate data buffering
+ const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
int32_t *temp_in = txfm_buf;
- int32_t *temp_out = temp_in + txfm_size_row;
- int32_t *buf = temp_out + txfm_size_row;
+ int32_t *temp_out = temp_in + buf_offset;
+ int32_t *buf = temp_out + buf_offset;
int32_t *buf_ptr = buf;
int c, r;
// Rows
for (r = 0; r < txfm_size_row; ++r) {
- txfm_func_row(input, buf_ptr, cos_bit_row, stage_range_row);
- round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
- // Multiply everything by Sqrt2 if the transform is rectangular
- if (txfm_size_row != txfm_size_col) {
- for (c = 0; c < txfm_size_col; ++c)
- buf_ptr[c] = (int32_t)dct_const_round_shift(buf_ptr[c] * Sqrt2);
+ if (abs(rect_type) == 1) {
+ for (c = 0; c < txfm_size_col; ++c) {
+ temp_in[c] = round_shift((int64_t)input[c] * NewInvSqrt2, NewSqrt2Bits);
+ }
+ clamp_buf(temp_in, txfm_size_col, bd + 8);
+ txfm_func_row(temp_in, buf_ptr, cos_bit_row, stage_range_row);
+ } else {
+ for (c = 0; c < txfm_size_col; ++c) {
+ temp_in[c] = input[c];
+ }
+ clamp_buf(temp_in, txfm_size_col, bd + 8);
+ txfm_func_row(temp_in, buf_ptr, cos_bit_row, stage_range_row);
}
+ av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
input += txfm_size_col;
buf_ptr += txfm_size_col;
}
@@ -275,8 +318,9 @@ static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
}
+ clamp_buf(temp_in, txfm_size_row, AOMMAX(bd + 6, 16));
txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
- round_shift_array(temp_out, txfm_size_row, -shift[1]);
+ av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
if (cfg->ud_flip == 0) {
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] =
@@ -296,156 +340,166 @@ static INLINE void inv_txfm2d_add_facade(const int32_t *input, uint16_t *output,
int stride, int32_t *txfm_buf,
TX_TYPE tx_type, TX_SIZE tx_size,
int bd) {
- TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_cfg(tx_type, tx_size);
- TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size];
- inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf,
- fwd_shift_sum[tx_size_sqr], bd);
+ TXFM_2D_FLIP_CFG cfg;
+ av1_get_inv_txfm_cfg(tx_type, tx_size, &cfg);
+ // Forward shift sum uses larger square size, to be consistent with what
+ // av1_gen_inv_stage_range() does for inverse shifts.
+ inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf, tx_size, bd);
}
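
Each per-size wrapper below sizes txfm_buf according to the layout described in inv_txfm2d_add_c: w * h entries for the row-transform output (buf) plus 2 * AOMMAX(w, h) entries for temp_in and temp_out. For example, TX_8X16 needs 8 * 16 + 16 + 16 = 160 ints, which is exactly what av1_inv_txfm2d_add_8x16_c declares.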
void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
- int txfm_buf[4 * 8 + 8 + 8];
+ DECLARE_ALIGNED(32, int, txfm_buf[4 * 8 + 8 + 8]);
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X8, bd);
}
void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
- int txfm_buf[8 * 4 + 8 + 8];
- int32_t rinput[8 * 4];
- uint16_t routput[8 * 4];
- TX_SIZE tx_size = TX_8X4;
- TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
- TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
- int w = tx_size_wide[tx_size];
- int h = tx_size_high[tx_size];
- int rw = h;
- int rh = w;
- transpose_int32(rinput, rw, input, w, w, h);
- transpose_uint16(routput, rw, output, stride, w, h);
- inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
- transpose_uint16(output, stride, routput, rw, rw, rh);
-#else
- int txfm_buf[8 * 4 + 4 + 4];
+ DECLARE_ALIGNED(32, int, txfm_buf[8 * 4 + 8 + 8]);
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X4, bd);
-#endif
}
void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
- int txfm_buf[8 * 16 + 16 + 16];
+ DECLARE_ALIGNED(32, int, txfm_buf[8 * 16 + 16 + 16]);
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X16, bd);
}
void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
- int txfm_buf[16 * 8 + 16 + 16];
- int32_t rinput[16 * 8];
- uint16_t routput[16 * 8];
- TX_SIZE tx_size = TX_16X8;
- TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
- TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
- int w = tx_size_wide[tx_size];
- int h = tx_size_high[tx_size];
- int rw = h;
- int rh = w;
- transpose_int32(rinput, rw, input, w, w, h);
- transpose_uint16(routput, rw, output, stride, w, h);
- inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
- transpose_uint16(output, stride, routput, rw, rw, rh);
-#else
- int txfm_buf[16 * 8 + 8 + 8];
+ DECLARE_ALIGNED(32, int, txfm_buf[16 * 8 + 16 + 16]);
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X8, bd);
-#endif
}
void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
- int txfm_buf[16 * 32 + 32 + 32];
+ DECLARE_ALIGNED(32, int, txfm_buf[16 * 32 + 32 + 32]);
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X32, bd);
}
void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
- int txfm_buf[32 * 16 + 32 + 32];
- int32_t rinput[32 * 16];
- uint16_t routput[32 * 16];
- TX_SIZE tx_size = TX_32X16;
- TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
- TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
- int w = tx_size_wide[tx_size];
- int h = tx_size_high[tx_size];
- int rw = h;
- int rh = w;
- transpose_int32(rinput, rw, input, w, w, h);
- transpose_uint16(routput, rw, output, stride, w, h);
- inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
- transpose_uint16(output, stride, routput, rw, rw, rh);
-#else
- int txfm_buf[32 * 16 + 16 + 16];
+ DECLARE_ALIGNED(32, int, txfm_buf[32 * 16 + 32 + 32]);
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X16, bd);
-#endif
}
void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
- int txfm_buf[4 * 4 + 4 + 4];
+ DECLARE_ALIGNED(32, int, txfm_buf[4 * 4 + 4 + 4]);
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X4, bd);
}
void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
- int txfm_buf[8 * 8 + 8 + 8];
+ DECLARE_ALIGNED(32, int, txfm_buf[8 * 8 + 8 + 8]);
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X8, bd);
}
void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
- int txfm_buf[16 * 16 + 16 + 16];
+ DECLARE_ALIGNED(32, int, txfm_buf[16 * 16 + 16 + 16]);
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X16, bd);
}
void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
- int txfm_buf[32 * 32 + 32 + 32];
+ DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]);
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X32, bd);
}
-#if CONFIG_TX64X64
void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
- int txfm_buf[64 * 64 + 64 + 64];
- inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_64X64, bd);
+ // TODO(urvang): Can the same array be reused, instead of using a new array?
+ // Remap 32x32 input into a modified 64x64 by:
+ // - Copying over these values in top-left 32x32 locations.
+ // - Setting the rest of the locations to 0.
+ int32_t mod_input[64 * 64];
+ for (int row = 0; row < 32; ++row) {
+ memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
+ memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
+ }
+ memset(mod_input + 32 * 64, 0, 32 * 64 * sizeof(*mod_input));
+ DECLARE_ALIGNED(32, int, txfm_buf[64 * 64 + 64 + 64]);
+ inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X64,
+ bd);
}
void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
-#if CONFIG_TXMG
- int txfm_buf[64 * 32 + 64 + 64];
- int32_t rinput[64 * 32];
- uint16_t routput[64 * 32];
- TX_SIZE tx_size = TX_64X32;
- TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
- TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
- int w = tx_size_wide[tx_size];
- int h = tx_size_high[tx_size];
- int rw = h;
- int rh = w;
- transpose_int32(rinput, rw, input, w, w, h);
- transpose_uint16(routput, rw, output, stride, w, h);
- inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
- transpose_uint16(output, stride, routput, rw, rw, rh);
-#else
- int txfm_buf[64 * 32 + 64 + 64];
- inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_64X32, bd);
-#endif
+ // Remap 32x32 input into a modified 64x32 by:
+ // - Copying over these values in top-left 32x32 locations.
+ // - Setting the rest of the locations to 0.
+ int32_t mod_input[64 * 32];
+ for (int row = 0; row < 32; ++row) {
+ memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
+ memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
+ }
+ DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]);
+ inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X32,
+ bd);
}
void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
- int txfm_buf[64 * 32 + 64 + 64];
- inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X64, bd);
+ // Remap 32x32 input into a modified 32x64 input by:
+ // - Copying over these values in top-left 32x32 locations.
+ // - Setting the rest of the locations to 0.
+ int32_t mod_input[32 * 64];
+ memcpy(mod_input, input, 32 * 32 * sizeof(*mod_input));
+ memset(mod_input + 32 * 32, 0, 32 * 32 * sizeof(*mod_input));
+ DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]);
+ inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_32X64,
+ bd);
+}
+
+void av1_inv_txfm2d_add_16x64_c(const int32_t *input, uint16_t *output,
+ int stride, TX_TYPE tx_type, int bd) {
+ // Remap 16x32 input into a modified 16x64 input by:
+ // - Copying over these values in top-left 16x32 locations.
+ // - Setting the rest of the locations to 0.
+ int32_t mod_input[16 * 64];
+ memcpy(mod_input, input, 16 * 32 * sizeof(*mod_input));
+ memset(mod_input + 16 * 32, 0, 16 * 32 * sizeof(*mod_input));
+ DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]);
+ inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_16X64,
+ bd);
+}
+
+void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output,
+ int stride, TX_TYPE tx_type, int bd) {
+ // Remap 32x16 input into a modified 64x16 by:
+ // - Copying over these values in top-left 32x16 locations.
+ // - Setting the rest of the locations to 0.
+ int32_t mod_input[64 * 16];
+ for (int row = 0; row < 16; ++row) {
+ memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
+ memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
+ }
+ DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]);
+ inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X16,
+ bd);
+}
+
+void av1_inv_txfm2d_add_4x16_c(const int32_t *input, uint16_t *output,
+ int stride, TX_TYPE tx_type, int bd) {
+ DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]);
+ inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X16, bd);
+}
+
+void av1_inv_txfm2d_add_16x4_c(const int32_t *input, uint16_t *output,
+ int stride, TX_TYPE tx_type, int bd) {
+ DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]);
+ inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X4, bd);
+}
+
+void av1_inv_txfm2d_add_8x32_c(const int32_t *input, uint16_t *output,
+ int stride, TX_TYPE tx_type, int bd) {
+ DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]);
+ inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X32, bd);
+}
+
+void av1_inv_txfm2d_add_32x8_c(const int32_t *input, uint16_t *output,
+ int stride, TX_TYPE tx_type, int bd) {
+ DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]);
+ inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X8, bd);
}
-#endif // CONFIG_TX64X64
diff --git a/third_party/aom/av1/common/av1_loopfilter.c b/third_party/aom/av1/common/av1_loopfilter.c
index 95f7a8687..738290fad 100644
--- a/third_party/aom/av1/common/av1_loopfilter.c
+++ b/third_party/aom/av1/common/av1_loopfilter.c
@@ -11,8 +11,9 @@
#include <math.h>
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
#include "aom_dsp/aom_dsp_common.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
@@ -21,590 +22,211 @@
#include "av1/common/reconinter.h"
#include "av1/common/seg_common.h"
-#if CONFIG_LOOPFILTER_LEVEL
static const SEG_LVL_FEATURES seg_lvl_lf_lut[MAX_MB_PLANE][2] = {
{ SEG_LVL_ALT_LF_Y_V, SEG_LVL_ALT_LF_Y_H },
{ SEG_LVL_ALT_LF_U, SEG_LVL_ALT_LF_U },
{ SEG_LVL_ALT_LF_V, SEG_LVL_ALT_LF_V }
};
-#if CONFIG_EXT_DELTA_Q
static const int delta_lf_id_lut[MAX_MB_PLANE][2] = {
{ 0, 1 }, { 2, 2 }, { 3, 3 }
};
-#endif // CONFIG_EXT_DELTA_Q
-#endif // CONFIG_LOOPFILTER_LEVEL
-
-#if CONFIG_LPF_DIRECT
-static void pick_filter_pixel_left(uint8_t *const src, uint8_t *const line,
- int *const orig_pos, int length, int row,
- int col, int width, int height, int pitch,
- int pivot, int direct) {
- int i;
- int pos = row * pitch + col;
-
- for (i = 0; i < length; ++i) {
- int dy = 0;
- switch (direct) {
- case VERT_HORZ: dy = 0; break;
- case DEGREE_45: dy = 1; break;
- case DEGREE_135: dy = -1; break;
- }
- col -= 1;
- row += dy;
- if (col >= 0 && col < width && row >= 0 && row < height) {
- pos = row * pitch + col;
- line[pivot - 1 - i] = src[pos];
- orig_pos[pivot - 1 - i] = pos;
- }
- }
-}
-static void pick_filter_pixel_right(uint8_t *const src, uint8_t *const line,
- int *const orig_pos, int length, int row,
- int col, int width, int height, int pitch,
- int pivot, int direct) {
- int i;
- int pos = row * pitch + col;
+typedef enum EDGE_DIR { VERT_EDGE = 0, HORZ_EDGE = 1, NUM_EDGE_DIRS } EDGE_DIR;
- line[pivot] = src[pos];
- orig_pos[pivot] = pos;
+static const int mode_lf_lut[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES
+ 1, 1, 0, 1, // INTER_MODES (GLOBALMV == 0)
+ 1, 1, 1, 1, 1, 1, 0, 1 // INTER_COMPOUND_MODES (GLOBAL_GLOBALMV == 0)
+};
- for (i = 1; i < length; ++i) {
- int dy = 0;
- switch (direct) {
- case VERT_HORZ: dy = 0; break;
- case DEGREE_45: dy = -1; break;
- case DEGREE_135: dy = 1; break;
- }
- col += 1;
- row += dy;
- if (col >= 0 && col < width && row >= 0 && row < height) {
- pos = row * pitch + col;
- line[pivot + i] = src[pos];
- orig_pos[pivot + i] = pos;
- }
- }
-}
+#if LOOP_FILTER_BITMASK
+// 256 bit masks (64x64 / 4x4) for left transform size for Y plane.
+// We use 4 uint64_t to represent the 256 bits.
+// Each 1 represents a position where we should apply a loop filter
+// across the left boundary of a 4x4 block.
+//
+// In the case of TX_8X8 (low-order byte first) we end up with
+// a mask that looks like this (-- and | are used as visual separators):
+//
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// -----------------
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+// 10101010|10101010
+//
+// A loopfilter should be applied to every other column of 4x4 blocks.
+// TODO(chengchen): make these tables static
+const FilterMask left_txform_mask[TX_SIZES] = {
+ { { 0xffffffffffffffffULL, // TX_4X4,
+ 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL } },
-static void pick_filter_pixel_above(uint8_t *const src, uint8_t *const line,
- int *const orig_pos, int length, int row,
- int col, int width, int height, int pitch,
- int pivot, int direct) {
- int i;
- int pos = row * pitch + col;
-
- for (i = 0; i < length; ++i) {
- int dx = 0;
- switch (direct) {
- case VERT_HORZ: dx = 0; break;
- case DEGREE_45: dx = 1; break;
- case DEGREE_135: dx = -1; break;
- }
- col += dx;
- row -= 1;
- if (col >= 0 && col < width && row >= 0 && row < height) {
- pos = row * pitch + col;
- line[pivot - 1 - i] = src[pos];
- orig_pos[pivot - 1 - i] = pos;
- }
- }
-}
+ { { 0x5555555555555555ULL, // TX_8X8,
+ 0x5555555555555555ULL, 0x5555555555555555ULL, 0x5555555555555555ULL } },
-static void pick_filter_pixel_bot(uint8_t *const src, uint8_t *const line,
- int *const orig_pos, int length, int row,
- int col, int width, int height, int pitch,
- int pivot, int direct) {
- int i;
- int pos = row * pitch + col;
+ { { 0x1111111111111111ULL, // TX_16X16,
+ 0x1111111111111111ULL, 0x1111111111111111ULL, 0x1111111111111111ULL } },
- line[pivot] = src[pos];
- orig_pos[pivot] = pos;
+ { { 0x0101010101010101ULL, // TX_32X32,
+ 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL } },
- for (i = 1; i < length; ++i) {
- int dx = 0;
- switch (direct) {
- case VERT_HORZ: dx = 0; break;
- case DEGREE_45: dx = -1; break;
- case DEGREE_135: dx = 1; break;
- }
- col += dx;
- row += 1;
- if (col >= 0 && col < width && row >= 0 && row < height) {
- pos = row * pitch + col;
- line[pivot + i] = src[pos];
- orig_pos[pivot + i] = pos;
- }
- }
-}
+ { { 0x0001000100010001ULL, // TX_64X64,
+ 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL } },
+};
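
A minimal sketch of how such a mask is read (left_txform_mask and FilterMask come from this file; the helper name is made up for illustration). For TX_8X8, bit ((row & 3) * 16 + col) of bits[row >> 2] is set only for even 4x4 columns, i.e. only at 8-pixel-aligned vertical edges:

    // Hypothetical helper: does the 4x4 unit at (row, col) inside a 64x64
    // block have a filterable left edge under an all-TX_8X8 layout?
    static int has_left_edge_tx8x8(int row, int col) {
      const uint64_t word = left_txform_mask[TX_8X8].bits[row >> 2];
      const int shift = ((row & 3) << 4) | col;  // same mapping as get_index_shift()
      return (int)((word >> shift) & 1);         // 1 for col 0, 2, 4, ...; 0 otherwise
    }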
-static void pick_filter_block_vert(uint8_t *const src, uint8_t *const block,
- int *const orig_pos, int length, int row,
- int col, int width, int height, int pitch,
- int pivot, int line_length, int unit,
- int direct) {
- int i;
- for (i = 0; i < 8 * unit; ++i) {
- pick_filter_pixel_left(src, block + i * line_length,
- orig_pos + i * line_length, length, row + i, col,
- width, height, pitch, pivot, direct);
- pick_filter_pixel_right(src, block + i * line_length,
- orig_pos + i * line_length, length, row + i, col,
- width, height, pitch, pivot, direct);
- }
-}
+// 256 bit masks (64x64 / 4x4) for above transform size for Y plane.
+// We use 4 uint64_t to represent the 256 bits.
+// Each 1 represents a position where we should apply a loop filter
+// across the top boundary of a 4x4 block.
+//
+// In the case of TX_8X8 (low-order byte first) we end up with
+// a mask that looks like this:
+//
+// 11111111|11111111
+// 00000000|00000000
+// 11111111|11111111
+// 00000000|00000000
+// 11111111|11111111
+// 00000000|00000000
+// 11111111|11111111
+// 00000000|00000000
+// -----------------
+// 11111111|11111111
+// 00000000|00000000
+// 11111111|11111111
+// 00000000|00000000
+// 11111111|11111111
+// 00000000|00000000
+// 11111111|11111111
+// 00000000|00000000
+//
+// A loopfilter should be applied to every other row of 4x4 blocks.
+const FilterMask above_txform_mask[TX_SIZES] = {
+ { { 0xffffffffffffffffULL, // TX_4X4
+ 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL } },
-static void pick_filter_block_horz(uint8_t *const src, uint8_t *const block,
- int *const orig_pos, int length, int row,
- int col, int width, int height, int pitch,
- int pivot, int line_length, int unit,
- int direct) {
- int i, j;
- int num = 8 * unit;
- for (i = 0; i < num; ++i) {
- pick_filter_pixel_above(src, block + i * line_length,
- orig_pos + i * line_length, length, row, col + i,
- width, height, pitch, pivot, direct);
- pick_filter_pixel_bot(src, block + i * line_length,
- orig_pos + i * line_length, length, row, col + i,
- width, height, pitch, pivot, direct);
- }
+ { { 0x0000ffff0000ffffULL, // TX_8X8
+ 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL } },
- // rearrange block
- // TODO(chengchen): make it in-place or a stand alone function
- uint8_t tmp_block[256];
- int tmp_pos[256];
- for (i = 0; i < 256; ++i) {
- tmp_block[i] = 0;
- tmp_pos[i] = -1;
- }
- for (i = 0; i < num; ++i) {
- for (j = 0; j < line_length; ++j) {
- tmp_block[j * line_length + i] = block[i * line_length + j];
- tmp_pos[j * line_length + i] = orig_pos[i * line_length + j];
- }
- }
- for (i = 0; i < 256; ++i) {
- block[i] = tmp_block[i];
- orig_pos[i] = tmp_pos[i];
- }
-}
+ { { 0x000000000000ffffULL, // TX_16X16
+ 0x000000000000ffffULL, 0x000000000000ffffULL, 0x000000000000ffffULL } },
-static int compute_block_grad(uint8_t *const src, int length, int row, int col,
- int width, int height, int pitch, int unit,
- int vert_or_horz, int direct) {
- int i, j;
- int r0, c0, pos0, r1 = 0, c1 = 0, pos1;
- int sum_grad = 0;
- for (i = 0; i < 8 * unit; ++i) {
- // vert_or_horz: 0 vertical edge, 1 horizontal edge
- r0 = vert_or_horz ? row : row + i;
- c0 = vert_or_horz ? col + i : col;
- pos0 = r0 * pitch + c0;
-
- for (j = 0; j < length; ++j) {
- if (vert_or_horz == 0) {
- switch (direct) {
- case VERT_HORZ: r1 = r0; break;
- case DEGREE_45: r1 = r0 + 1; break;
- case DEGREE_135: r1 = r0 - 1; break;
- }
- c1 = c0 - 1;
- } else {
- r1 = r0 - 1;
- switch (direct) {
- case VERT_HORZ: c1 = c0; break;
- case DEGREE_45: c1 = c0 + 1; break;
- case DEGREE_135: c1 = c0 - 1; break;
- }
- }
- pos1 = r1 * pitch + c1;
+ { { 0x000000000000ffffULL, // TX_32X32
+ 0x0000000000000000ULL, 0x000000000000ffffULL, 0x0000000000000000ULL } },
- if (r0 >= 0 && r0 < height && c0 >= 0 && c0 < width && r1 >= 0 &&
- r1 < height && c1 >= 0 && c1 < width) {
- sum_grad += abs(src[pos1] - src[pos0]);
- } else {
- sum_grad += 255; // penalize unreachable boundary
- }
- r0 = r1;
- c0 = c1;
- pos0 = pos1;
- }
+ { { 0x000000000000ffffULL, // TX_64X64
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
+};
- r0 = vert_or_horz ? row : row + i;
- c0 = vert_or_horz ? col + i : col;
- pos0 = r0 * pitch + c0;
+// 256 bit masks to shift and set for each prediction size. A bit is set for
+// each 4x4 block that would be in the top-left-most block of the given block
+// size in the 64x64 block.
+const FilterMask size_mask_y[BLOCK_SIZES_ALL] = {
+ { { 0x0000000000000001ULL, // BLOCK_4X4
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
- for (j = 0; j < length - 1; ++j) {
- if (vert_or_horz == 0) {
- switch (direct) {
- case VERT_HORZ: r1 = r0; break;
- case DEGREE_45: r1 = r0 - 1; break;
- case DEGREE_135: r1 = r0 + 1; break;
- }
- c1 = c0 + 1;
- } else {
- r1 = r0 + 1;
- switch (direct) {
- case VERT_HORZ: c1 = c0; break;
- case DEGREE_45: c1 = c0 - 1; break;
- case DEGREE_135: c1 = c0 + 1; break;
- }
- }
- pos1 = r1 * pitch + c1;
+ { { 0x0000000000010001ULL, // BLOCK_4X8
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
- if (r0 >= 0 && r0 < height && c0 >= 0 && c0 < width && r1 >= 0 &&
- r1 < height && c1 >= 0 && c1 < width) {
- sum_grad += abs(src[pos1] - src[pos0]);
- } else {
- sum_grad += 255; // penalize unreachable boundary
- }
- r0 = r1;
- c0 = c1;
- pos0 = pos1;
- }
- }
+ { { 0x0000000000000003ULL, // BLOCK_8X4
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
- return sum_grad;
-}
+ { { 0x0000000000030003ULL, // BLOCK_8X8
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-static int pick_min_grad_direct(uint8_t *const src, int length, int row,
- int col, int width, int height, int pitch,
- int unit, int vert_or_horz) {
- int direct = VERT_HORZ;
- int min_grad = INT_MAX, sum_grad = 0;
-
- int degree;
- for (degree = 0; degree < FILTER_DEGREES; ++degree) {
- // compute abs gradient along each line for the filter block
- sum_grad = compute_block_grad(src, length, row, col, width, height, pitch,
- unit, vert_or_horz, degree);
- if (sum_grad < min_grad) {
- min_grad = sum_grad;
- direct = degree;
- }
- }
+ { { 0x0003000300030003ULL, // BLOCK_8X16
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
- return direct;
-}
-#endif // CONFIG_LPF_DIRECT
+ { { 0x00000000000f000fULL, // BLOCK_16X8
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-#define PARALLEL_DEBLOCKING_15TAPLUMAONLY 1
-#define PARALLEL_DEBLOCKING_DISABLE_15TAP 0
-#if CONFIG_DEBLOCK_13TAP
-#define PARALLEL_DEBLOCKING_5_TAP_CHROMA 1
-#else
-#define PARALLEL_DEBLOCKING_5_TAP_CHROMA 0
-#endif
+ { { 0x000f000f000f000fULL, // BLOCK_16X16
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
-extern void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh);
+ { { 0x000f000f000f000fULL, // BLOCK_16X32
+ 0x000f000f000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-extern void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh);
+ { { 0x00ff00ff00ff00ffULL, // BLOCK_32X16
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-extern void aom_highbd_lpf_horizontal_6_c(uint16_t *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh, int bd);
+ { { 0x00ff00ff00ff00ffULL, // BLOCK_32X32
+ 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-extern void aom_highbd_lpf_vertical_6_c(uint16_t *s, int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh, int bd);
-#endif
+ { { 0x00ff00ff00ff00ffULL, // BLOCK_32X64
+ 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL } },
-// 64 bit masks for left transform size. Each 1 represents a position where
-// we should apply a loop filter across the left border of an 8x8 block
-// boundary.
-//
-// In the case of TX_16X16-> ( in low order byte first we end up with
-// a mask that looks like this
-//
-// 10101010
-// 10101010
-// 10101010
-// 10101010
-// 10101010
-// 10101010
-// 10101010
-// 10101010
-//
-// A loopfilter should be applied to every other 8x8 horizontally.
-static const uint64_t left_64x64_txform_mask[TX_SIZES] = {
-#if CONFIG_CHROMA_2X2
- 0xffffffffffffffffULL, // TX_2X2
-#endif
- 0xffffffffffffffffULL, // TX_4X4
- 0xffffffffffffffffULL, // TX_8x8
- 0x5555555555555555ULL, // TX_16x16
- 0x1111111111111111ULL, // TX_32x32
-#if CONFIG_TX64X64
- 0x0101010101010101ULL, // TX_64x64
-#endif // CONFIG_TX64X64
-};
+ { { 0xffffffffffffffffULL, // BLOCK_64X32
+ 0xffffffffffffffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-// 64 bit masks for above transform size. Each 1 represents a position where
-// we should apply a loop filter across the top border of an 8x8 block
-// boundary.
-//
-// In the case of TX_32x32 -> ( in low order byte first we end up with
-// a mask that looks like this
-//
-// 11111111
-// 00000000
-// 00000000
-// 00000000
-// 11111111
-// 00000000
-// 00000000
-// 00000000
-//
-// A loopfilter should be applied to every other 4 the row vertically.
-static const uint64_t above_64x64_txform_mask[TX_SIZES] = {
-#if CONFIG_CHROMA_2X2
- 0xffffffffffffffffULL, // TX_4X4
-#endif
- 0xffffffffffffffffULL, // TX_4X4
- 0xffffffffffffffffULL, // TX_8x8
- 0x00ff00ff00ff00ffULL, // TX_16x16
- 0x000000ff000000ffULL, // TX_32x32
-#if CONFIG_TX64X64
- 0x00000000000000ffULL, // TX_64x64
-#endif // CONFIG_TX64X64
-};
+ { { 0xffffffffffffffffULL, // BLOCK_64X64
+ 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL } },
+ // Y plane max coding block size is 128x128, but the codec divides it
+ // into 4 64x64 blocks.
+ // BLOCK_64X128
+ { { 0x0ULL, 0x0ULL, 0x0ULL, 0x0ULL } },
+ // BLOCK_128X64
+ { { 0x0ULL, 0x0ULL, 0x0ULL, 0x0ULL } },
+ // BLOCK_128X128
+ { { 0x0ULL, 0x0ULL, 0x0ULL, 0x0ULL } },
-// 64 bit masks for prediction sizes (left). Each 1 represents a position
-// where left border of an 8x8 block. These are aligned to the right most
-// appropriate bit, and then shifted into place.
-//
-// In the case of TX_16x32 -> ( low order byte first ) we end up with
-// a mask that looks like this :
-//
-// 10000000
-// 10000000
-// 10000000
-// 10000000
-// 00000000
-// 00000000
-// 00000000
-// 00000000
-static const uint64_t left_prediction_mask[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0x0000000000000001ULL, // BLOCK_2X2,
- 0x0000000000000001ULL, // BLOCK_2X4,
- 0x0000000000000001ULL, // BLOCK_4X2,
-#endif
- 0x0000000000000001ULL, // BLOCK_4X4,
- 0x0000000000000001ULL, // BLOCK_4X8,
- 0x0000000000000001ULL, // BLOCK_8X4,
- 0x0000000000000001ULL, // BLOCK_8X8,
- 0x0000000000000101ULL, // BLOCK_8X16,
- 0x0000000000000001ULL, // BLOCK_16X8,
- 0x0000000000000101ULL, // BLOCK_16X16,
- 0x0000000001010101ULL, // BLOCK_16X32,
- 0x0000000000000101ULL, // BLOCK_32X16,
- 0x0000000001010101ULL, // BLOCK_32X32,
- 0x0101010101010101ULL, // BLOCK_32X64,
- 0x0000000001010101ULL, // BLOCK_64X32,
- 0x0101010101010101ULL, // BLOCK_64X64,
- 0x0000000000000101ULL, // BLOCK_4X16,
- 0x0000000000000001ULL, // BLOCK_16X4,
- 0x0000000001010101ULL, // BLOCK_8X32,
- 0x0000000000000001ULL, // BLOCK_32X8,
- 0x0101010101010101ULL, // BLOCK_16X64,
- 0x0000000000000101ULL, // BLOCK_64X16
-};
+ { { 0x0001000100010001ULL, // BLOCK_4X16
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-// 64 bit mask to shift and set for each prediction size.
-static const uint64_t above_prediction_mask[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0x0000000000000001ULL, // BLOCK_2X2
- 0x0000000000000001ULL, // BLOCK_2X4
- 0x0000000000000001ULL, // BLOCK_4X2
-#endif
- 0x0000000000000001ULL, // BLOCK_4X4
- 0x0000000000000001ULL, // BLOCK_4X8
- 0x0000000000000001ULL, // BLOCK_8X4
- 0x0000000000000001ULL, // BLOCK_8X8
- 0x0000000000000001ULL, // BLOCK_8X16,
- 0x0000000000000003ULL, // BLOCK_16X8
- 0x0000000000000003ULL, // BLOCK_16X16
- 0x0000000000000003ULL, // BLOCK_16X32,
- 0x000000000000000fULL, // BLOCK_32X16,
- 0x000000000000000fULL, // BLOCK_32X32,
- 0x000000000000000fULL, // BLOCK_32X64,
- 0x00000000000000ffULL, // BLOCK_64X32,
- 0x00000000000000ffULL, // BLOCK_64X64,
- 0x0000000000000001ULL, // BLOCK_4X16,
- 0x0000000000000003ULL, // BLOCK_16X4,
- 0x0000000000000001ULL, // BLOCK_8X32,
- 0x000000000000000fULL, // BLOCK_32X8,
- 0x0000000000000003ULL, // BLOCK_16X64,
- 0x00000000000000ffULL, // BLOCK_64X16
-};
-// 64 bit mask to shift and set for each prediction size. A bit is set for
-// each 8x8 block that would be in the top left most block of the given block
-// size in the 64x64 block.
-static const uint64_t size_mask[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0x0000000000000001ULL, // BLOCK_2X2
- 0x0000000000000001ULL, // BLOCK_2X4
- 0x0000000000000001ULL, // BLOCK_4X2
-#endif
- 0x0000000000000001ULL, // BLOCK_4X4
- 0x0000000000000001ULL, // BLOCK_4X8
- 0x0000000000000001ULL, // BLOCK_8X4
- 0x0000000000000001ULL, // BLOCK_8X8
- 0x0000000000000101ULL, // BLOCK_8X16,
- 0x0000000000000003ULL, // BLOCK_16X8
- 0x0000000000000303ULL, // BLOCK_16X16
- 0x0000000003030303ULL, // BLOCK_16X32,
- 0x0000000000000f0fULL, // BLOCK_32X16,
- 0x000000000f0f0f0fULL, // BLOCK_32X32,
- 0x0f0f0f0f0f0f0f0fULL, // BLOCK_32X64,
- 0x00000000ffffffffULL, // BLOCK_64X32,
- 0xffffffffffffffffULL, // BLOCK_64X64,
- 0x0000000000000101ULL, // BLOCK_4X16,
- 0x0000000000000003ULL, // BLOCK_16X4,
- 0x0000000001010101ULL, // BLOCK_8X32,
- 0x000000000000000fULL, // BLOCK_32X8,
- 0x0303030303030303ULL, // BLOCK_16X64,
- 0x000000000000ffffULL, // BLOCK_64X16
-};
+ { { 0x000000000000000fULL, // BLOCK_16X4
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-// These are used for masking the left and above 32x32 borders.
-static const uint64_t left_border = 0x1111111111111111ULL;
-static const uint64_t above_border = 0x000000ff000000ffULL;
+ { { 0x0003000300030003ULL, // BLOCK_8X32
+ 0x0003000300030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-// 16 bit masks for uv transform sizes.
-static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = {
-#if CONFIG_CHROMA_2X2
- 0xffff, // TX_2X2
-#endif
- 0xffff, // TX_4X4
- 0xffff, // TX_8x8
- 0x5555, // TX_16x16
- 0x1111, // TX_32x32
-#if CONFIG_TX64X64
- 0x0101, // TX_64x64, never used
-#endif // CONFIG_TX64X64
-};
+ { { 0x0000000000ff00ffULL, // BLOCK_32X8
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = {
-#if CONFIG_CHROMA_2X2
- 0xffff, // TX_2X2
-#endif
- 0xffff, // TX_4X4
- 0xffff, // TX_8x8
- 0x0f0f, // TX_16x16
- 0x000f, // TX_32x32
-#if CONFIG_TX64X64
- 0x0003, // TX_64x64, never used
-#endif // CONFIG_TX64X64
-};
+ { { 0x000f000f000f000fULL, // BLOCK_16X64
+ 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x000f000f000f000fULL } },
-// 16 bit left mask to shift and set for each uv prediction size.
-static const uint16_t left_prediction_mask_uv[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0x0001, // BLOCK_2X2,
- 0x0001, // BLOCK_2X4,
- 0x0001, // BLOCK_4X2,
-#endif
- 0x0001, // BLOCK_4X4,
- 0x0001, // BLOCK_4X8,
- 0x0001, // BLOCK_8X4,
- 0x0001, // BLOCK_8X8,
- 0x0001, // BLOCK_8X16,
- 0x0001, // BLOCK_16X8,
- 0x0001, // BLOCK_16X16,
- 0x0011, // BLOCK_16X32,
- 0x0001, // BLOCK_32X16,
- 0x0011, // BLOCK_32X32,
- 0x1111, // BLOCK_32X64
- 0x0011, // BLOCK_64X32,
- 0x1111, // BLOCK_64X64,
- 0x0001, // BLOCK_4X16,
- 0x0001, // BLOCK_16X4,
- 0x0011, // BLOCK_8X32,
- 0x0001, // BLOCK_32X8,
- 0x1111, // BLOCK_16X64,
- 0x0001, // BLOCK_64X16,
+ { { 0xffffffffffffffffULL, // BLOCK_64X16
+ 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } }
};
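
Reading one of the constants above with the same bit layout (16 4x4 columns per row, 4 rows per uint64_t): size_mask_y[BLOCK_8X8] = 0x0000000000030003ULL sets bits 0-1 (row 0, columns 0-1) and bits 16-17 (row 1, columns 0-1) of bits[0], i.e. exactly the four 4x4 units covered by an 8x8 block anchored at the top-left; the mask is then shifted into place for the block's actual position within the 64x64 area.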
-// 16 bit above mask to shift and set for uv each prediction size.
-static const uint16_t above_prediction_mask_uv[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0x0001, // BLOCK_2X2
- 0x0001, // BLOCK_2X4
- 0x0001, // BLOCK_4X2
-#endif
- 0x0001, // BLOCK_4X4
- 0x0001, // BLOCK_4X8
- 0x0001, // BLOCK_8X4
- 0x0001, // BLOCK_8X8
- 0x0001, // BLOCK_8X16,
- 0x0001, // BLOCK_16X8
- 0x0001, // BLOCK_16X16
- 0x0001, // BLOCK_16X32,
- 0x0003, // BLOCK_32X16,
- 0x0003, // BLOCK_32X32,
- 0x0003, // BLOCK_32X64,
- 0x000f, // BLOCK_64X32,
- 0x000f, // BLOCK_64X64,
- 0x0001, // BLOCK_4X16,
- 0x0001, // BLOCK_16X4,
- 0x0001, // BLOCK_8X32,
- 0x0003, // BLOCK_32X8,
- 0x0001, // BLOCK_16X64,
- 0x000f, // BLOCK_64X16
-};
+LoopFilterMask *get_loop_filter_mask(const AV1_COMMON *const cm, int mi_row,
+ int mi_col) {
+ if ((mi_row << MI_SIZE_LOG2) >= cm->height ||
+ (mi_col << MI_SIZE_LOG2) >= cm->width)
+ return NULL;
+ assert(cm->lf.lfm != NULL);
+ const int row = mi_row >> MIN_MIB_SIZE_LOG2; // 64x64
+ const int col = mi_col >> MIN_MIB_SIZE_LOG2;
+ return &cm->lf.lfm[row * cm->lf.lfm_stride + col];
+}
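
A worked example of the indexing above (assuming MIN_MIB_SIZE_LOG2 == 4, i.e. a 64x64 unit spans 16 mi): mi_row = 20, mi_col = 36 maps to lfm row 20 >> 4 = 1 and column 36 >> 4 = 2, so the returned LoopFilterMask is the one covering the 64x64 block whose top-left pixel is (x = 128, y = 64).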
-// 64 bit mask to shift and set for each uv prediction size
-static const uint16_t size_mask_uv[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0x0001, // BLOCK_2X2
- 0x0001, // BLOCK_2X4
- 0x0001, // BLOCK_4X2
-#endif
- 0x0001, // BLOCK_4X4
- 0x0001, // BLOCK_4X8
- 0x0001, // BLOCK_8X4
- 0x0001, // BLOCK_8X8
- 0x0001, // BLOCK_8X16,
- 0x0001, // BLOCK_16X8
- 0x0001, // BLOCK_16X16
- 0x0011, // BLOCK_16X32,
- 0x0003, // BLOCK_32X16,
- 0x0033, // BLOCK_32X32,
- 0x3333, // BLOCK_32X64,
- 0x00ff, // BLOCK_64X32,
- 0xffff, // BLOCK_64X64,
- 0x0001, // BLOCK_4X16,
- 0x0001, // BLOCK_16X4,
- 0x0011, // BLOCK_8X32,
- 0x0003, // BLOCK_32X8,
- 0x1111, // BLOCK_16X64,
- 0x000f, // BLOCK_64X16
-};
-static const uint16_t left_border_uv = 0x1111;
-static const uint16_t above_border_uv = 0x000f;
+typedef void (*LpfFunc)(uint8_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh);
-static const int mode_lf_lut[] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES
- 0,
-#if CONFIG_SMOOTH_HV
- 0, 0,
-#endif // CONFIG_SMOOTH_HV
- 1, 1, 0, 1, // INTER_MODES (ZEROMV == 0)
-#if CONFIG_COMPOUND_SINGLEREF
- // 1, 1, 1, 1, 1, // INTER_SINGLEREF_COMP_MODES
- // NOTE(zoeliu): Remove SR_NEAREST_NEWMV
- 1, 1, 1, 1, // INTER_SINGLEREF_COMP_MODES
-#endif // CONFIG_COMPOUND_SINGLEREF
- 1, 1, 1, 1, 1, 1, 0, 1 // INTER_COMPOUND_MODES (ZERO_ZEROMV == 0)
-};
+typedef void (*LpfDualFunc)(uint8_t *s, int p, const uint8_t *blimit0,
+ const uint8_t *limit0, const uint8_t *thresh0,
+ const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1);
+
+typedef void (*HbdLpfFunc)(uint16_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh, int bd);
+
+typedef void (*HbdLpfDualFunc)(uint16_t *s, int p, const uint8_t *blimit0,
+ const uint8_t *limit0, const uint8_t *thresh0,
+ const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1, int bd);
+#endif // LOOP_FILTER_BITMASK
static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
int lvl;
@@ -626,64 +248,36 @@ static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
SIMD_WIDTH);
}
}
-#if CONFIG_EXT_DELTA_Q
static uint8_t get_filter_level(const AV1_COMMON *cm,
const loop_filter_info_n *lfi_n,
-#if CONFIG_LOOPFILTER_LEVEL
const int dir_idx, int plane,
-#endif
-#if CONFIG_LPF_SB
- int mi_row, int mi_col,
-#endif
const MB_MODE_INFO *mbmi) {
-#if CONFIG_LPF_SB
- return cm->mi[mi_row * cm->mi_stride + mi_col].mbmi.filt_lvl;
-#endif
-
-#if CONFIG_SUPERTX
- const int segment_id = AOMMIN(mbmi->segment_id, mbmi->segment_id_supertx);
- assert(
- IMPLIES(supertx_enabled(mbmi), mbmi->segment_id_supertx != MAX_SEGMENTS));
- assert(IMPLIES(supertx_enabled(mbmi),
- mbmi->segment_id_supertx <= mbmi->segment_id));
-#else
const int segment_id = mbmi->segment_id;
-#endif // CONFIG_SUPERTX
if (cm->delta_lf_present_flag) {
-#if CONFIG_LOOPFILTER_LEVEL
int delta_lf;
if (cm->delta_lf_multi) {
const int delta_lf_idx = delta_lf_id_lut[plane][dir_idx];
- delta_lf = mbmi->curr_delta_lf[delta_lf_idx];
+ delta_lf = mbmi->delta_lf[delta_lf_idx];
} else {
- delta_lf = mbmi->current_delta_lf_from_base;
+ delta_lf = mbmi->delta_lf_from_base;
}
- int lvl_seg =
- clamp(delta_lf + cm->lf.filter_level[dir_idx], 0, MAX_LOOP_FILTER);
-#else
- int lvl_seg = clamp(mbmi->current_delta_lf_from_base + cm->lf.filter_level,
- 0, MAX_LOOP_FILTER);
-#endif
- const int scale = 1 << (lvl_seg >> 5);
-#if CONFIG_LOOPFILTER_LEVEL
+ int base_level;
+ if (plane == 0)
+ base_level = cm->lf.filter_level[dir_idx];
+ else if (plane == 1)
+ base_level = cm->lf.filter_level_u;
+ else
+ base_level = cm->lf.filter_level_v;
+ int lvl_seg = clamp(delta_lf + base_level, 0, MAX_LOOP_FILTER);
assert(plane >= 0 && plane <= 2);
const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir_idx];
if (segfeature_active(&cm->seg, segment_id, seg_lf_feature_id)) {
const int data = get_segdata(&cm->seg, segment_id, seg_lf_feature_id);
- lvl_seg =
- clamp(cm->seg.abs_delta == SEGMENT_ABSDATA ? data : lvl_seg + data, 0,
- MAX_LOOP_FILTER);
- }
-#else
- if (segfeature_active(&cm->seg, segment_id, SEG_LVL_ALT_LF)) {
- const int data = get_segdata(&cm->seg, segment_id, SEG_LVL_ALT_LF);
- lvl_seg =
- clamp(cm->seg.abs_delta == SEGMENT_ABSDATA ? data : lvl_seg + data, 0,
- MAX_LOOP_FILTER);
+ lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER);
}
-#endif // CONFIG_LOOPFILTER_LEVEL
if (cm->lf.mode_ref_delta_enabled) {
+ const int scale = 1 << (lvl_seg >> 5);
lvl_seg += cm->lf.ref_deltas[mbmi->ref_frame[0]] * scale;
if (mbmi->ref_frame[0] > INTRA_FRAME)
lvl_seg += cm->lf.mode_deltas[mode_lf_lut[mbmi->mode]] * scale;
@@ -691,29 +285,10 @@ static uint8_t get_filter_level(const AV1_COMMON *cm,
}
return lvl_seg;
} else {
-#if CONFIG_LOOPFILTER_LEVEL
- return lfi_n
- ->lvl[segment_id][dir_idx][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
-#else
- return lfi_n->lvl[segment_id][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
-#endif
+ return lfi_n->lvl[plane][segment_id][dir_idx][mbmi->ref_frame[0]]
+ [mode_lf_lut[mbmi->mode]];
}
}
-#else
-static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
- const MB_MODE_INFO *mbmi) {
-#if CONFIG_SUPERTX
- const int segment_id = AOMMIN(mbmi->segment_id, mbmi->segment_id_supertx);
- assert(
- IMPLIES(supertx_enabled(mbmi), mbmi->segment_id_supertx != MAX_SEGMENTS));
- assert(IMPLIES(supertx_enabled(mbmi),
- mbmi->segment_id_supertx <= mbmi->segment_id));
-#else
- const int segment_id = mbmi->segment_id;
-#endif // CONFIG_SUPERTX
- return lfi_n->lvl[segment_id][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
-}
-#endif
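
As a worked example of the delta_lf path above (illustrative numbers only): with plane 0, dir_idx 0, a base filter level of 30 and delta_lf of +4, lvl_seg = clamp(34, 0, MAX_LOOP_FILTER) = 34. Since 34 >> 5 == 1, scale = 2, so with mode_ref_delta_enabled a ref delta of -1 and a mode delta of 0 give a final level of clamp(34 - 2, 0, MAX_LOOP_FILTER) = 32.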
void av1_loop_filter_init(AV1_COMMON *cm) {
assert(MB_MODE_COUNT == NELEMENTS(mode_lf_lut));
@@ -721,172 +296,728 @@ void av1_loop_filter_init(AV1_COMMON *cm) {
struct loopfilter *lf = &cm->lf;
int lvl;
+ lf->combine_vert_horz_lf = 1;
+
// init limits for given sharpness
update_sharpness(lfi, lf->sharpness_level);
- lf->last_sharpness_level = lf->sharpness_level;
// init hev threshold const vectors
for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
}
-#if CONFIG_LPF_SB
-void av1_loop_filter_sb_level_init(AV1_COMMON *cm, int mi_row, int mi_col,
- int lvl) {
- const int mi_row_start = AOMMAX(0, mi_row - FILT_BOUNDARY_MI_OFFSET);
- const int mi_col_start = AOMMAX(0, mi_col - FILT_BOUNDARY_MI_OFFSET);
- const int mi_row_range = mi_row - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
- const int mi_col_range = mi_col - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
- const int mi_row_end = AOMMIN(mi_row_range, cm->mi_rows);
- const int mi_col_end = AOMMIN(mi_col_range, cm->mi_cols);
-
- int row, col;
- for (row = mi_row_start; row < mi_row_end; ++row) {
- for (col = mi_col_start; col < mi_col_end; ++col) {
- // Note: can't use cm->mi_grid_visible. Because for each partition,
- // all visible pointers will point to the first of the partition.
- cm->mi[row * cm->mi_stride + col].mbmi.filt_lvl = lvl;
- }
- }
-}
-#endif // CONFIG_LPF_SB
-
-void av1_loop_filter_frame_init(AV1_COMMON *cm, int default_filt_lvl,
- int default_filt_lvl_r
-#if CONFIG_LOOPFILTER_LEVEL
- ,
- int plane
-#endif
- ) {
+// Update the loop filter for the current frame.
+// This should be called before loop_filter_rows();
+// av1_loop_filter_frame() calls this function directly.
+void av1_loop_filter_frame_init(AV1_COMMON *cm, int plane_start,
+ int plane_end) {
+ int filt_lvl[MAX_MB_PLANE], filt_lvl_r[MAX_MB_PLANE];
+ int plane;
int seg_id;
// n_shift is the multiplier for lf_deltas
  // the multiplier is 1 when filter_lvl is between 0 and 31;
// 2 when filter_lvl is between 32 and 63
- int scale = 1 << (default_filt_lvl >> 5);
loop_filter_info_n *const lfi = &cm->lf_info;
struct loopfilter *const lf = &cm->lf;
const struct segmentation *const seg = &cm->seg;
- // update limits if sharpness has changed
- if (lf->last_sharpness_level != lf->sharpness_level) {
- update_sharpness(lfi, lf->sharpness_level);
- lf->last_sharpness_level = lf->sharpness_level;
- }
+ // update sharpness limits
+ update_sharpness(lfi, lf->sharpness_level);
+
+ filt_lvl[0] = cm->lf.filter_level[0];
+ filt_lvl[1] = cm->lf.filter_level_u;
+ filt_lvl[2] = cm->lf.filter_level_v;
+
+ filt_lvl_r[0] = cm->lf.filter_level[1];
+ filt_lvl_r[1] = cm->lf.filter_level_u;
+ filt_lvl_r[2] = cm->lf.filter_level_v;
+
+ for (plane = plane_start; plane < plane_end; plane++) {
+ if (plane == 0 && !filt_lvl[0] && !filt_lvl_r[0])
+ break;
+ else if (plane == 1 && !filt_lvl[1])
+ continue;
+ else if (plane == 2 && !filt_lvl[2])
+ continue;
- for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
- for (int dir = 0; dir < 2; ++dir) {
- int lvl_seg = (dir == 0) ? default_filt_lvl : default_filt_lvl_r;
-#if CONFIG_LOOPFILTER_LEVEL
- assert(plane >= 0 && plane <= 2);
- const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir];
- if (segfeature_active(seg, seg_id, seg_lf_feature_id)) {
- const int data = get_segdata(&cm->seg, seg_id, seg_lf_feature_id);
- lvl_seg = clamp(
- seg->abs_delta == SEGMENT_ABSDATA ? data : default_filt_lvl + data,
- 0, MAX_LOOP_FILTER);
+ for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
+ for (int dir = 0; dir < 2; ++dir) {
+ int lvl_seg = (dir == 0) ? filt_lvl[plane] : filt_lvl_r[plane];
+ assert(plane >= 0 && plane <= 2);
+ const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir];
+ if (segfeature_active(seg, seg_id, seg_lf_feature_id)) {
+ const int data = get_segdata(&cm->seg, seg_id, seg_lf_feature_id);
+ lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER);
+ }
+
+ if (!lf->mode_ref_delta_enabled) {
+ // we could get rid of this if we assume that deltas are set to
+ // zero when not in use; encoder always uses deltas
+ memset(lfi->lvl[plane][seg_id][dir], lvl_seg,
+ sizeof(lfi->lvl[plane][seg_id][dir]));
+ } else {
+ int ref, mode;
+ const int scale = 1 << (lvl_seg >> 5);
+ const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
+ lfi->lvl[plane][seg_id][dir][INTRA_FRAME][0] =
+ clamp(intra_lvl, 0, MAX_LOOP_FILTER);
+
+ for (ref = LAST_FRAME; ref < REF_FRAMES; ++ref) {
+ for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
+ const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
+ lf->mode_deltas[mode] * scale;
+ lfi->lvl[plane][seg_id][dir][ref][mode] =
+ clamp(inter_lvl, 0, MAX_LOOP_FILTER);
+ }
+ }
+ }
}
+ }
+ }
+
+#if LOOP_FILTER_BITMASK
+ memset(lf->neighbor_sb_lpf_info.tx_size_y_above, TX_64X64,
+ sizeof(TX_SIZE) * MI_SIZE_64X64);
+ memset(lf->neighbor_sb_lpf_info.tx_size_y_left, TX_64X64,
+ sizeof(TX_SIZE) * MI_SIZE_64X64);
+ memset(lf->neighbor_sb_lpf_info.tx_size_uv_above, TX_64X64,
+ sizeof(TX_SIZE) * MI_SIZE_64X64);
+ memset(lf->neighbor_sb_lpf_info.tx_size_uv_left, TX_64X64,
+ sizeof(TX_SIZE) * MI_SIZE_64X64);
+ memset(lf->neighbor_sb_lpf_info.y_level_above, 0,
+ sizeof(uint8_t) * MI_SIZE_64X64);
+ memset(lf->neighbor_sb_lpf_info.y_level_left, 0,
+ sizeof(uint8_t) * MI_SIZE_64X64);
+ memset(lf->neighbor_sb_lpf_info.u_level_above, 0,
+ sizeof(uint8_t) * MI_SIZE_64X64);
+ memset(lf->neighbor_sb_lpf_info.u_level_left, 0,
+ sizeof(uint8_t) * MI_SIZE_64X64);
+ memset(lf->neighbor_sb_lpf_info.v_level_above, 0,
+ sizeof(uint8_t) * MI_SIZE_64X64);
+ memset(lf->neighbor_sb_lpf_info.v_level_left, 0,
+ sizeof(uint8_t) * MI_SIZE_64X64);
+ memset(lf->neighbor_sb_lpf_info.skip, 0, sizeof(uint8_t) * MI_SIZE_64X64);
+#endif // LOOP_FILTER_BITMASK
+}
+
+#if LOOP_FILTER_BITMASK
+// A 64x64 block needs 256 bits, one for each of its 4x4 tx blocks.
+// Every 4 rows of 4x4 units is represented by one uint64_t mask, so
+// four uint64_t (bitmask[4]) cover the whole 64x64 block.
+//
+// Given a location (mi_col, mi_row), this function returns the index
+// 0, 1, 2 or 3 that selects which bitmask[] to use, and the bit shift
+// within it.
+//
+// mi_row and mi_col are offsets in mi units (4 pixels), so
+// (mi_row >> 2) selects the uint64_t. Within that uint64_t,
+// mi_row % 4 is the row offset, and each row holds 16 = 1 << stride_log2
+// 4x4 units. Therefore, shift = (row << stride_log2) + mi_col;
+static int get_index_shift(int mi_col, int mi_row, int *index) {
+ // *index = mi_row >> 2;
+ // rows = mi_row % 4;
+ // stride_log2 = 4;
+ // shift = (rows << stride_log2) + mi_col;
+ *index = mi_row >> 2;
+ return ((mi_row & 3) << 4) | mi_col;
+}
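
A small usage sketch (values chosen for illustration): the 4x4 unit at mi_row = 5, mi_col = 3 of a 64x64 block lands in bitmask word 1, bit 19.

    int index;
    const int shift = get_index_shift(/*mi_col=*/3, /*mi_row=*/5, &index);
    // index == 5 >> 2 == 1, shift == ((5 & 3) << 4) | 3 == 19, so the
    // corresponding bit is bits[index] & ((uint64_t)1 << shift).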
+
+static void check_mask(const FilterMask *lfm) {
+#ifndef NDEBUG
+ for (int i = 0; i < 4; ++i) {
+ assert(!(lfm[TX_4X4].bits[i] & lfm[TX_8X8].bits[i]));
+ assert(!(lfm[TX_4X4].bits[i] & lfm[TX_16X16].bits[i]));
+ assert(!(lfm[TX_4X4].bits[i] & lfm[TX_32X32].bits[i]));
+ assert(!(lfm[TX_4X4].bits[i] & lfm[TX_64X64].bits[i]));
+ assert(!(lfm[TX_8X8].bits[i] & lfm[TX_16X16].bits[i]));
+ assert(!(lfm[TX_8X8].bits[i] & lfm[TX_32X32].bits[i]));
+ assert(!(lfm[TX_8X8].bits[i] & lfm[TX_64X64].bits[i]));
+ assert(!(lfm[TX_16X16].bits[i] & lfm[TX_32X32].bits[i]));
+ assert(!(lfm[TX_16X16].bits[i] & lfm[TX_64X64].bits[i]));
+ assert(!(lfm[TX_32X32].bits[i] & lfm[TX_64X64].bits[i]));
+ }
#else
- if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
- const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
- lvl_seg = clamp(
- seg->abs_delta == SEGMENT_ABSDATA ? data : default_filt_lvl + data,
- 0, MAX_LOOP_FILTER);
+ (void)lfm;
+#endif
+}
+
+static void check_loop_filter_masks(const LoopFilterMask *lfm, int plane) {
+ if (plane == 0) {
+ // Assert if we try to apply 2 different loop filters at the same
+ // position.
+ check_mask(lfm->left_y);
+ check_mask(lfm->above_y);
+ } else if (plane == 1) {
+ check_mask(lfm->left_u);
+ check_mask(lfm->above_u);
+ } else {
+ check_mask(lfm->left_v);
+ check_mask(lfm->above_v);
+ }
+}
+
+static void update_masks(EDGE_DIR dir, int plane, uint64_t *mask,
+ TX_SIZE sqr_tx_size, LoopFilterMask *lfm) {
+ if (dir == VERT_EDGE) {
+ switch (plane) {
+ case 0:
+ for (int i = 0; i < 4; ++i) lfm->left_y[sqr_tx_size].bits[i] |= mask[i];
+ break;
+ case 1:
+ for (int i = 0; i < 4; ++i) lfm->left_u[sqr_tx_size].bits[i] |= mask[i];
+ break;
+ case 2:
+ for (int i = 0; i < 4; ++i) lfm->left_v[sqr_tx_size].bits[i] |= mask[i];
+ break;
+ default: assert(plane <= 2);
+ }
+ } else {
+ switch (plane) {
+ case 0:
+ for (int i = 0; i < 4; ++i)
+ lfm->above_y[sqr_tx_size].bits[i] |= mask[i];
+ break;
+ case 1:
+ for (int i = 0; i < 4; ++i)
+ lfm->above_u[sqr_tx_size].bits[i] |= mask[i];
+ break;
+ case 2:
+ for (int i = 0; i < 4; ++i)
+ lfm->above_v[sqr_tx_size].bits[i] |= mask[i];
+ break;
+ default: assert(plane <= 2);
+ }
+ }
+}
+
+static int is_frame_boundary(AV1_COMMON *const cm, int plane, int mi_row,
+ int mi_col, int ssx, int ssy, EDGE_DIR dir) {
+ if (plane && (ssx || ssy)) {
+ if (ssx && ssy) { // format 420
+ if ((mi_row << MI_SIZE_LOG2) > cm->height ||
+ (mi_col << MI_SIZE_LOG2) > cm->width)
+ return 1;
+ } else if (ssx) { // format 422
+ if ((mi_row << MI_SIZE_LOG2) >= cm->height ||
+ (mi_col << MI_SIZE_LOG2) > cm->width)
+ return 1;
+ }
+ } else {
+ if ((mi_row << MI_SIZE_LOG2) >= cm->height ||
+ (mi_col << MI_SIZE_LOG2) >= cm->width)
+ return 1;
+ }
+
+ int row_or_col;
+ if (plane == 0) {
+ row_or_col = dir == VERT_EDGE ? mi_col : mi_row;
+ } else {
+ // chroma sub8x8 block uses bottom/right mi of co-located 8x8 luma block.
+ // So if mi_col == 1, it is actually the frame boundary.
+ if (dir == VERT_EDGE) {
+ row_or_col = ssx ? (mi_col & 0x0FFFFFFE) : mi_col;
+ } else {
+ row_or_col = ssy ? (mi_row & 0x0FFFFFFE) : mi_row;
+ }
+ }
+ return row_or_col == 0;
+}
+
+static void setup_masks(AV1_COMMON *const cm, int mi_row, int mi_col, int plane,
+ int ssx, int ssy, TX_SIZE tx_size) {
+ LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
+ const int x = (mi_col << (MI_SIZE_LOG2 - ssx));
+ const int y = (mi_row << (MI_SIZE_LOG2 - ssy));
+ // decide whether current vertical/horizontal edge needs loop filtering
+ for (EDGE_DIR dir = VERT_EDGE; dir <= HORZ_EDGE; ++dir) {
+ // chroma sub8x8 block uses bottom/right mi of co-located 8x8 luma block.
+ mi_row |= ssy;
+ mi_col |= ssx;
+
+ MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
+ const MB_MODE_INFO *const mbmi = mi[0];
+ const int curr_skip = mbmi->skip && is_inter_block(mbmi);
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ const BLOCK_SIZE bsizec = scale_chroma_bsize(bsize, ssx, ssy);
+ const BLOCK_SIZE plane_bsize = ss_size_lookup[bsizec][ssx][ssy];
+ const uint8_t level = get_filter_level(cm, &cm->lf_info, dir, plane, mbmi);
+ const int prediction_masks = dir == VERT_EDGE
+ ? block_size_wide[plane_bsize] - 1
+ : block_size_high[plane_bsize] - 1;
+ const int is_coding_block_border =
+ dir == VERT_EDGE ? !(x & prediction_masks) : !(y & prediction_masks);
+
+ // TODO(chengchen): step can be optimized.
+ const int row_step = mi_size_high[TX_4X4] << ssy;
+ const int col_step = mi_size_wide[TX_4X4] << ssx;
+ const int mi_height =
+ dir == VERT_EDGE ? tx_size_high_unit[tx_size] << ssy : row_step;
+ const int mi_width =
+ dir == VERT_EDGE ? col_step : tx_size_wide_unit[tx_size] << ssx;
+
+ // assign filter levels
+ for (int r = mi_row; r < mi_row + mi_height; r += row_step) {
+ for (int c = mi_col; c < mi_col + mi_width; c += col_step) {
+ // do not filter frame boundary
+        // Note: when a chroma plane is half the size of the luma plane,
+        // a chroma mi corresponds to an even luma position.
+        // If the frame size is odd, that chroma position still needs to be
+        // filtered, so the boundary condition check is split into two cases.
+ if (plane && (ssx || ssy)) {
+ if (ssx && ssy) { // format 420
+ if ((r << MI_SIZE_LOG2) > cm->height ||
+ (c << MI_SIZE_LOG2) > cm->width)
+ continue;
+ } else if (ssx) { // format 422
+ if ((r << MI_SIZE_LOG2) >= cm->height ||
+ (c << MI_SIZE_LOG2) > cm->width)
+ continue;
+ }
+ } else {
+ if ((r << MI_SIZE_LOG2) >= cm->height ||
+ (c << MI_SIZE_LOG2) >= cm->width)
+ continue;
+ }
+
+ const int row = r % MI_SIZE_64X64;
+ const int col = c % MI_SIZE_64X64;
+ if (plane == 0) {
+ if (dir == VERT_EDGE)
+ lfm->lfl_y_ver[row][col] = level;
+ else
+ lfm->lfl_y_hor[row][col] = level;
+ } else if (plane == 1) {
+ if (dir == VERT_EDGE)
+ lfm->lfl_u_ver[row][col] = level;
+ else
+ lfm->lfl_u_hor[row][col] = level;
+ } else {
+ if (dir == VERT_EDGE)
+ lfm->lfl_v_ver[row][col] = level;
+ else
+ lfm->lfl_v_hor[row][col] = level;
+ }
}
-#endif // CONFIG_LOOPFILTER_LEVEL
+ }
- if (!lf->mode_ref_delta_enabled) {
-// we could get rid of this if we assume that deltas are set to
-// zero when not in use; encoder always uses deltas
-#if CONFIG_LOOPFILTER_LEVEL
- memset(lfi->lvl[seg_id][dir], lvl_seg, sizeof(lfi->lvl[seg_id][dir]));
-#else
- memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
-#endif // CONFIG_LOOPFILTER_LEVEL
+ for (int r = mi_row; r < mi_row + mi_height; r += row_step) {
+ for (int c = mi_col; c < mi_col + mi_width; c += col_step) {
+ // do not filter frame boundary
+ if (is_frame_boundary(cm, plane, r, c, ssx, ssy, dir)) continue;
+
+ uint64_t mask[4] = { 0 };
+ const int prev_row = dir == VERT_EDGE ? r : r - (1 << ssy);
+ const int prev_col = dir == VERT_EDGE ? c - (1 << ssx) : c;
+ MB_MODE_INFO **mi_prev =
+ cm->mi_grid_visible + prev_row * cm->mi_stride + prev_col;
+ const MB_MODE_INFO *const mbmi_prev = mi_prev[0];
+ const int prev_skip = mbmi_prev->skip && is_inter_block(mbmi_prev);
+ const uint8_t level_prev =
+ get_filter_level(cm, &cm->lf_info, dir, plane, mbmi_prev);
+ const int is_edge =
+ (level || level_prev) &&
+ (!curr_skip || !prev_skip || is_coding_block_border);
+
+ if (is_edge) {
+ const TX_SIZE prev_tx_size =
+ plane ? av1_get_max_uv_txsize(mbmi_prev->sb_type, ssx, ssy)
+ : mbmi_prev->tx_size;
+ const TX_SIZE min_tx_size =
+ (dir == VERT_EDGE) ? AOMMIN(txsize_horz_map[tx_size],
+ txsize_horz_map[prev_tx_size])
+ : AOMMIN(txsize_vert_map[tx_size],
+ txsize_vert_map[prev_tx_size]);
+ assert(min_tx_size < TX_SIZES);
+ const int row = r % MI_SIZE_64X64;
+ const int col = c % MI_SIZE_64X64;
+ int index = 0;
+ const int shift = get_index_shift(col, row, &index);
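+ // index selects which of the four 64-bit words covers this position;
+ // shift is the bit of this 4x4 unit within that word.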
+ assert(index < 4 && index >= 0);
+ mask[index] |= ((uint64_t)1 << shift);
+ // set mask on corresponding bit
+ update_masks(dir, plane, mask, min_tx_size, lfm);
+ }
+ }
+ }
+ }
+}
+
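+// Walk the transform block tree of a coding block: U/V planes use the largest
+// (already clamped) transform size directly, while the Y plane of inter blocks
+// may split further, so recurse until the actual transform size is reached.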
+static void setup_tx_block_mask(AV1_COMMON *const cm, int mi_row, int mi_col,
+ int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ int plane, int ssx, int ssy) {
+ blk_row <<= ssy;
+ blk_col <<= ssx;
+ if (((mi_row + blk_row) << MI_SIZE_LOG2) >= cm->height ||
+ ((mi_col + blk_col) << MI_SIZE_LOG2) >= cm->width)
+ return;
+
+ // U/V plane, tx_size is always the largest size
+ if (plane) {
+ assert(tx_size_wide[tx_size] <= 32 && tx_size_high[tx_size] <= 32);
+ setup_masks(cm, mi_row + blk_row, mi_col + blk_col, plane, ssx, ssy,
+ tx_size);
+ return;
+ }
+
+ MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
+ const MB_MODE_INFO *const mbmi = mi[0];
+ // For Y plane:
+ // If intra block, the tx size is the same across the whole block.
+ // If inter block, the tx size follows inter_tx_size.
+ TX_SIZE plane_tx_size = tx_size;
+ const int is_inter = is_inter_block(mbmi);
+
+ if (plane == 0) {
+ if (is_inter) {
+ if (mbmi->skip) {
+ // TODO(chengchen): change av1_get_transform_size() to be consistent.
+ // plane_tx_size = get_max_rect_tx_size(plane_bsize);
+ plane_tx_size = mbmi->tx_size;
} else {
- int ref, mode;
-#if CONFIG_LOOPFILTER_LEVEL
- scale = 1 << (lvl_seg >> 5);
-
- const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
- lfi->lvl[seg_id][dir][INTRA_FRAME][0] =
- clamp(intra_lvl, 0, MAX_LOOP_FILTER);
-
- for (ref = LAST_FRAME; ref < TOTAL_REFS_PER_FRAME; ++ref) {
- for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
- const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
- lf->mode_deltas[mode] * scale;
- lfi->lvl[seg_id][dir][ref][mode] =
- clamp(inter_lvl, 0, MAX_LOOP_FILTER);
- }
+ plane_tx_size = mbmi->inter_tx_size[av1_get_txb_size_index(
+ plane_bsize, blk_row, blk_col)];
+ }
+ } else {
+ MB_MODE_INFO **mi_this = cm->mi_grid_visible +
+ (mi_row + blk_row) * cm->mi_stride + mi_col +
+ blk_col;
+ const MB_MODE_INFO *const mbmi_this = mi_this[0];
+ plane_tx_size = mbmi_this->tx_size;
+ }
+ }
+
+ assert(txsize_to_bsize[plane_tx_size] <= plane_bsize);
+
+ if (plane || plane_tx_size == tx_size) {
+ setup_masks(cm, mi_row + blk_row, mi_col + blk_col, plane, ssx, ssy,
+ tx_size);
+ } else {
+ const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+ const int bsw = tx_size_wide_unit[sub_txs];
+ const int bsh = tx_size_high_unit[sub_txs];
+ for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
+ for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
+ const int offsetr = blk_row + row;
+ const int offsetc = blk_col + col;
+ setup_tx_block_mask(cm, mi_row, mi_col, offsetr, offsetc, plane_bsize,
+ sub_txs, plane, ssx, ssy);
+ }
+ }
+ }
+}
+
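+// Set up masks for one coding block: derive the plane block size and maximum
+// transform size, then hand units of at most 64x64 luma samples to
+// setup_tx_block_mask().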
+static void setup_fix_block_mask(AV1_COMMON *const cm, int mi_row, int mi_col,
+ int plane, int ssx, int ssy) {
+ MB_MODE_INFO **mi =
+ cm->mi_grid_visible + (mi_row | ssy) * cm->mi_stride + (mi_col | ssx);
+ const MB_MODE_INFO *const mbmi = mi[0];
+
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ const BLOCK_SIZE bsizec = scale_chroma_bsize(bsize, ssx, ssy);
+ const BLOCK_SIZE plane_bsize = ss_size_lookup[bsizec][ssx][ssy];
+
+ const int block_width = mi_size_wide[plane_bsize];
+ const int block_height = mi_size_high[plane_bsize];
+
+ TX_SIZE max_txsize = max_txsize_rect_lookup[plane_bsize];
+ // The decoder is designed so that it can process 64x64 luma pixels at a
+ // time. If this is a chroma plane with subsampling and bsize corresponds to
+ // a subsampled BLOCK_128X128 then the lookup above will give TX_64X64. That
+ // mustn't be used for the subsampled plane (because it would be bigger than
+ // a 64x64 luma block) so we round down to TX_32X32.
+ if (plane && txsize_sqr_up_map[max_txsize] == TX_64X64) {
+ if (max_txsize == TX_16X64)
+ max_txsize = TX_16X32;
+ else if (max_txsize == TX_64X16)
+ max_txsize = TX_32X16;
+ else
+ max_txsize = TX_32X32;
+ }
+
+ const BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize];
+ const int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0];
+ const int bh = block_size_high[txb_size] >> tx_size_wide_log2[0];
+ const BLOCK_SIZE max_unit_bsize = ss_size_lookup[BLOCK_64X64][ssx][ssy];
+ int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
+ int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
+
+ mu_blocks_wide = AOMMIN(block_width, mu_blocks_wide);
+ mu_blocks_high = AOMMIN(block_height, mu_blocks_high);
+
+ // Y: Largest tx_size is 64x64, while superblock size can be 128x128.
+ // Here we ensure that setup_tx_block_mask processes at most a 64x64 block.
+ // U/V: largest tx size is 32x32.
+ for (int idy = 0; idy < block_height; idy += mu_blocks_high) {
+ for (int idx = 0; idx < block_width; idx += mu_blocks_wide) {
+ const int unit_height = AOMMIN(mu_blocks_high + idy, block_height);
+ const int unit_width = AOMMIN(mu_blocks_wide + idx, block_width);
+ for (int blk_row = idy; blk_row < unit_height; blk_row += bh) {
+ for (int blk_col = idx; blk_col < unit_width; blk_col += bw) {
+ setup_tx_block_mask(cm, mi_row, mi_col, blk_row, blk_col, plane_bsize,
+ max_txsize, plane, ssx, ssy);
}
-#else
- (void)default_filt_lvl_r;
- const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
- lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
-
- for (ref = LAST_FRAME; ref < TOTAL_REFS_PER_FRAME; ++ref) {
- for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
- const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
- lf->mode_deltas[mode] * scale;
- lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
+ }
+ }
+ }
+}
+
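+// Recursively follow the partition tree below (mi_row, mi_col) and set up the
+// bitmask for every leaf coding block that lies inside the frame.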
+static void setup_block_mask(AV1_COMMON *const cm, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int plane, int ssx, int ssy) {
+ if ((mi_row << MI_SIZE_LOG2) >= cm->height ||
+ (mi_col << MI_SIZE_LOG2) >= cm->width)
+ return;
+
+ const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize);
+ const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
+ const int hbs = mi_size_wide[bsize] / 2;
+ const int quarter_step = mi_size_wide[bsize] / 4;
+ const int allow_sub8x8 = (ssx || ssy) ? bsize > BLOCK_8X8 : 1;
+ const int has_next_row =
+ (((mi_row + hbs) << MI_SIZE_LOG2) < cm->height) & allow_sub8x8;
+ const int has_next_col =
+ (((mi_col + hbs) << MI_SIZE_LOG2) < cm->width) & allow_sub8x8;
+ int i;
+
+ switch (partition) {
+ case PARTITION_NONE:
+ setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
+ break;
+ case PARTITION_HORZ:
+ setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
+ if (has_next_row)
+ setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy);
+ break;
+ case PARTITION_VERT:
+ setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
+ if (has_next_col)
+ setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy);
+ break;
+ case PARTITION_SPLIT:
+ setup_block_mask(cm, mi_row, mi_col, subsize, plane, ssx, ssy);
+ if (has_next_col)
+ setup_block_mask(cm, mi_row, mi_col + hbs, subsize, plane, ssx, ssy);
+ if (has_next_row)
+ setup_block_mask(cm, mi_row + hbs, mi_col, subsize, plane, ssx, ssy);
+ if (has_next_col & has_next_row)
+ setup_block_mask(cm, mi_row + hbs, mi_col + hbs, subsize, plane, ssx,
+ ssy);
+ break;
+ case PARTITION_HORZ_A:
+ setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
+ if (has_next_col)
+ setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy);
+ if (has_next_row)
+ setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy);
+ break;
+ case PARTITION_HORZ_B:
+ setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
+ if (has_next_row)
+ setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy);
+ if (has_next_col & has_next_row)
+ setup_fix_block_mask(cm, mi_row + hbs, mi_col + hbs, plane, ssx, ssy);
+ break;
+ case PARTITION_VERT_A:
+ setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
+ if (has_next_row)
+ setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy);
+ if (has_next_col)
+ setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy);
+ break;
+ case PARTITION_VERT_B:
+ setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
+ if (has_next_col)
+ setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy);
+ if (has_next_row)
+ setup_fix_block_mask(cm, mi_row + hbs, mi_col + hbs, plane, ssx, ssy);
+ break;
+ case PARTITION_HORZ_4:
+ for (i = 0; i < 4; ++i) {
+ int this_mi_row = mi_row + i * quarter_step;
+ if (i > 0 && (this_mi_row << MI_SIZE_LOG2) >= cm->height) break;
+ // for the chroma plane, skip filtering at the odd locations
+ if (plane && bsize == BLOCK_16X16 && (i & 0x01)) continue;
+
+ setup_fix_block_mask(cm, this_mi_row, mi_col, plane, ssx, ssy);
+ }
+ break;
+ case PARTITION_VERT_4:
+ for (i = 0; i < 4; ++i) {
+ int this_mi_col = mi_col + i * quarter_step;
+ if (i > 0 && this_mi_col >= cm->mi_cols) break;
+ // for the chroma plane, skip filtering at the odd locations
+ if (plane && bsize == BLOCK_16X16 && (i & 0x01)) continue;
+
+ setup_fix_block_mask(cm, mi_row, this_mi_col, plane, ssx, ssy);
+ }
+ break;
+ default: assert(0);
+ }
+}
+
+// TODO(chengchen): if lossless, there is no need to set up the mask. But when
+// segmentation is enabled, each segment may have different lossless settings.
+void av1_setup_bitmask(AV1_COMMON *const cm, int mi_row, int mi_col, int plane,
+ int subsampling_x, int subsampling_y, int row_end,
+ int col_end) {
+ const int num_64x64 = cm->seq_params.mib_size >> MIN_MIB_SIZE_LOG2;
+ for (int y = 0; y < num_64x64; ++y) {
+ for (int x = 0; x < num_64x64; ++x) {
+ const int row = mi_row + y * MI_SIZE_64X64;
+ const int col = mi_col + x * MI_SIZE_64X64;
+ if (row >= row_end || col >= col_end) continue;
+ if ((row << MI_SIZE_LOG2) >= cm->height ||
+ (col << MI_SIZE_LOG2) >= cm->width)
+ continue;
+
+ LoopFilterMask *lfm = get_loop_filter_mask(cm, row, col);
+ if (lfm == NULL) return;
+
+ // init mask to zero
+ if (plane == 0) {
+ av1_zero(lfm->left_y);
+ av1_zero(lfm->above_y);
+ av1_zero(lfm->lfl_y_ver);
+ av1_zero(lfm->lfl_y_hor);
+ } else if (plane == 1) {
+ av1_zero(lfm->left_u);
+ av1_zero(lfm->above_u);
+ av1_zero(lfm->lfl_u_ver);
+ av1_zero(lfm->lfl_u_hor);
+ } else {
+ av1_zero(lfm->left_v);
+ av1_zero(lfm->above_v);
+ av1_zero(lfm->lfl_v_ver);
+ av1_zero(lfm->lfl_v_hor);
+ }
+ }
+ }
+
+ // set up bitmask for each superblock
+ setup_block_mask(cm, mi_row, mi_col, cm->seq_params.sb_size, plane,
+ subsampling_x, subsampling_y);
+
+ for (int y = 0; y < num_64x64; ++y) {
+ for (int x = 0; x < num_64x64; ++x) {
+ const int row = mi_row + y * MI_SIZE_64X64;
+ const int col = mi_col + x * MI_SIZE_64X64;
+ if (row >= row_end || col >= col_end) continue;
+ if ((row << MI_SIZE_LOG2) >= cm->height ||
+ (col << MI_SIZE_LOG2) >= cm->width)
+ continue;
+
+ LoopFilterMask *lfm = get_loop_filter_mask(cm, row, col);
+ if (lfm == NULL) return;
+
+ // check if the mask is valid
+ check_loop_filter_masks(lfm, plane);
+
+ {
+ // Let the 16x16 mask also hold 32x32 (Y/U/V) and 64x64 (Y only).
+ // Even if the tx size is greater, we only apply the maximum length
+ // filter, which is 16.
+ if (plane == 0) {
+ for (int j = 0; j < 4; ++j) {
+ lfm->left_y[TX_16X16].bits[j] |= lfm->left_y[TX_32X32].bits[j];
+ lfm->left_y[TX_16X16].bits[j] |= lfm->left_y[TX_64X64].bits[j];
+ lfm->above_y[TX_16X16].bits[j] |= lfm->above_y[TX_32X32].bits[j];
+ lfm->above_y[TX_16X16].bits[j] |= lfm->above_y[TX_64X64].bits[j];
+
+ // set 32x32 and 64x64 to 0
+ lfm->left_y[TX_32X32].bits[j] = 0;
+ lfm->left_y[TX_64X64].bits[j] = 0;
+ lfm->above_y[TX_32X32].bits[j] = 0;
+ lfm->above_y[TX_64X64].bits[j] = 0;
+ }
+ } else if (plane == 1) {
+ for (int j = 0; j < 4; ++j) {
+ lfm->left_u[TX_16X16].bits[j] |= lfm->left_u[TX_32X32].bits[j];
+ lfm->above_u[TX_16X16].bits[j] |= lfm->above_u[TX_32X32].bits[j];
+
+ // set 32x32 to 0
+ lfm->left_u[TX_32X32].bits[j] = 0;
+ lfm->above_u[TX_32X32].bits[j] = 0;
+ }
+ } else {
+ for (int j = 0; j < 4; ++j) {
+ lfm->left_v[TX_16X16].bits[j] |= lfm->left_v[TX_32X32].bits[j];
+ lfm->above_v[TX_16X16].bits[j] |= lfm->above_v[TX_32X32].bits[j];
+
+ // set 32x32 to 0
+ lfm->left_v[TX_32X32].bits[j] = 0;
+ lfm->above_v[TX_32X32].bits[j] = 0;
}
}
-#endif
}
+
+ // check if the mask is valid
+ check_loop_filter_masks(lfm, plane);
}
}
}
-static void filter_selectively_vert_row2(int subsampling_factor, uint8_t *s,
- int pitch, unsigned int mask_16x16_l,
- unsigned int mask_8x8_l,
- unsigned int mask_4x4_l,
- unsigned int mask_4x4_int_l,
- const loop_filter_info_n *lfi_n,
- const uint8_t *lfl) {
- const int mask_shift = subsampling_factor ? 4 : 8;
- const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
- const int lfl_forward = subsampling_factor ? 4 : 8;
-
- unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
- unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
- unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
- unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
- unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
- unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
- unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
- unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
- unsigned int mask;
-
- for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
- mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
- mask; mask >>= 1) {
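+// Filter vertical edges for two adjacent rows of 4x4 units at once: the *_0
+// masks and lfl describe the first row, the *_1 masks and lfl2 the row
+// 4 pixels below.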
+static void filter_selectively_vert_row2(
+ int subsampling_factor, uint8_t *s, int pitch, int plane,
+ uint64_t mask_16x16_0, uint64_t mask_8x8_0, uint64_t mask_4x4_0,
+ uint64_t mask_16x16_1, uint64_t mask_8x8_1, uint64_t mask_4x4_1,
+ const loop_filter_info_n *lfi_n, uint8_t *lfl, uint8_t *lfl2) {
+ uint64_t mask;
+ const int step = 1 << subsampling_factor;
+
+ for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_16x16_1 |
+ mask_8x8_1 | mask_4x4_1;
+ mask; mask >>= step) {
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
- const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
+ const loop_filter_thresh *lfi1 = lfi_n->lfthr + *lfl2;
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
+ // the chroma plane filters fewer pixels, as introduced in the
+ // deblock_13tap experiment
+ LpfFunc lpf_vertical = plane ? aom_lpf_vertical_6 : aom_lpf_vertical_14;
+
if ((mask_16x16_0 & mask_16x16_1) & 1) {
- aom_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr);
+ if (plane) {
+ // TODO(any): add aom_lpf_vertical_6_dual for chroma plane.
+ aom_lpf_vertical_6(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
+ aom_lpf_vertical_6(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
+ } else {
+ // TODO(any): add a dual SIMD function. The current sse2 code
+ // just calls aom_lpf_vertical_14_sse2 twice.
+ aom_lpf_vertical_14_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
+ }
} else if (mask_16x16_0 & 1) {
- aom_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
+ lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
} else {
- aom_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr);
+ lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
}
}
if ((mask_8x8_0 | mask_8x8_1) & 1) {
+ // the chroma plane filters fewer pixels, as introduced in the
+ // deblock_13tap experiment
+ LpfFunc lpf_vertical = plane ? aom_lpf_vertical_6 : aom_lpf_vertical_8;
+
if ((mask_8x8_0 & mask_8x8_1) & 1) {
- aom_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr);
+ if (plane) {
+ aom_lpf_vertical_6(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
+ aom_lpf_vertical_6(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
+ } else {
+ aom_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
+ }
} else if (mask_8x8_0 & 1) {
- aom_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
+ lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
} else {
- aom_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr);
+ lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
}
}
@@ -898,90 +1029,86 @@ static void filter_selectively_vert_row2(int subsampling_factor, uint8_t *s,
} else if (mask_4x4_0 & 1) {
aom_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
} else {
- aom_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr);
- }
- }
-
- if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
- if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
- aom_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr);
- } else if (mask_4x4_int_0 & 1) {
- aom_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr);
- } else {
- aom_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
+ aom_lpf_vertical_4(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
}
}
}
- s += 8;
- lfl += 1;
- mask_16x16_0 >>= 1;
- mask_8x8_0 >>= 1;
- mask_4x4_0 >>= 1;
- mask_4x4_int_0 >>= 1;
- mask_16x16_1 >>= 1;
- mask_8x8_1 >>= 1;
- mask_4x4_1 >>= 1;
- mask_4x4_int_1 >>= 1;
+ s += 4;
+ lfl += step;
+ lfl2 += step;
+ mask_16x16_0 >>= step;
+ mask_8x8_0 >>= step;
+ mask_4x4_0 >>= step;
+ mask_16x16_1 >>= step;
+ mask_8x8_1 >>= step;
+ mask_4x4_1 >>= step;
}
}
-#if CONFIG_HIGHBITDEPTH
static void highbd_filter_selectively_vert_row2(
- int subsampling_factor, uint16_t *s, int pitch, unsigned int mask_16x16_l,
- unsigned int mask_8x8_l, unsigned int mask_4x4_l,
- unsigned int mask_4x4_int_l, const loop_filter_info_n *lfi_n,
- const uint8_t *lfl, int bd) {
- const int mask_shift = subsampling_factor ? 4 : 8;
- const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
- const int lfl_forward = subsampling_factor ? 4 : 8;
-
- unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
- unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
- unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
- unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
- unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
- unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
- unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
- unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
- unsigned int mask;
-
- for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
- mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
- mask; mask >>= 1) {
+ int subsampling_factor, uint16_t *s, int pitch, int plane,
+ uint64_t mask_16x16_0, uint64_t mask_8x8_0, uint64_t mask_4x4_0,
+ uint64_t mask_16x16_1, uint64_t mask_8x8_1, uint64_t mask_4x4_1,
+ const loop_filter_info_n *lfi_n, uint8_t *lfl, uint8_t *lfl2, int bd) {
+ uint64_t mask;
+ const int step = 1 << subsampling_factor;
+
+ for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_16x16_1 |
+ mask_8x8_1 | mask_4x4_1;
+ mask; mask >>= step) {
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
- const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
+ const loop_filter_thresh *lfi1 = lfi_n->lfthr + *lfl2;
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
+ // the chroma plane filters fewer pixels, as introduced in the
+ // deblock_13tap experiment
+ HbdLpfFunc highbd_lpf_vertical =
+ plane ? aom_highbd_lpf_vertical_6 : aom_highbd_lpf_vertical_14;
+
if ((mask_16x16_0 & mask_16x16_1) & 1) {
- aom_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, bd);
+ if (plane) {
+ aom_highbd_lpf_vertical_6(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, bd);
+ aom_highbd_lpf_vertical_6(s + 4 * pitch, pitch, lfi1->mblim,
+ lfi1->lim, lfi1->hev_thr, bd);
+ } else {
+ aom_highbd_lpf_vertical_14_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim,
+ lfi1->lim, lfi1->hev_thr, bd);
+ }
} else if (mask_16x16_0 & 1) {
- aom_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, bd);
+ highbd_lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
+ bd);
} else {
- aom_highbd_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, bd);
+ highbd_lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr, bd);
}
}
if ((mask_8x8_0 | mask_8x8_1) & 1) {
+ HbdLpfFunc highbd_lpf_vertical =
+ plane ? aom_highbd_lpf_vertical_6 : aom_highbd_lpf_vertical_8;
+
if ((mask_8x8_0 & mask_8x8_1) & 1) {
- aom_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, bd);
+ if (plane) {
+ aom_highbd_lpf_vertical_6(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, bd);
+ aom_highbd_lpf_vertical_6(s + 4 * pitch, pitch, lfi1->mblim,
+ lfi1->lim, lfi1->hev_thr, bd);
+ } else {
+ aom_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim,
+ lfi1->lim, lfi1->hev_thr, bd);
+ }
} else if (mask_8x8_0 & 1) {
- aom_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, bd);
+ highbd_lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
+ bd);
} else {
- aom_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, bd);
+ highbd_lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr, bd);
}
}
@@ -994,1925 +1121,396 @@ static void highbd_filter_selectively_vert_row2(
aom_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
} else {
- aom_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, bd);
- }
- }
-
- if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
- if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
- aom_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, bd);
- } else if (mask_4x4_int_0 & 1) {
- aom_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, bd);
- } else {
- aom_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
+ aom_highbd_lpf_vertical_4(s + 4 * pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, bd);
}
}
}
- s += 8;
- lfl += 1;
- mask_16x16_0 >>= 1;
- mask_8x8_0 >>= 1;
- mask_4x4_0 >>= 1;
- mask_4x4_int_0 >>= 1;
- mask_16x16_1 >>= 1;
- mask_8x8_1 >>= 1;
- mask_4x4_1 >>= 1;
- mask_4x4_int_1 >>= 1;
+ s += 4;
+ lfl += step;
+ lfl2 += step;
+ mask_16x16_0 >>= step;
+ mask_8x8_0 >>= step;
+ mask_4x4_0 >>= step;
+ mask_16x16_1 >>= step;
+ mask_8x8_1 >>= step;
+ mask_4x4_1 >>= step;
}
}
-#endif // CONFIG_HIGHBITDEPTH
-
-static void filter_selectively_horiz(
- uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
- unsigned int mask_4x4, unsigned int mask_4x4_int,
- const loop_filter_info_n *lfi_n, const uint8_t *lfl
-#if CONFIG_LPF_DIRECT
- ,
- uint8_t *const src, int mi_row, int mi_col, int idx_r, int col_step,
- int width, int height, int ss_x, int ss_y
-#endif
- ) {
- unsigned int mask;
+
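+// Filter horizontal edges for one row of 4x4 units, choosing the filter length
+// per unit from the 16x16/8x8/4x4 bitmasks and handling two adjacent units
+// together when both require the same length.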
+static void filter_selectively_horiz(uint8_t *s, int pitch, int plane,
+ int subsampling, uint64_t mask_16x16,
+ uint64_t mask_8x8, uint64_t mask_4x4,
+ const loop_filter_info_n *lfi_n,
+ const uint8_t *lfl) {
+ uint64_t mask;
int count;
-#if CONFIG_LPF_DIRECT
- // scale for u, v plane
- width >>= ss_x;
- height >>= ss_y;
- int idx_c = 0;
-#endif
+ const int step = 1 << subsampling;
+ const unsigned int two_block_mask = subsampling ? 5 : 3;
- for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
- mask >>= count) {
+ for (mask = mask_16x16 | mask_8x8 | mask_4x4; mask; mask >>= step * count) {
const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+ // Next block's thresholds.
+ const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + step);
count = 1;
if (mask & 1) {
-#if CONFIG_LPF_DIRECT
- int i;
- const int line_length = 16;
- const int pivot = 8;
- const int above_filt_len = mask_16x16 & 1 ? 8 : 4;
- const int bot_filt_len = mask_16x16 & 1 ? 8 : 4;
- uint8_t block[256]; // line_length * size_of(BLOCK_8X8) * two_blocks
- int orig_pos[256];
- int direct;
-
- assert(above_filt_len == bot_filt_len);
- (void)bot_filt_len;
- for (i = 0; i < 256; ++i) {
- block[i] = 0;
- orig_pos[i] = -1;
- }
-
- // actual position for current pixel
- const int row = (mi_row + idx_r) * MI_SIZE >> ss_y;
- const int col = (mi_col + idx_c) * MI_SIZE >> ss_x;
-
- // Next block's thresholds.
- const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
-
if (mask_16x16 & 1) {
- if ((mask_16x16 & 3) == 3) {
- // Could use asymmetric length in the future
- direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
- height, pitch, 2, 1);
-
- pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col,
- width, height, pitch, pivot, line_length, 2,
- direct);
-
- aom_lpf_horizontal_edge_16(block + pivot * line_length, line_length,
- lfi->mblim, lfi->lim, lfi->hev_thr);
- count = 2;
- } else {
- direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
- height, pitch, 1, 1);
-
- pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col,
- width, height, pitch, pivot, line_length, 1,
- direct);
-
- aom_lpf_horizontal_edge_8(block + pivot * line_length, line_length,
- lfi->mblim, lfi->lim, lfi->hev_thr);
- }
-
- for (i = 0; i < 256; ++i)
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
- } else if (mask_8x8 & 1) {
- if ((mask_8x8 & 3) == 3) {
- count = 2;
- direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
- height, pitch, 2, 1);
-
- pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col,
- width, height, pitch, pivot, line_length, 2,
- direct);
-
- aom_lpf_horizontal_8_dual(block + pivot * line_length, line_length,
- lfi->mblim, lfi->lim, lfi->hev_thr,
- lfin->mblim, lfin->lim, lfin->hev_thr);
-
- for (i = 0; i < 256; ++i)
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
-
- if ((mask_4x4_int & 3) == 3) {
- for (i = 0; i < 256; ++i) {
- block[i] = 0;
- orig_pos[i] = -1;
- }
-
- direct = pick_min_grad_direct(src, 4, row, col, width, height,
- pitch, 2, 1);
-
- pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width,
- height, pitch, pivot, line_length, 2,
- direct);
-
- aom_lpf_horizontal_4_dual(block + pivot * line_length, line_length,
- lfi->mblim, lfi->lim, lfi->hev_thr,
- lfin->mblim, lfin->lim, lfin->hev_thr);
-
- for (i = 0; i < 256; ++i)
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
- } else {
- for (i = 0; i < 256; ++i) {
- block[i] = 0;
- orig_pos[i] = -1;
- }
-
- if (mask_4x4_int & 1) {
- direct = pick_min_grad_direct(src, 4, row, col, width, height,
- pitch, 1, 1);
-
- pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col,
- width, height, pitch, pivot, line_length,
- 1, direct);
-
- aom_lpf_horizontal_4(block + pivot * line_length, line_length,
- lfi->mblim, lfi->lim, lfi->hev_thr);
- } else if (mask_4x4_int & 2) {
- direct = pick_min_grad_direct(src, 4, row, col, width, height,
- pitch, 1, 1);
-
- pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col + 8,
- width, height, pitch, pivot, line_length,
- 1, direct);
-
- aom_lpf_horizontal_4(block + pivot * line_length, line_length,
- lfin->mblim, lfin->lim, lfin->hev_thr);
- }
-
- for (i = 0; i < 256; ++i)
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
- }
- } else {
- direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
- height, pitch, 1, 1);
-
- pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col,
- width, height, pitch, pivot, line_length, 1,
- direct);
-
- aom_lpf_horizontal_8(block + pivot * line_length, line_length,
- lfi->mblim, lfi->lim, lfi->hev_thr);
-
- for (i = 0; i < 256; ++i)
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
-
- if (mask_4x4_int & 1) {
- for (i = 0; i < 256; ++i) {
- block[i] = 0;
- orig_pos[i] = -1;
- }
- direct = pick_min_grad_direct(src, 4, row, col, width, height,
- pitch, 1, 1);
-
- pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width,
- height, pitch, pivot, line_length, 1,
- direct);
-
- aom_lpf_horizontal_4(block + pivot * line_length, line_length,
- lfi->mblim, lfi->lim, lfi->hev_thr);
-
- for (i = 0; i < 256; ++i)
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
- }
- }
- } else if (mask_4x4 & 1) {
- if ((mask_4x4 & 3) == 3) {
- count = 2;
- direct = pick_min_grad_direct(src, 4, row, col, width, height, pitch,
- 2, 1);
-
- pick_filter_block_horz(src, block, orig_pos, 4, row, col, width,
- height, pitch, pivot, line_length, 2, direct);
-
- aom_lpf_horizontal_4_dual(block + pivot * line_length, line_length,
- lfi->mblim, lfi->lim, lfi->hev_thr,
- lfin->mblim, lfin->lim, lfin->hev_thr);
-
- for (i = 0; i < 256; ++i)
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
-
- if ((mask_4x4_int & 3) == 3) {
- for (i = 0; i < 256; ++i) {
- block[i] = 0;
- orig_pos[i] = -1;
- }
-
- direct = pick_min_grad_direct(src, 4, row, col, width, height,
- pitch, 2, 1);
-
- pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width,
- height, pitch, pivot, line_length, 2,
- direct);
-
- aom_lpf_horizontal_4_dual(block + pivot * line_length, line_length,
- lfi->mblim, lfi->lim, lfi->hev_thr,
- lfin->mblim, lfin->lim, lfin->hev_thr);
-
- for (i = 0; i < 256; ++i)
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
- } else {
- for (i = 0; i < 256; ++i) {
- block[i] = 0;
- orig_pos[i] = -1;
- }
-
- if (mask_4x4_int & 1) {
- direct = pick_min_grad_direct(src, 4, row, col, width, height,
- pitch, 1, 1);
-
- pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col,
- width, height, pitch, pivot, line_length,
- 1, direct);
-
- aom_lpf_horizontal_4(block + pivot * line_length, line_length,
- lfi->mblim, lfi->lim, lfi->hev_thr);
- } else if (mask_4x4_int & 2) {
- direct = pick_min_grad_direct(src, 4, row, col, width, height,
- pitch, 1, 1);
-
- pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col + 8,
- width, height, pitch, pivot, line_length,
- 1, direct);
-
- aom_lpf_horizontal_4(block + pivot * line_length, line_length,
- lfin->mblim, lfin->lim, lfin->hev_thr);
- }
-
- for (i = 0; i < 256; ++i)
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
- }
- } else {
- direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
- height, pitch, 1, 1);
-
- pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col,
- width, height, pitch, pivot, line_length, 1,
- direct);
-
- aom_lpf_horizontal_4(block + pivot * line_length, line_length,
- lfi->mblim, lfi->lim, lfi->hev_thr);
-
- for (i = 0; i < 256; ++i)
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
-
- if (mask_4x4_int & 1) {
- for (i = 0; i < 256; ++i) {
- block[i] = 0;
- orig_pos[i] = -1;
- }
- direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
- height, pitch, 1, 1);
-
- pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width,
- height, pitch, pivot, line_length, 1,
- direct);
-
- aom_lpf_horizontal_4(block + pivot * line_length, line_length,
- lfi->mblim, lfi->lim, lfi->hev_thr);
-
- for (i = 0; i < 256; ++i)
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
- }
- }
- } else if (mask_4x4_int & 1) {
- direct =
- pick_min_grad_direct(src, 4, row, col, width, height, pitch, 1, 1);
-
- pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width,
- height, pitch, pivot, line_length, 1, direct);
-
- aom_lpf_horizontal_4(block + pivot * line_length, line_length,
- lfi->mblim, lfi->lim, lfi->hev_thr);
-
- for (i = 0; i < 256; ++i)
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
- }
-#else // CONFIG_LPF_DIRECT
- if (mask_16x16 & 1) {
- if ((mask_16x16 & 3) == 3) {
- aom_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+ // the chroma plane filters fewer pixels, as introduced in the
+ // deblock_13tap experiment
+ LpfFunc lpf_horizontal =
+ plane ? aom_lpf_horizontal_6 : aom_lpf_horizontal_14;
+
+ if ((mask_16x16 & two_block_mask) == two_block_mask) {
+ /*
+ aom_lpf_horizontal_14_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr);
+ */
+
+ lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+ lpf_horizontal(s + 4, pitch, lfin->mblim, lfin->lim, lfin->hev_thr);
count = 2;
} else {
- aom_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr);
+ lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
}
} else if (mask_8x8 & 1) {
- if ((mask_8x8 & 3) == 3) {
- // Next block's thresholds.
- const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+ // the chroma plane filters fewer pixels, as introduced in the
+ // deblock_13tap experiment
+ LpfFunc lpf_horizontal =
+ plane ? aom_lpf_horizontal_6 : aom_lpf_horizontal_8;
+ if ((mask_8x8 & two_block_mask) == two_block_mask) {
+ /*
aom_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr);
+ */
- if ((mask_4x4_int & 3) == 3) {
- aom_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, lfin->mblim,
- lfin->lim, lfin->hev_thr);
- } else {
- if (mask_4x4_int & 1)
- aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr);
- else if (mask_4x4_int & 2)
- aom_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr);
- }
+ lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+ lpf_horizontal(s + 4, pitch, lfin->mblim, lfin->lim, lfin->hev_thr);
count = 2;
} else {
- aom_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
-
- if (mask_4x4_int & 1)
- aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr);
+ lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
}
} else if (mask_4x4 & 1) {
- if ((mask_4x4 & 3) == 3) {
- // Next block's thresholds.
- const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
-
+ if ((mask_4x4 & two_block_mask) == two_block_mask) {
+ /*
aom_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr);
-
- if ((mask_4x4_int & 3) == 3) {
- aom_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, lfin->mblim,
- lfin->lim, lfin->hev_thr);
- } else {
- if (mask_4x4_int & 1)
- aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr);
- else if (mask_4x4_int & 2)
- aom_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr);
- }
+ */
+ aom_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+ aom_lpf_horizontal_4(s + 4, pitch, lfin->mblim, lfin->lim,
+ lfin->hev_thr);
count = 2;
} else {
aom_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
-
- if (mask_4x4_int & 1)
- aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr);
}
- } else if (mask_4x4_int & 1) {
- aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr);
}
-#endif // CONFIG_LPF_DIRECT
}
-#if CONFIG_LPF_DIRECT
- idx_c += col_step * count;
-#endif
- s += 8 * count;
- lfl += count;
- mask_16x16 >>= count;
- mask_8x8 >>= count;
- mask_4x4 >>= count;
- mask_4x4_int >>= count;
+
+ s += 4 * count;
+ lfl += step * count;
+ mask_16x16 >>= step * count;
+ mask_8x8 >>= step * count;
+ mask_4x4 >>= step * count;
}
}
-#if CONFIG_HIGHBITDEPTH
static void highbd_filter_selectively_horiz(
- uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
- unsigned int mask_4x4, unsigned int mask_4x4_int,
- const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) {
- unsigned int mask;
+ uint16_t *s, int pitch, int plane, int subsampling, uint64_t mask_16x16,
+ uint64_t mask_8x8, uint64_t mask_4x4, const loop_filter_info_n *lfi_n,
+ uint8_t *lfl, int bd) {
+ uint64_t mask;
int count;
+ const int step = 1 << subsampling;
+ const unsigned int two_block_mask = subsampling ? 5 : 3;
- for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
- mask >>= count) {
+ for (mask = mask_16x16 | mask_8x8 | mask_4x4; mask; mask >>= step * count) {
const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+ // Next block's thresholds.
+ const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + step);
count = 1;
if (mask & 1) {
if (mask_16x16 & 1) {
- if ((mask_16x16 & 3) == 3) {
- aom_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+ HbdLpfFunc highbd_lpf_horizontal =
+ plane ? aom_highbd_lpf_horizontal_6 : aom_highbd_lpf_horizontal_14;
+
+ if ((mask_16x16 & two_block_mask) == two_block_mask) {
+ /*
+ aom_highbd_lpf_horizontal_14_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, bd);
+ */
+
+ highbd_lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
+ bd);
+ highbd_lpf_horizontal(s + 4, pitch, lfin->mblim, lfin->lim,
+ lfin->hev_thr, bd);
count = 2;
} else {
- aom_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, bd);
+ highbd_lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
+ bd);
}
} else if (mask_8x8 & 1) {
- if ((mask_8x8 & 3) == 3) {
- // Next block's thresholds.
- const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+ HbdLpfFunc highbd_lpf_horizontal =
+ plane ? aom_highbd_lpf_horizontal_6 : aom_highbd_lpf_horizontal_8;
+ if ((mask_8x8 & two_block_mask) == two_block_mask) {
+ /*
aom_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
-
- if ((mask_4x4_int & 3) == 3) {
- aom_highbd_lpf_horizontal_4_dual(
- s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
- lfin->mblim, lfin->lim, lfin->hev_thr, bd);
- } else {
- if (mask_4x4_int & 1) {
- aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, bd);
- } else if (mask_4x4_int & 2) {
- aom_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, bd);
- }
- }
+ */
+ highbd_lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
+ bd);
+ highbd_lpf_horizontal(s + 4, pitch, lfin->mblim, lfin->lim,
+ lfin->hev_thr, bd);
count = 2;
} else {
- aom_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, bd);
-
- if (mask_4x4_int & 1) {
- aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, bd);
- }
+ highbd_lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
+ bd);
}
} else if (mask_4x4 & 1) {
- if ((mask_4x4 & 3) == 3) {
- // Next block's thresholds.
- const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
-
+ if ((mask_4x4 & two_block_mask) == two_block_mask) {
+ /*
aom_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
- if ((mask_4x4_int & 3) == 3) {
- aom_highbd_lpf_horizontal_4_dual(
- s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
- lfin->mblim, lfin->lim, lfin->hev_thr, bd);
- } else {
- if (mask_4x4_int & 1) {
- aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, bd);
- } else if (mask_4x4_int & 2) {
- aom_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, bd);
- }
- }
+ */
+ aom_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, bd);
+ aom_highbd_lpf_horizontal_4(s + 4, pitch, lfin->mblim, lfin->lim,
+ lfin->hev_thr, bd);
count = 2;
} else {
aom_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, bd);
-
- if (mask_4x4_int & 1) {
- aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, bd);
- }
}
- } else if (mask_4x4_int & 1) {
- aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, bd);
}
}
- s += 8 * count;
- lfl += count;
- mask_16x16 >>= count;
- mask_8x8 >>= count;
- mask_4x4 >>= count;
- mask_4x4_int >>= count;
- }
-}
-#endif // CONFIG_HIGHBITDEPTH
-
-// This function ors into the current lfm structure, where to do loop
-// filters for the specific mi we are looking at. It uses information
-// including the block_size_type (32x16, 32x32, etc.), the transform size,
-// whether there were any coefficients encoded, and the loop filter strength
-// block we are currently looking at. Shift is used to position the
-// 1's we produce.
-// TODO(JBB) Need another function for different resolution color..
-static void build_masks(AV1_COMMON *const cm,
- const loop_filter_info_n *const lfi_n,
- const MODE_INFO *mi, const int shift_y,
- const int shift_uv, LOOP_FILTER_MASK *lfm) {
- const MB_MODE_INFO *mbmi = &mi->mbmi;
- const BLOCK_SIZE block_size = mbmi->sb_type;
- // TODO(debargha): Check if masks can be setup correctly when
- // rectangular transfroms are used with the EXT_TX expt.
- const TX_SIZE tx_size_y = txsize_sqr_map[mbmi->tx_size];
- const TX_SIZE tx_size_y_left = txsize_horz_map[mbmi->tx_size];
- const TX_SIZE tx_size_y_above = txsize_vert_map[mbmi->tx_size];
- const TX_SIZE tx_size_uv =
- txsize_sqr_map[uv_txsize_lookup[block_size][mbmi->tx_size][1][1]];
- const TX_SIZE tx_size_uv_left =
- txsize_horz_map[uv_txsize_lookup[block_size][mbmi->tx_size][1][1]];
- const TX_SIZE tx_size_uv_above =
- txsize_vert_map[uv_txsize_lookup[block_size][mbmi->tx_size][1][1]];
-#if CONFIG_EXT_DELTA_Q
-#if CONFIG_LOOPFILTER_LEVEL
- const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
-#else
-#if CONFIG_LPF_SB
- const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
-#else
- const int filter_level = get_filter_level(cm, lfi_n, mbmi);
-#endif // CONFIG_LPF_SB
-#endif
-#else
- const int filter_level = get_filter_level(lfi_n, mbmi);
- (void)cm;
-#endif
- uint64_t *const left_y = &lfm->left_y[tx_size_y_left];
- uint64_t *const above_y = &lfm->above_y[tx_size_y_above];
- uint64_t *const int_4x4_y = &lfm->int_4x4_y;
- uint16_t *const left_uv = &lfm->left_uv[tx_size_uv_left];
- uint16_t *const above_uv = &lfm->above_uv[tx_size_uv_above];
- uint16_t *const int_4x4_uv = &lfm->left_int_4x4_uv;
- int i;
-
- // If filter level is 0 we don't loop filter.
- if (!filter_level) {
- return;
- } else {
- const int w = num_8x8_blocks_wide_lookup[block_size];
- const int h = num_8x8_blocks_high_lookup[block_size];
- const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
- const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);
-
- for (i = 0; i < h; i++) memset(&lfm->lfl_y[row + i][col], filter_level, w);
- }
-
- // These set 1 in the current block size for the block size edges.
- // For instance if the block size is 32x16, we'll set:
- // above = 1111
- // 0000
- // and
- // left = 1000
- // = 1000
- // NOTE : In this example the low bit is left most ( 1000 ) is stored as
- // 1, not 8...
- //
- // U and V set things on a 16 bit scale.
- //
- *above_y |= above_prediction_mask[block_size] << shift_y;
- *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
- *left_y |= left_prediction_mask[block_size] << shift_y;
- *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;
-
- // If the block has no coefficients and is not intra we skip applying
- // the loop filter on block edges.
- if (mbmi->skip && is_inter_block(mbmi)) return;
-
- // Here we are adding a mask for the transform size. The transform
- // size mask is set to be correct for a 64x64 prediction block size. We
- // mask to match the size of the block we are working on and then shift it
- // into place..
- *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y_above])
- << shift_y;
- *above_uv |=
- (size_mask_uv[block_size] & above_64x64_txform_mask_uv[tx_size_uv_above])
- << shift_uv;
-
- *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y_left])
- << shift_y;
- *left_uv |=
- (size_mask_uv[block_size] & left_64x64_txform_mask_uv[tx_size_uv_left])
- << shift_uv;
-
- // Here we are trying to determine what to do with the internal 4x4 block
- // boundaries. These differ from the 4x4 boundaries on the outside edge of
- // an 8x8 in that the internal ones can be skipped and don't depend on
- // the prediction block size.
- if (tx_size_y == TX_4X4)
- *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;
-
- if (tx_size_uv == TX_4X4)
- *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
-}
-
-// This function does the same thing as the one above with the exception that
-// it only affects the y masks. It exists because for blocks < 16x16 in size,
-// we only update u and v masks on the first block.
-static void build_y_mask(AV1_COMMON *const cm,
- const loop_filter_info_n *const lfi_n,
- const MODE_INFO *mi, const int shift_y,
-#if CONFIG_SUPERTX
- int supertx_enabled,
-#endif // CONFIG_SUPERTX
- LOOP_FILTER_MASK *lfm) {
- const MB_MODE_INFO *mbmi = &mi->mbmi;
- const TX_SIZE tx_size_y = txsize_sqr_map[mbmi->tx_size];
- const TX_SIZE tx_size_y_left = txsize_horz_map[mbmi->tx_size];
- const TX_SIZE tx_size_y_above = txsize_vert_map[mbmi->tx_size];
-#if CONFIG_SUPERTX
- const BLOCK_SIZE block_size =
- supertx_enabled ? (BLOCK_SIZE)(3 * tx_size_y) : mbmi->sb_type;
-#else
- const BLOCK_SIZE block_size = mbmi->sb_type;
-#endif
-#if CONFIG_EXT_DELTA_Q
-#if CONFIG_LOOPFILTER_LEVEL
- const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
-#else
-#if CONFIG_LPF_SB
- const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
-#else
- const int filter_level = get_filter_level(cm, lfi_n, mbmi);
-#endif // CONFIG_LPF_SB
-#endif
-#else
- const int filter_level = get_filter_level(lfi_n, mbmi);
- (void)cm;
-#endif
- uint64_t *const left_y = &lfm->left_y[tx_size_y_left];
- uint64_t *const above_y = &lfm->above_y[tx_size_y_above];
- uint64_t *const int_4x4_y = &lfm->int_4x4_y;
- int i;
-
- if (!filter_level) {
- return;
- } else {
- const int w = num_8x8_blocks_wide_lookup[block_size];
- const int h = num_8x8_blocks_high_lookup[block_size];
- const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
- const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);
- for (i = 0; i < h; i++) memset(&lfm->lfl_y[row + i][col], filter_level, w);
+ s += 4 * count;
+ lfl += step * count;
+ mask_16x16 >>= step * count;
+ mask_8x8 >>= step * count;
+ mask_4x4 >>= step * count;
}
-
- *above_y |= above_prediction_mask[block_size] << shift_y;
- *left_y |= left_prediction_mask[block_size] << shift_y;
-
- if (mbmi->skip && is_inter_block(mbmi)) return;
-
- *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y_above])
- << shift_y;
-
- *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y_left])
- << shift_y;
-
- if (tx_size_y == TX_4X4)
- *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;
}
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
-// This function update the bit masks for the entire 64x64 region represented
-// by mi_row, mi_col. In case one of the edge is a tile boundary, loop filtering
-// for that edge is disabled. This function only check the tile boundary info
-// for the top left corner mi to determine the boundary information for the
-// top and left edge of the whole super block
-static void update_tile_boundary_filter_mask(AV1_COMMON *const cm,
- const int mi_row, const int mi_col,
- LOOP_FILTER_MASK *lfm) {
- int i;
- MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride + mi_col;
-
- if (mi->mbmi.boundary_info & TILE_LEFT_BOUNDARY) {
- for (i = 0; i <= TX_32X32; i++) {
- lfm->left_y[i] &= 0xfefefefefefefefeULL;
- lfm->left_uv[i] &= 0xeeee;
- }
- }
-
- if (mi->mbmi.boundary_info & TILE_ABOVE_BOUNDARY) {
- for (i = 0; i <= TX_32X32; i++) {
- lfm->above_y[i] &= 0xffffffffffffff00ULL;
- lfm->above_uv[i] &= 0xfff0;
- }
- }
-}
-#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
-
-// This function sets up the bit masks for the entire 64x64 region represented
-// by mi_row, mi_col.
-// TODO(JBB): This function only works for yv12.
-void av1_setup_mask(AV1_COMMON *const cm, const int mi_row, const int mi_col,
- MODE_INFO **mi, const int mode_info_stride,
- LOOP_FILTER_MASK *lfm) {
-#if CONFIG_EXT_PARTITION
- assert(0 && "Not yet updated");
-#endif // CONFIG_EXT_PARTITION
- int idx_32, idx_16, idx_8;
- const loop_filter_info_n *const lfi_n = &cm->lf_info;
- MODE_INFO **mip = mi;
- MODE_INFO **mip2 = mi;
-
- // These are offsets to the next mi in the 64x64 block. It is what gets
- // added to the mi ptr as we go through each loop. It helps us to avoid
- // setting up special row and column counters for each index. The last step
- // brings us out back to the starting position.
- const int offset_32[] = { 4, (mode_info_stride << 2) - 4, 4,
- -(mode_info_stride << 2) - 4 };
- const int offset_16[] = { 2, (mode_info_stride << 1) - 2, 2,
- -(mode_info_stride << 1) - 2 };
- const int offset[] = { 1, mode_info_stride - 1, 1, -mode_info_stride - 1 };
-
- // Following variables represent shifts to position the current block
- // mask over the appropriate block. A shift of 36 to the left will move
- // the bits for the final 32 by 32 block in the 64x64 up 4 rows and left
- // 4 rows to the appropriate spot.
- const int shift_32_y[] = { 0, 4, 32, 36 };
- const int shift_16_y[] = { 0, 2, 16, 18 };
- const int shift_8_y[] = { 0, 1, 8, 9 };
- const int shift_32_uv[] = { 0, 2, 8, 10 };
- const int shift_16_uv[] = { 0, 1, 4, 5 };
- int i;
- const int max_rows = AOMMIN(cm->mi_rows - mi_row, MAX_MIB_SIZE);
- const int max_cols = AOMMIN(cm->mi_cols - mi_col, MAX_MIB_SIZE);
-
- av1_zero(*lfm);
- assert(mip[0] != NULL);
-
- // TODO(jimbankoski): Try moving most of the following code into decode
- // loop and storing lfm in the mbmi structure so that we don't have to go
- // through the recursive loop structure multiple times.
- switch (mip[0]->mbmi.sb_type) {
- case BLOCK_64X64: build_masks(cm, lfi_n, mip[0], 0, 0, lfm); break;
- case BLOCK_64X32: build_masks(cm, lfi_n, mip[0], 0, 0, lfm);
-#if CONFIG_SUPERTX && CONFIG_TX64X64
- if (supertx_enabled(&mip[0]->mbmi)) break;
-#endif // CONFIG_SUPERTX && CONFIG_TX64X64
- mip2 = mip + mode_info_stride * 4;
- if (4 >= max_rows) break;
- build_masks(cm, lfi_n, mip2[0], 32, 8, lfm);
- break;
- case BLOCK_32X64: build_masks(cm, lfi_n, mip[0], 0, 0, lfm);
-#if CONFIG_SUPERTX && CONFIG_TX64X64
- if (supertx_enabled(&mip[0]->mbmi)) break;
-#endif // CONFIG_SUPERTX && CONFIG_TX64X64
- mip2 = mip + 4;
- if (4 >= max_cols) break;
- build_masks(cm, lfi_n, mip2[0], 4, 2, lfm);
- break;
- default:
-#if CONFIG_SUPERTX && CONFIG_TX64X64
- if (mip[0]->mbmi.tx_size == TX_64X64) {
- build_masks(cm, lfi_n, mip[0], 0, 0, lfm);
- } else {
-#endif // CONFIG_SUPERTX && CONFIG_TX64X64
- for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
- const int shift_y_32 = shift_32_y[idx_32];
- const int shift_uv_32 = shift_32_uv[idx_32];
- const int mi_32_col_offset = ((idx_32 & 1) << 2);
- const int mi_32_row_offset = ((idx_32 >> 1) << 2);
- if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
- continue;
- switch (mip[0]->mbmi.sb_type) {
- case BLOCK_32X32:
- build_masks(cm, lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
- break;
- case BLOCK_32X16:
- build_masks(cm, lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
-#if CONFIG_SUPERTX
- if (supertx_enabled(&mip[0]->mbmi)) break;
-#endif
- if (mi_32_row_offset + 2 >= max_rows) continue;
- mip2 = mip + mode_info_stride * 2;
- build_masks(cm, lfi_n, mip2[0], shift_y_32 + 16, shift_uv_32 + 4,
- lfm);
- break;
- case BLOCK_16X32:
- build_masks(cm, lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
-#if CONFIG_SUPERTX
- if (supertx_enabled(&mip[0]->mbmi)) break;
-#endif
- if (mi_32_col_offset + 2 >= max_cols) continue;
- mip2 = mip + 2;
- build_masks(cm, lfi_n, mip2[0], shift_y_32 + 2, shift_uv_32 + 1,
- lfm);
- break;
- default:
-#if CONFIG_SUPERTX
- if (mip[0]->mbmi.tx_size == TX_32X32) {
- build_masks(cm, lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
- break;
- }
-#endif
- for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
- const int shift_y_32_16 = shift_y_32 + shift_16_y[idx_16];
- const int shift_uv_32_16 = shift_uv_32 + shift_16_uv[idx_16];
- const int mi_16_col_offset =
- mi_32_col_offset + ((idx_16 & 1) << 1);
- const int mi_16_row_offset =
- mi_32_row_offset + ((idx_16 >> 1) << 1);
-
- if (mi_16_col_offset >= max_cols ||
- mi_16_row_offset >= max_rows)
- continue;
-
- switch (mip[0]->mbmi.sb_type) {
- case BLOCK_16X16:
- build_masks(cm, lfi_n, mip[0], shift_y_32_16,
- shift_uv_32_16, lfm);
- break;
- case BLOCK_16X8:
-#if CONFIG_SUPERTX
- if (supertx_enabled(&mip[0]->mbmi)) break;
-#endif
- build_masks(cm, lfi_n, mip[0], shift_y_32_16,
- shift_uv_32_16, lfm);
- if (mi_16_row_offset + 1 >= max_rows) continue;
- mip2 = mip + mode_info_stride;
- build_y_mask(cm, lfi_n, mip2[0], shift_y_32_16 + 8,
-#if CONFIG_SUPERTX
- 0,
-#endif
- lfm);
- break;
- case BLOCK_8X16:
-#if CONFIG_SUPERTX
- if (supertx_enabled(&mip[0]->mbmi)) break;
-#endif
- build_masks(cm, lfi_n, mip[0], shift_y_32_16,
- shift_uv_32_16, lfm);
- if (mi_16_col_offset + 1 >= max_cols) continue;
- mip2 = mip + 1;
- build_y_mask(cm, lfi_n, mip2[0], shift_y_32_16 + 1,
-#if CONFIG_SUPERTX
- 0,
-#endif
- lfm);
- break;
- default: {
- const int shift_y_32_16_8_zero =
- shift_y_32_16 + shift_8_y[0];
-#if CONFIG_SUPERTX
- if (mip[0]->mbmi.tx_size == TX_16X16) {
- build_masks(cm, lfi_n, mip[0], shift_y_32_16_8_zero,
- shift_uv_32_16, lfm);
- break;
- }
-#endif
- build_masks(cm, lfi_n, mip[0], shift_y_32_16_8_zero,
- shift_uv_32_16, lfm);
- mip += offset[0];
- for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
- const int shift_y_32_16_8 =
- shift_y_32_16 + shift_8_y[idx_8];
- const int mi_8_col_offset =
- mi_16_col_offset + ((idx_8 & 1));
- const int mi_8_row_offset =
- mi_16_row_offset + ((idx_8 >> 1));
-
- if (mi_8_col_offset >= max_cols ||
- mi_8_row_offset >= max_rows)
- continue;
- build_y_mask(cm, lfi_n, mip[0], shift_y_32_16_8,
-#if CONFIG_SUPERTX
- supertx_enabled(&mip[0]->mbmi),
-#endif
- lfm);
- }
- break;
- }
- }
- }
- break;
- }
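+// Compare ref_buf against dst_buf over the given mi range; the early return
+// below disables the comparison.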
+static int compare_ref_dst(AV1_COMMON *const cm, uint8_t *ref_buf,
+ uint8_t *dst_buf, int ref_stride, int dst_stride,
+ int start, int end) {
+ return 0;
+
+ start <<= MI_SIZE_LOG2;
+ end <<= MI_SIZE_LOG2;
+ uint8_t *ref0 = ref_buf;
+ uint8_t *dst0 = dst_buf;
+ if (cm->use_highbitdepth) {
+ const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref_buf);
+ const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst_buf);
+ for (int j = 0; j < 4; ++j) {
+ for (int i = start; i < end; ++i)
+ if (ref16[i] != dst16[i]) {
+ ref_buf = ref0;
+ dst_buf = dst0;
+ return i + 1;
}
-#if CONFIG_SUPERTX && CONFIG_TX64X64
- }
-#endif // CONFIG_SUPERTX && CONFIG_TX64X64
- break;
- }
- // The largest loopfilter we have is 16x16 so we use the 16x16 mask
- // for 32x32 transforms also.
- lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32];
- lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32];
- lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32];
- lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32];
-
- // We do at least 8 tap filter on every 32x32 even if the transform size
- // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and
- // remove it from the 4x4.
- lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border;
- lfm->left_y[TX_4X4] &= ~left_border;
- lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border;
- lfm->above_y[TX_4X4] &= ~above_border;
- lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv;
- lfm->left_uv[TX_4X4] &= ~left_border_uv;
- lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv;
- lfm->above_uv[TX_4X4] &= ~above_border_uv;
-
- // We do some special edge handling.
- if (mi_row + MAX_MIB_SIZE > cm->mi_rows) {
- const uint64_t rows = cm->mi_rows - mi_row;
-
- // Each pixel inside the border gets a 1,
- const uint64_t mask_y = (((uint64_t)1 << (rows << MAX_MIB_SIZE_LOG2)) - 1);
- const uint16_t mask_uv =
- (((uint16_t)1 << (((rows + 1) >> 1) << (MAX_MIB_SIZE_LOG2 - 1))) - 1);
-
- // Remove values completely outside our border.
- for (i = 0; i < TX_32X32; i++) {
- lfm->left_y[i] &= mask_y;
- lfm->above_y[i] &= mask_y;
- lfm->left_uv[i] &= mask_uv;
- lfm->above_uv[i] &= mask_uv;
- }
- lfm->int_4x4_y &= mask_y;
- lfm->above_int_4x4_uv = lfm->left_int_4x4_uv & mask_uv;
-
- // We don't apply a wide loop filter on the last uv block row. If set
- // apply the shorter one instead.
- if (rows == 1) {
- lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16];
- lfm->above_uv[TX_16X16] = 0;
- }
- if (rows == 5) {
- lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00;
- lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00);
+ ref16 += ref_stride;
+ dst16 += dst_stride;
}
} else {
- lfm->above_int_4x4_uv = lfm->left_int_4x4_uv;
- }
-
- if (mi_col + MAX_MIB_SIZE > cm->mi_cols) {
- const uint64_t columns = cm->mi_cols - mi_col;
-
- // Each pixel inside the border gets a 1, the multiply copies the border
- // to where we need it.
- const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL;
- const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;
-
- // Internal edges are not applied on the last column of the image so
- // we mask 1 more for the internal edges
- const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;
-
- // Remove the bits outside the image edge.
- for (i = 0; i < TX_32X32; i++) {
- lfm->left_y[i] &= mask_y;
- lfm->above_y[i] &= mask_y;
- lfm->left_uv[i] &= mask_uv;
- lfm->above_uv[i] &= mask_uv;
- }
- lfm->int_4x4_y &= mask_y;
- lfm->left_int_4x4_uv &= mask_uv_int;
-
- // We don't apply a wide loop filter on the last uv column. If set
- // apply the shorter one instead.
- if (columns == 1) {
- lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
- lfm->left_uv[TX_16X16] = 0;
- }
- if (columns == 5) {
- lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);
- lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
- }
- }
- // We don't apply a loop filter on the first column in the image, mask that
- // out.
- if (mi_col == 0) {
- for (i = 0; i < TX_32X32; i++) {
- lfm->left_y[i] &= 0xfefefefefefefefeULL;
- lfm->left_uv[i] &= 0xeeee;
- }
- }
-
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
- if (av1_disable_loopfilter_on_tile_boundary(cm)) {
- update_tile_boundary_filter_mask(cm, mi_row, mi_col, lfm);
- }
-#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
-
- // Assert if we try to apply 2 different loop filters at the same position.
- assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8]));
- assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4]));
- assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4]));
- assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16]));
- assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_8X8]));
- assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
- assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
- assert(!(lfm->left_int_4x4_uv & lfm->left_uv[TX_16X16]));
- assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
- assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
- assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
- assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16]));
- assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
- assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
- assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
- assert(!(lfm->above_int_4x4_uv & lfm->above_uv[TX_16X16]));
-}
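
A quick illustration of the border-mask arithmetic in the removed code above: the 64-bit luma mask holds one byte per 8x8 row, so multiplying an 8-bit column pattern by 0x0101010101010101ULL replicates it into every row in one operation (and 0x1111 does the same for the 4-bit-per-row uv mask). The sketch below is illustrative only; the column count is a made-up example, not a value from the patch.

/* Illustrative sketch only -- not part of the patch. */
#include <stdint.h>
#include <stdio.h>

int main(void) {
  const int columns = 3;  /* hypothetical: 3 mi columns remain inside the frame edge */
  /* Ones for the in-frame columns of one row: 0b00000111. */
  const uint64_t row_pattern = (1ULL << columns) - 1;
  /* Copy that byte into all 8 row bytes of the 64-bit luma mask. */
  const uint64_t mask_y = row_pattern * 0x0101010101010101ULL;
  /* The uv mask uses 4 bits per row; 0x1111 replicates a nibble into 4 rows. */
  const uint16_t mask_uv = (uint16_t)(((1 << ((columns + 1) >> 1)) - 1) * 0x1111);
  printf("mask_y  = 0x%016llx\n", (unsigned long long)mask_y); /* 0x0707070707070707 */
  printf("mask_uv = 0x%04x\n", mask_uv);                       /* 0x3333 */
  return 0;
}
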
-
-static void filter_selectively_vert(
- uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
- unsigned int mask_4x4, unsigned int mask_4x4_int,
- const loop_filter_info_n *lfi_n, const uint8_t *lfl
-#if CONFIG_LPF_DIRECT
- ,
- uint8_t *const src, int mi_row, int mi_col, int idx_r, int col_step,
- int width, int height, int ss_x, int ss_y
-#endif
- ) {
- unsigned int mask;
-#if CONFIG_LPF_DIRECT
- // scale for u, v plane
- width >>= ss_x;
- height >>= ss_y;
- int idx_c = 0;
-#endif
-
- for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
- mask >>= 1) {
- const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
-
-#if CONFIG_LPF_DIRECT
- int i;
- const int pivot = 8;
- const int left_filt_len = mask_16x16 & 1 ? 8 : 4;
- const int right_filt_len = mask_16x16 & 1 ? 8 : 4;
- const int line_length = 16;
- uint8_t block[128];
- int orig_pos[128];
-
- // actual position for current pixel
- const int row = (mi_row + idx_r) * MI_SIZE >> ss_y;
- const int col = (mi_col + idx_c) * MI_SIZE >> ss_x;
-
- // Could use asymmetric length in the future
- assert(left_filt_len == right_filt_len);
- (void)right_filt_len;
-
- if ((mask_16x16 & 1) || (mask_8x8 & 1) || (mask_4x4 & 1)) {
- for (i = 0; i < 128; ++i) {
- block[i] = 0;
- orig_pos[i] = -1;
- }
-
- const int direct = pick_min_grad_direct(src, left_filt_len, row, col,
- width, height, pitch, 1, 0);
-
- pick_filter_block_vert(src, block, orig_pos, left_filt_len, row, col,
- width, height, pitch, pivot, line_length, 1,
- direct);
-
- // apply filtering
- if (mask_16x16 & 1) {
- aom_lpf_vertical_16(block + pivot, line_length, lfi->mblim, lfi->lim,
- lfi->hev_thr);
- } else if (mask_8x8 & 1) {
- aom_lpf_vertical_8(block + pivot, line_length, lfi->mblim, lfi->lim,
- lfi->hev_thr);
- } else if (mask_4x4 & 1) {
- aom_lpf_vertical_4(block + pivot, line_length, lfi->mblim, lfi->lim,
- lfi->hev_thr);
- }
-
- for (i = 0; i < 128; ++i)
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
- }
-
- // filter inner 4x4
- if (mask_4x4_int & 1) {
- for (i = 0; i < 128; ++i) {
- block[i] = 0;
- orig_pos[i] = -1;
- }
-
- const int direct = pick_min_grad_direct(src, 4, row, col + 4, width,
- height, pitch, 1, 0);
-
- pick_filter_block_vert(src, block, orig_pos, 4, row, col + 4, width,
- height, pitch, pivot, line_length, 1, direct);
-
- aom_lpf_vertical_4(block + pivot, line_length, lfi->mblim, lfi->lim,
- lfi->hev_thr);
-
- for (i = 0; i < 128; ++i)
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
- }
-#else
- if (mask & 1) {
- if (mask_16x16 & 1) {
- aom_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
- } else if (mask_8x8 & 1) {
- aom_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
- } else if (mask_4x4 & 1) {
- aom_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
- }
- }
- if (mask_4x4_int & 1)
- aom_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
-#endif // CONFIG_LPF_DIRECT
-#if CONFIG_LPF_DIRECT
- idx_c += col_step;
-#endif
- s += 8;
- lfl += 1;
- mask_16x16 >>= 1;
- mask_8x8 >>= 1;
- mask_4x4 >>= 1;
- mask_4x4_int >>= 1;
- }
-}
-
-#if CONFIG_HIGHBITDEPTH
-static void highbd_filter_selectively_vert(
- uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
- unsigned int mask_4x4, unsigned int mask_4x4_int,
- const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) {
- unsigned int mask;
-
- for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
- mask >>= 1) {
- const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
-
- if (mask & 1) {
- if (mask_16x16 & 1) {
- aom_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
- bd);
- } else if (mask_8x8 & 1) {
- aom_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
- bd);
- } else if (mask_4x4 & 1) {
- aom_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
- bd);
- }
- }
- if (mask_4x4_int & 1)
- aom_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, bd);
- s += 8;
- lfl += 1;
- mask_16x16 >>= 1;
- mask_8x8 >>= 1;
- mask_4x4 >>= 1;
- mask_4x4_int >>= 1;
- }
-}
-#endif // CONFIG_HIGHBITDEPTH
-
-typedef struct {
- unsigned int m16x16;
- unsigned int m8x8;
- unsigned int m4x4;
-} FilterMasks;
-
-// Get filter level and masks for the given row index 'idx_r'. (Only used for
-// the non420 case).
-// Note: 'row_masks_ptr' and/or 'col_masks_ptr' can be passed NULL.
-static void get_filter_level_and_masks_non420(
- AV1_COMMON *const cm, const struct macroblockd_plane *const plane, int pl,
- MODE_INFO **mib, int mi_row, int mi_col, int idx_r, uint8_t *const lfl_r,
- unsigned int *const mask_4x4_int_r_ptr,
- unsigned int *const mask_4x4_int_c_ptr, FilterMasks *const row_masks_ptr,
- FilterMasks *const col_masks_ptr) {
- const int ss_x = plane->subsampling_x;
- const int ss_y = plane->subsampling_y;
- const int col_step = mi_size_wide[BLOCK_8X8] << ss_x;
- FilterMasks row_masks, col_masks;
- memset(&row_masks, 0, sizeof(row_masks));
- memset(&col_masks, 0, sizeof(col_masks));
- unsigned int mask_4x4_int_r = 0, mask_4x4_int_c = 0;
- const int r = idx_r >> mi_height_log2_lookup[BLOCK_8X8];
-
- // Determine the vertical edges that need filtering
- int idx_c;
- for (idx_c = 0; idx_c < cm->mib_size && mi_col + idx_c < cm->mi_cols;
- idx_c += col_step) {
- const MODE_INFO *mi = mib[idx_r * cm->mi_stride + idx_c];
- const MB_MODE_INFO *mbmi = &mi[0].mbmi;
- const BLOCK_SIZE sb_type = mbmi->sb_type;
- const int skip_this = mbmi->skip && is_inter_block(mbmi);
- // Map index to 8x8 unit
- const int c = idx_c >> mi_width_log2_lookup[BLOCK_8X8];
-
- const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1);
- const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1);
-
- // left edge of current unit is block/partition edge -> no skip
- const int block_edge_left =
- (num_4x4_blocks_wide_lookup[sb_type] > 1) ? !blk_col : 1;
- const int skip_this_c = skip_this && !block_edge_left;
- // top edge of current unit is block/partition edge -> no skip
- const int block_edge_above =
- (num_4x4_blocks_high_lookup[sb_type] > 1) ? !blk_row : 1;
- const int skip_this_r = skip_this && !block_edge_above;
-
- TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
- ? av1_get_uv_tx_size(mbmi, plane)
- : mbmi->tx_size;
-
- const int skip_border_4x4_c =
- ss_x && mi_col + idx_c >= cm->mi_cols - mi_size_wide[BLOCK_8X8];
- const int skip_border_4x4_r =
- ss_y && mi_row + idx_r >= cm->mi_rows - mi_size_high[BLOCK_8X8];
-
- int tx_size_mask = 0;
- const int c_step = (c >> ss_x);
- const int r_step = (r >> ss_y);
- const int col_mask = 1 << c_step;
-
-#if CONFIG_VAR_TX
- if (is_inter_block(mbmi) && !mbmi->skip) {
- const int tx_row_idx =
- (blk_row * mi_size_high[BLOCK_8X8] << TX_UNIT_HIGH_LOG2) >> 1;
- const int tx_col_idx =
- (blk_col * mi_size_wide[BLOCK_8X8] << TX_UNIT_WIDE_LOG2) >> 1;
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- const BLOCK_SIZE bsize =
- AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, plane));
-#else
- const BLOCK_SIZE bsize = get_plane_block_size(mbmi->sb_type, plane);
-#endif
- const TX_SIZE mb_tx_size = mbmi->inter_tx_size[tx_row_idx][tx_col_idx];
- tx_size = (plane->plane_type == PLANE_TYPE_UV)
- ? uv_txsize_lookup[bsize][mb_tx_size][0][0]
- : mb_tx_size;
- }
-#endif
-
-// Filter level can vary per MI
-#if CONFIG_EXT_DELTA_Q
-#if CONFIG_LOOPFILTER_LEVEL
- if (!(lfl_r[c_step] = get_filter_level(cm, &cm->lf_info, 0, 0, mbmi)))
- continue;
-#else
-#if CONFIG_LPF_SB
- if (!(lfl_r[c_step] =
- get_filter_level(cm, &cm->lf_info, mi_row, mi_col, mbmi)))
- continue;
-#else
- if (!(lfl_r[c_step] = get_filter_level(cm, &cm->lf_info, mbmi))) continue;
-#endif // CONFIG_LPF_SB
-#endif
-#else
- if (!(lfl_r[c_step] = get_filter_level(&cm->lf_info, mbmi))) continue;
-#endif
-
-#if CONFIG_VAR_TX
- TX_SIZE tx_size_horz_edge, tx_size_vert_edge;
-
- // filt_len_vert_edge is the length of deblocking filter for a vertical edge
- // The filter direction of a vertical edge is horizontal.
- // Thus, filt_len_vert_edge is determined as the minimum width of the two
- // transform block sizes on the left and right (current block) side of edge
- const int filt_len_vert_edge = AOMMIN(
- tx_size_wide[tx_size],
- tx_size_wide[cm->left_txfm_context[pl][((mi_row + idx_r) & MAX_MIB_MASK)
- << TX_UNIT_HIGH_LOG2]]);
-
- // filt_len_horz_edge is the len of deblocking filter for a horizontal edge
- // The filter direction of a horizontal edge is vertical.
- // Thus, filt_len_horz_edge is determined as the minimum height of the two
- // transform block sizes on the top and bottom (current block) side of edge
- const int filt_len_horz_edge =
- AOMMIN(tx_size_high[tx_size],
- tx_size_high[cm->top_txfm_context[pl][(mi_col + idx_c)
- << TX_UNIT_WIDE_LOG2]]);
-
- // transform width/height of current block
- const int tx_wide_cur = tx_size_wide[tx_size];
- const int tx_high_cur = tx_size_high[tx_size];
-
- // tx_size_vert_edge is square transform size for a vertical deblocking edge
- // It determines the type of filter applied to the vertical edge
- // Similarly, tx_size_horz_edge is for a horizontal deblocking edge
- tx_size_vert_edge = get_sqr_tx_size(filt_len_vert_edge);
- tx_size_horz_edge = get_sqr_tx_size(filt_len_horz_edge);
-
- memset(cm->top_txfm_context[pl] + ((mi_col + idx_c) << TX_UNIT_WIDE_LOG2),
- tx_size, mi_size_wide[BLOCK_8X8] << TX_UNIT_WIDE_LOG2);
- memset(cm->left_txfm_context[pl] +
- (((mi_row + idx_r) & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2),
- tx_size, mi_size_high[BLOCK_8X8] << TX_UNIT_HIGH_LOG2);
-#else
- // The length (or equally the square tx size) of deblocking filter is only
- // determined by
- // a) current block's width for a vertical deblocking edge
- // b) current block's height for a horizontal deblocking edge
- TX_SIZE tx_size_vert_edge = txsize_horz_map[tx_size];
- TX_SIZE tx_size_horz_edge = txsize_vert_map[tx_size];
- (void)pl;
-#endif // CONFIG_VAR_TX
-
- if (tx_size_vert_edge == TX_32X32)
- tx_size_mask = 3;
- else if (tx_size_vert_edge == TX_16X16)
- tx_size_mask = 1;
- else
- tx_size_mask = 0;
-
- // Build masks based on the transform size of each block
- // handle vertical mask
- if (tx_size_vert_edge == TX_32X32) {
- if (!skip_this_c && (c_step & tx_size_mask) == 0) {
- if (!skip_border_4x4_c)
- col_masks.m16x16 |= col_mask;
- else
- col_masks.m8x8 |= col_mask;
- }
- } else if (tx_size_vert_edge == TX_16X16) {
- if (!skip_this_c && (c_step & tx_size_mask) == 0) {
- if (!skip_border_4x4_c)
- col_masks.m16x16 |= col_mask;
- else
- col_masks.m8x8 |= col_mask;
- }
- } else {
- // force 8x8 filtering on 32x32 boundaries
- if (!skip_this_c && (c_step & tx_size_mask) == 0) {
- if (tx_size_vert_edge == TX_8X8 || (c_step & 3) == 0)
- col_masks.m8x8 |= col_mask;
- else
- col_masks.m4x4 |= col_mask;
- }
-
-#if CONFIG_VAR_TX
- if (!skip_this && tx_wide_cur < 8 && !skip_border_4x4_c &&
- (c_step & tx_size_mask) == 0)
-#else
- if (!skip_this && tx_size_vert_edge < TX_8X8 && !skip_border_4x4_c &&
- (c_step & tx_size_mask) == 0)
-#endif // CONFIG_VAR_TX
- mask_4x4_int_c |= col_mask;
- }
-
- if (tx_size_horz_edge == TX_32X32)
- tx_size_mask = 3;
- else if (tx_size_horz_edge == TX_16X16)
- tx_size_mask = 1;
- else
- tx_size_mask = 0;
-
- // set horizontal mask
- if (tx_size_horz_edge == TX_32X32) {
- if (!skip_this_r && (r_step & tx_size_mask) == 0) {
- if (!skip_border_4x4_r)
- row_masks.m16x16 |= col_mask;
- else
- row_masks.m8x8 |= col_mask;
- }
- } else if (tx_size_horz_edge == TX_16X16) {
- if (!skip_this_r && (r_step & tx_size_mask) == 0) {
- if (!skip_border_4x4_r)
- row_masks.m16x16 |= col_mask;
- else
- row_masks.m8x8 |= col_mask;
- }
- } else {
- // force 8x8 filtering on 32x32 boundaries
- if (!skip_this_r && (r_step & tx_size_mask) == 0) {
- if (tx_size_horz_edge == TX_8X8 || (r_step & 3) == 0)
- row_masks.m8x8 |= col_mask;
- else
- row_masks.m4x4 |= col_mask;
- }
-
-#if CONFIG_VAR_TX
- if (!skip_this && tx_high_cur < 8 && !skip_border_4x4_r &&
- (r_step & tx_size_mask) == 0)
-#else
- if (!skip_this && tx_size_horz_edge < TX_8X8 && !skip_border_4x4_r &&
- (r_step & tx_size_mask) == 0)
-#endif // CONFIG_VAR_TX
- mask_4x4_int_r |= col_mask;
- }
- }
-
- if (row_masks_ptr) *row_masks_ptr = row_masks;
- if (col_masks_ptr) *col_masks_ptr = col_masks;
- if (mask_4x4_int_c_ptr) *mask_4x4_int_c_ptr = mask_4x4_int_c;
- if (mask_4x4_int_r_ptr) *mask_4x4_int_r_ptr = mask_4x4_int_r;
-}
-
-void av1_filter_block_plane_non420_ver(AV1_COMMON *const cm,
- struct macroblockd_plane *plane,
- MODE_INFO **mib, int mi_row, int mi_col,
- int pl) {
- const int ss_y = plane->subsampling_y;
- const int row_step = mi_size_high[BLOCK_8X8] << ss_y;
-#if CONFIG_LPF_DIRECT
- const int ss_x = plane->subsampling_x;
- const int col_step = mi_size_wide[BLOCK_8X8] << ss_x;
-#endif
- struct buf_2d *const dst = &plane->dst;
- uint8_t *const dst0 = dst->buf;
- uint8_t lfl[MAX_MIB_SIZE][MAX_MIB_SIZE] = { { 0 } };
-
- int idx_r;
- for (idx_r = 0; idx_r < cm->mib_size && mi_row + idx_r < cm->mi_rows;
- idx_r += row_step) {
- unsigned int mask_4x4_int;
- FilterMasks col_masks;
- const int r = idx_r >> mi_height_log2_lookup[BLOCK_8X8];
- get_filter_level_and_masks_non420(cm, plane, pl, mib, mi_row, mi_col, idx_r,
- &lfl[r][0], NULL, &mask_4x4_int, NULL,
- &col_masks);
-
- // Disable filtering on the leftmost column or tile boundary
- unsigned int border_mask = ~(mi_col == 0 ? 1 : 0);
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
- MODE_INFO *const mi = cm->mi + (mi_row + idx_r) * cm->mi_stride + mi_col;
- if (av1_disable_loopfilter_on_tile_boundary(cm) &&
- ((mi->mbmi.boundary_info & TILE_LEFT_BOUNDARY) != 0)) {
- border_mask = 0xfffffffe;
- }
-#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
-
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- highbd_filter_selectively_vert(
- CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
- col_masks.m16x16 & border_mask, col_masks.m8x8 & border_mask,
- col_masks.m4x4 & border_mask, mask_4x4_int, &cm->lf_info, &lfl[r][0],
- (int)cm->bit_depth);
- else
-#endif // CONFIG_HIGHBITDEPTH
- filter_selectively_vert(
- dst->buf, dst->stride, col_masks.m16x16 & border_mask,
- col_masks.m8x8 & border_mask, col_masks.m4x4 & border_mask,
- mask_4x4_int, &cm->lf_info, &lfl[r][0]
-#if CONFIG_LPF_DIRECT
- ,
- dst->buf0, mi_row, mi_col, idx_r, col_step, cm->width, cm->height,
- ss_x, ss_y
-#endif // CONFIG_LPF_DIRECT
- );
- dst->buf += 8 * dst->stride;
- }
-
- // Now do horizontal pass
- dst->buf = dst0;
-}
-
-void av1_filter_block_plane_non420_hor(AV1_COMMON *const cm,
- struct macroblockd_plane *plane,
- MODE_INFO **mib, int mi_row, int mi_col,
- int pl) {
- const int ss_y = plane->subsampling_y;
- const int row_step = mi_size_high[BLOCK_8X8] << ss_y;
-#if CONFIG_LPF_DIRECT
- const int ss_x = plane->subsampling_x;
- const int col_step = mi_size_wide[BLOCK_8X8] << ss_x;
-#endif
- struct buf_2d *const dst = &plane->dst;
- uint8_t *const dst0 = dst->buf;
- uint8_t lfl[MAX_MIB_SIZE][MAX_MIB_SIZE] = { { 0 } };
-
- int idx_r;
- for (idx_r = 0; idx_r < cm->mib_size && mi_row + idx_r < cm->mi_rows;
- idx_r += row_step) {
- unsigned int mask_4x4_int;
- FilterMasks row_masks;
- const int r = idx_r >> mi_height_log2_lookup[BLOCK_8X8];
- get_filter_level_and_masks_non420(cm, plane, pl, mib, mi_row, mi_col, idx_r,
- &lfl[r][0], &mask_4x4_int, NULL,
- &row_masks, NULL);
-
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
- // Disable filtering on the abovemost row or tile boundary
- const MODE_INFO *mi = cm->mi + (mi_row + idx_r) * cm->mi_stride + mi_col;
- if ((av1_disable_loopfilter_on_tile_boundary(cm) &&
- (mi->mbmi.boundary_info & TILE_ABOVE_BOUNDARY)) ||
- (mi_row + idx_r == 0))
- memset(&row_masks, 0, sizeof(row_masks));
-#else
- if (mi_row + idx_r == 0) memset(&row_masks, 0, sizeof(row_masks));
-#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
-
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- highbd_filter_selectively_horiz(
- CONVERT_TO_SHORTPTR(dst->buf), dst->stride, row_masks.m16x16,
- row_masks.m8x8, row_masks.m4x4, mask_4x4_int, &cm->lf_info,
- &lfl[r][0], (int)cm->bit_depth);
- else
-#endif // CONFIG_HIGHBITDEPTH
- filter_selectively_horiz(dst->buf, dst->stride, row_masks.m16x16,
- row_masks.m8x8, row_masks.m4x4, mask_4x4_int,
- &cm->lf_info, &lfl[r][0]
-#if CONFIG_LPF_DIRECT
- ,
- dst->buf0, mi_row, mi_col, idx_r, col_step,
- cm->width, cm->height, ss_x, ss_y
-#endif // CONFIG_LPF_DIRECT
- );
- dst->buf += 8 * dst->stride;
- }
- dst->buf = dst0;
-}
-
-void av1_filter_block_plane_ss00_ver(AV1_COMMON *const cm,
- struct macroblockd_plane *const plane,
- int mi_row, LOOP_FILTER_MASK *lfm) {
- struct buf_2d *const dst = &plane->dst;
- uint8_t *const dst0 = dst->buf;
- int r;
- uint64_t mask_16x16 = lfm->left_y[TX_16X16];
- uint64_t mask_8x8 = lfm->left_y[TX_8X8];
- uint64_t mask_4x4 = lfm->left_y[TX_4X4];
- uint64_t mask_4x4_int = lfm->int_4x4_y;
-
- assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);
-
- // Vertical pass: do 2 rows at one time
- for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
- unsigned int mask_16x16_l = mask_16x16 & 0xffff;
- unsigned int mask_8x8_l = mask_8x8 & 0xffff;
- unsigned int mask_4x4_l = mask_4x4 & 0xffff;
- unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff;
-
-// Disable filtering on the leftmost column.
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- highbd_filter_selectively_vert_row2(
- plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
- mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
- &lfm->lfl_y[r][0], (int)cm->bit_depth);
- else
-#endif // CONFIG_HIGHBITDEPTH
- filter_selectively_vert_row2(
- plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
- mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r][0]);
-
- dst->buf += 2 * MI_SIZE * dst->stride;
- mask_16x16 >>= 2 * MI_SIZE;
- mask_8x8 >>= 2 * MI_SIZE;
- mask_4x4 >>= 2 * MI_SIZE;
- mask_4x4_int >>= 2 * MI_SIZE;
- }
-
- // Horizontal pass
- dst->buf = dst0;
-}
-
-void av1_filter_block_plane_ss00_hor(AV1_COMMON *const cm,
- struct macroblockd_plane *const plane,
- int mi_row, LOOP_FILTER_MASK *lfm) {
- struct buf_2d *const dst = &plane->dst;
- uint8_t *const dst0 = dst->buf;
- int r;
- uint64_t mask_16x16 = lfm->above_y[TX_16X16];
- uint64_t mask_8x8 = lfm->above_y[TX_8X8];
- uint64_t mask_4x4 = lfm->above_y[TX_4X4];
- uint64_t mask_4x4_int = lfm->int_4x4_y;
-
- assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);
-
- for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r++) {
- unsigned int mask_16x16_r;
- unsigned int mask_8x8_r;
- unsigned int mask_4x4_r;
-
- if (mi_row + r == 0) {
- mask_16x16_r = 0;
- mask_8x8_r = 0;
- mask_4x4_r = 0;
- } else {
- mask_16x16_r = mask_16x16 & 0xff;
- mask_8x8_r = mask_8x8 & 0xff;
- mask_4x4_r = mask_4x4 & 0xff;
+ for (int j = 0; j < 4; ++j) {
+ for (int i = start; i < end; ++i)
+ if (ref_buf[i] != dst_buf[i]) {
+ ref_buf = ref0;
+ dst_buf = dst0;
+ return i + 1;
+ }
+ ref_buf += ref_stride;
+ dst_buf += dst_stride;
}
-
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- highbd_filter_selectively_horiz(
- CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r][0],
- (int)cm->bit_depth);
- else
-#endif // CONFIG_HIGHBITDEPTH
-#if !CONFIG_LPF_DIRECT
- filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
- &lfm->lfl_y[r][0]);
-#endif // CONFIG_LPF_DIRECT
-
- dst->buf += MI_SIZE * dst->stride;
- mask_16x16 >>= MI_SIZE;
- mask_8x8 >>= MI_SIZE;
- mask_4x4 >>= MI_SIZE;
- mask_4x4_int >>= MI_SIZE;
}
- // restore the buf pointer in case there is additional filter pass.
- dst->buf = dst0;
+ ref_buf = ref0;
+ dst_buf = dst0;
+ return 0;
}
-void av1_filter_block_plane_ss11_ver(AV1_COMMON *const cm,
- struct macroblockd_plane *const plane,
- int mi_row, LOOP_FILTER_MASK *lfm) {
- struct buf_2d *const dst = &plane->dst;
- uint8_t *const dst0 = dst->buf;
+void av1_filter_block_plane_ver(AV1_COMMON *const cm,
+ struct macroblockd_plane *const plane_ptr,
+ int pl, int mi_row, int mi_col) {
+ struct buf_2d *const dst = &plane_ptr->dst;
int r, c;
+ const int ssx = plane_ptr->subsampling_x;
+ const int ssy = plane_ptr->subsampling_y;
+ const int mask_cutoff = 0xffff;
+ const int single_step = 1 << ssy;
+ const int r_step = 2 << ssy;
+ uint64_t mask_16x16 = 0;
+ uint64_t mask_8x8 = 0;
+ uint64_t mask_4x4 = 0;
+ uint8_t *lfl;
+ uint8_t *lfl2;
+
+ // filter two rows at a time
+ for (r = 0; r < cm->seq_params.mib_size &&
+ ((mi_row + r) << MI_SIZE_LOG2 < cm->height);
+ r += r_step) {
+ for (c = 0; c < cm->seq_params.mib_size &&
+ ((mi_col + c) << MI_SIZE_LOG2 < cm->width);
+ c += MI_SIZE_64X64) {
+ dst->buf += ((c << MI_SIZE_LOG2) >> ssx);
+ LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row + r, mi_col + c);
+ assert(lfm);
+ const int row = ((mi_row + r) | ssy) % MI_SIZE_64X64;
+ const int col = ((mi_col + c) | ssx) % MI_SIZE_64X64;
+ int index = 0;
+ const int shift = get_index_shift(col, row, &index);
+      // The current and the next row should belong to the same mask_idx and
+      // index; compute the next row's shift.
+ const int row_next = row + single_step;
+ int index_next = 0;
+ const int shift_next = get_index_shift(col, row_next, &index_next);
+ switch (pl) {
+ case 0:
+ mask_16x16 = lfm->left_y[TX_16X16].bits[index];
+ mask_8x8 = lfm->left_y[TX_8X8].bits[index];
+ mask_4x4 = lfm->left_y[TX_4X4].bits[index];
+ lfl = &lfm->lfl_y_ver[row][col];
+ lfl2 = &lfm->lfl_y_ver[row_next][col];
+ break;
+ case 1:
+ mask_16x16 = lfm->left_u[TX_16X16].bits[index];
+ mask_8x8 = lfm->left_u[TX_8X8].bits[index];
+ mask_4x4 = lfm->left_u[TX_4X4].bits[index];
+ lfl = &lfm->lfl_u_ver[row][col];
+ lfl2 = &lfm->lfl_u_ver[row_next][col];
+ break;
+ case 2:
+ mask_16x16 = lfm->left_v[TX_16X16].bits[index];
+ mask_8x8 = lfm->left_v[TX_8X8].bits[index];
+ mask_4x4 = lfm->left_v[TX_4X4].bits[index];
+ lfl = &lfm->lfl_v_ver[row][col];
+ lfl2 = &lfm->lfl_v_ver[row_next][col];
+ break;
+ default: assert(pl >= 0 && pl <= 2); return;
+ }
+ uint64_t mask_16x16_0 = (mask_16x16 >> shift) & mask_cutoff;
+ uint64_t mask_8x8_0 = (mask_8x8 >> shift) & mask_cutoff;
+ uint64_t mask_4x4_0 = (mask_4x4 >> shift) & mask_cutoff;
+ uint64_t mask_16x16_1 = (mask_16x16 >> shift_next) & mask_cutoff;
+ uint64_t mask_8x8_1 = (mask_8x8 >> shift_next) & mask_cutoff;
+ uint64_t mask_4x4_1 = (mask_4x4 >> shift_next) & mask_cutoff;
- uint16_t mask_16x16 = lfm->left_uv[TX_16X16];
- uint16_t mask_8x8 = lfm->left_uv[TX_8X8];
- uint16_t mask_4x4 = lfm->left_uv[TX_4X4];
- uint16_t mask_4x4_int = lfm->left_int_4x4_uv;
-
- assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
- assert(plane->plane_type == PLANE_TYPE_UV);
- memset(lfm->lfl_uv, 0, sizeof(lfm->lfl_uv));
-
- // Vertical pass: do 2 rows at one time
- for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 4) {
- for (c = 0; c < (cm->mib_size >> 1); c++) {
- lfm->lfl_uv[r >> 1][c] = lfm->lfl_y[r][c << 1];
- lfm->lfl_uv[(r + 2) >> 1][c] = lfm->lfl_y[r + 2][c << 1];
- }
-
- {
- unsigned int mask_16x16_l = mask_16x16 & 0xff;
- unsigned int mask_8x8_l = mask_8x8 & 0xff;
- unsigned int mask_4x4_l = mask_4x4 & 0xff;
- unsigned int mask_4x4_int_l = mask_4x4_int & 0xff;
-
-// Disable filtering on the leftmost column.
-#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
highbd_filter_selectively_vert_row2(
- plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
- mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
- &lfm->lfl_uv[r >> 1][0], (int)cm->bit_depth);
+ ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0,
+ mask_8x8_0, mask_4x4_0, mask_16x16_1, mask_8x8_1, mask_4x4_1,
+ &cm->lf_info, lfl, lfl2, (int)cm->bit_depth);
else
-#endif // CONFIG_HIGHBITDEPTH
- filter_selectively_vert_row2(plane->subsampling_x, dst->buf,
- dst->stride, mask_16x16_l, mask_8x8_l,
- mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
- &lfm->lfl_uv[r >> 1][0]);
-
- dst->buf += 2 * MI_SIZE * dst->stride;
- mask_16x16 >>= MI_SIZE;
- mask_8x8 >>= MI_SIZE;
- mask_4x4 >>= MI_SIZE;
- mask_4x4_int >>= MI_SIZE;
+ filter_selectively_vert_row2(ssx, dst->buf, dst->stride, pl,
+ mask_16x16_0, mask_8x8_0, mask_4x4_0,
+ mask_16x16_1, mask_8x8_1, mask_4x4_1,
+ &cm->lf_info, lfl, lfl2);
+ dst->buf -= ((c << MI_SIZE_LOG2) >> ssx);
}
+ dst->buf += 2 * MI_SIZE * dst->stride;
}
-
- // Horizontal pass
- dst->buf = dst0;
}
-void av1_filter_block_plane_ss11_hor(AV1_COMMON *const cm,
- struct macroblockd_plane *const plane,
- int mi_row, LOOP_FILTER_MASK *lfm) {
- struct buf_2d *const dst = &plane->dst;
- uint8_t *const dst0 = dst->buf;
+void av1_filter_block_plane_hor(AV1_COMMON *const cm,
+ struct macroblockd_plane *const plane_ptr,
+ int pl, int mi_row, int mi_col) {
+ struct buf_2d *const dst = &plane_ptr->dst;
int r, c;
- uint64_t mask_16x16 = lfm->above_uv[TX_16X16];
- uint64_t mask_8x8 = lfm->above_uv[TX_8X8];
- uint64_t mask_4x4 = lfm->above_uv[TX_4X4];
- uint64_t mask_4x4_int = lfm->above_int_4x4_uv;
-
- assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
- memset(lfm->lfl_uv, 0, sizeof(lfm->lfl_uv));
-
-  // re-populate the filter level for uv, same as the code for vertical
- // filter in av1_filter_block_plane_ss11_ver
- for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 4) {
- for (c = 0; c < (cm->mib_size >> 1); c++) {
- lfm->lfl_uv[r >> 1][c] = lfm->lfl_y[r][c << 1];
- lfm->lfl_uv[(r + 2) >> 1][c] = lfm->lfl_y[r + 2][c << 1];
- }
- }
+ const int ssx = plane_ptr->subsampling_x;
+ const int ssy = plane_ptr->subsampling_y;
+ const int mask_cutoff = 0xffff;
+ const int r_step = 1 << ssy;
+ uint64_t mask_16x16 = 0;
+ uint64_t mask_8x8 = 0;
+ uint64_t mask_4x4 = 0;
+ uint8_t *lfl;
+
+ for (r = 0; r < cm->seq_params.mib_size &&
+ ((mi_row + r) << MI_SIZE_LOG2 < cm->height);
+ r += r_step) {
+ for (c = 0; c < cm->seq_params.mib_size &&
+ ((mi_col + c) << MI_SIZE_LOG2 < cm->width);
+ c += MI_SIZE_64X64) {
+ if (mi_row + r == 0) continue;
+
+ dst->buf += ((c << MI_SIZE_LOG2) >> ssx);
+ LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row + r, mi_col + c);
+ assert(lfm);
+ const int row = ((mi_row + r) | ssy) % MI_SIZE_64X64;
+ const int col = ((mi_col + c) | ssx) % MI_SIZE_64X64;
+ int index = 0;
+ const int shift = get_index_shift(col, row, &index);
+ switch (pl) {
+ case 0:
+ mask_16x16 = lfm->above_y[TX_16X16].bits[index];
+ mask_8x8 = lfm->above_y[TX_8X8].bits[index];
+ mask_4x4 = lfm->above_y[TX_4X4].bits[index];
+ lfl = &lfm->lfl_y_hor[row][col];
+ break;
+ case 1:
+ mask_16x16 = lfm->above_u[TX_16X16].bits[index];
+ mask_8x8 = lfm->above_u[TX_8X8].bits[index];
+ mask_4x4 = lfm->above_u[TX_4X4].bits[index];
+ lfl = &lfm->lfl_u_hor[row][col];
+ break;
+ case 2:
+ mask_16x16 = lfm->above_v[TX_16X16].bits[index];
+ mask_8x8 = lfm->above_v[TX_8X8].bits[index];
+ mask_4x4 = lfm->above_v[TX_4X4].bits[index];
+ lfl = &lfm->lfl_v_hor[row][col];
+ break;
+ default: assert(pl >= 0 && pl <= 2); return;
+ }
+ mask_16x16 = (mask_16x16 >> shift) & mask_cutoff;
+ mask_8x8 = (mask_8x8 >> shift) & mask_cutoff;
+ mask_4x4 = (mask_4x4 >> shift) & mask_cutoff;
- for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
- const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
- const unsigned int mask_4x4_int_r =
- skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf);
- unsigned int mask_16x16_r;
- unsigned int mask_8x8_r;
- unsigned int mask_4x4_r;
-
- if (mi_row + r == 0) {
- mask_16x16_r = 0;
- mask_8x8_r = 0;
- mask_4x4_r = 0;
- } else {
- mask_16x16_r = mask_16x16 & 0xf;
- mask_8x8_r = mask_8x8 & 0xf;
- mask_4x4_r = mask_4x4 & 0xf;
+ if (cm->use_highbitdepth)
+ highbd_filter_selectively_horiz(
+ CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, ssx, mask_16x16,
+ mask_8x8, mask_4x4, &cm->lf_info, lfl, (int)cm->bit_depth);
+ else
+ filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
+ mask_8x8, mask_4x4, &cm->lf_info, lfl);
+ dst->buf -= ((c << MI_SIZE_LOG2) >> ssx);
}
-
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- highbd_filter_selectively_horiz(
- CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int_r, &cm->lf_info, &lfm->lfl_uv[r >> 1][0],
- (int)cm->bit_depth);
- else
-#endif // CONFIG_HIGHBITDEPTH
-#if !CONFIG_LPF_DIRECT
- filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
- &lfm->lfl_uv[r >> 1][0]);
-#endif // CONFIG_LPF_DIRECT
-
dst->buf += MI_SIZE * dst->stride;
- mask_16x16 >>= MI_SIZE / 2;
- mask_8x8 >>= MI_SIZE / 2;
- mask_4x4 >>= MI_SIZE / 2;
- mask_4x4_int >>= MI_SIZE / 2;
}
- // restore the buf pointer in case there is additional filter pass.
- dst->buf = dst0;
}
-
-#if CONFIG_PARALLEL_DEBLOCKING
-typedef enum EDGE_DIR { VERT_EDGE = 0, HORZ_EDGE = 1, NUM_EDGE_DIRS } EDGE_DIR;
-static const uint32_t av1_prediction_masks[NUM_EDGE_DIRS][BLOCK_SIZES_ALL] = {
- // mask for vertical edges filtering
- {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 2 - 1, // BLOCK_2X2
- 2 - 1, // BLOCK_2X4
- 4 - 1, // BLOCK_4X2
-#endif // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 4 - 1, // BLOCK_4X4
- 4 - 1, // BLOCK_4X8
- 8 - 1, // BLOCK_8X4
- 8 - 1, // BLOCK_8X8
- 8 - 1, // BLOCK_8X16
- 16 - 1, // BLOCK_16X8
- 16 - 1, // BLOCK_16X16
- 16 - 1, // BLOCK_16X32
- 32 - 1, // BLOCK_32X16
- 32 - 1, // BLOCK_32X32
- 32 - 1, // BLOCK_32X64
- 64 - 1, // BLOCK_64X32
- 64 - 1, // BLOCK_64X64
-#if CONFIG_EXT_PARTITION
- 64 - 1, // BLOCK_64X128
- 128 - 1, // BLOCK_128X64
- 128 - 1, // BLOCK_128X128
-#endif // CONFIG_EXT_PARTITION
- 4 - 1, // BLOCK_4X16,
- 16 - 1, // BLOCK_16X4,
- 8 - 1, // BLOCK_8X32,
- 32 - 1, // BLOCK_32X8,
- 16 - 1, // BLOCK_16X64,
- 64 - 1, // BLOCK_64X16
-#if CONFIG_EXT_PARTITION
- 32 - 1, // BLOCK_32X128
- 128 - 1, // BLOCK_128X32
-#endif // CONFIG_EXT_PARTITION
- },
- // mask for horizontal edges filtering
- {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 2 - 1, // BLOCK_2X2
- 4 - 1, // BLOCK_2X4
- 2 - 1, // BLOCK_4X2
-#endif // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 4 - 1, // BLOCK_4X4
- 8 - 1, // BLOCK_4X8
- 4 - 1, // BLOCK_8X4
- 8 - 1, // BLOCK_8X8
- 16 - 1, // BLOCK_8X16
- 8 - 1, // BLOCK_16X8
- 16 - 1, // BLOCK_16X16
- 32 - 1, // BLOCK_16X32
- 16 - 1, // BLOCK_32X16
- 32 - 1, // BLOCK_32X32
- 64 - 1, // BLOCK_32X64
- 32 - 1, // BLOCK_64X32
- 64 - 1, // BLOCK_64X64
-#if CONFIG_EXT_PARTITION
- 128 - 1, // BLOCK_64X128
- 64 - 1, // BLOCK_128X64
- 128 - 1, // BLOCK_128X128
-#endif // CONFIG_EXT_PARTITION
- 16 - 1, // BLOCK_4X16,
- 4 - 1, // BLOCK_16X4,
- 32 - 1, // BLOCK_8X32,
- 8 - 1, // BLOCK_32X8,
- 64 - 1, // BLOCK_16X64,
- 16 - 1, // BLOCK_64X16
-#if CONFIG_EXT_PARTITION
- 128 - 1, // BLOCK_32X128
- 32 - 1, // BLOCK_128X32
-#endif // CONFIG_EXT_PARTITION
- },
-};
-
-static const uint32_t av1_transform_masks[NUM_EDGE_DIRS][TX_SIZES_ALL] = {
- {
-#if CONFIG_CHROMA_2X2
- 2 - 1, // TX_2X2
-#endif
- 4 - 1, // TX_4X4
- 8 - 1, // TX_8X8
- 16 - 1, // TX_16X16
- 32 - 1, // TX_32X32
-#if CONFIG_TX64X64
- 64 - 1, // TX_64X64
-#endif // CONFIG_TX64X64
- 4 - 1, // TX_4X8
- 8 - 1, // TX_8X4
- 8 - 1, // TX_8X16
- 16 - 1, // TX_16X8
- 16 - 1, // TX_16X32
- 32 - 1, // TX_32X16
-#if CONFIG_TX64X64
- 32 - 1, // TX_32X64
- 64 - 1, // TX_64X32
-#endif // CONFIG_TX64X64
- 4 - 1, // TX_4X16
- 16 - 1, // TX_16X4
- 8 - 1, // TX_8X32
- 32 - 1 // TX_32X8
- },
- {
-#if CONFIG_CHROMA_2X2
- 2 - 1, // TX_2X2
-#endif
- 4 - 1, // TX_4X4
- 8 - 1, // TX_8X8
- 16 - 1, // TX_16X16
- 32 - 1, // TX_32X32
-#if CONFIG_TX64X64
- 64 - 1, // TX_64X64
-#endif // CONFIG_TX64X64
- 8 - 1, // TX_4X8
- 4 - 1, // TX_8X4
- 16 - 1, // TX_8X16
- 8 - 1, // TX_16X8
- 32 - 1, // TX_16X32
- 16 - 1, // TX_32X16
-#if CONFIG_TX64X64
- 64 - 1, // TX_32X64
- 32 - 1, // TX_64X32
-#endif // CONFIG_TX64X64
- 16 - 1, // TX_4X16
- 4 - 1, // TX_16X4
- 32 - 1, // TX_8X32
- 8 - 1 // TX_32X8
- }
-};
-
-static TX_SIZE av1_get_transform_size(const MODE_INFO *const mi,
- const EDGE_DIR edge_dir, const int mi_row,
- const int mi_col, const int plane,
- const struct macroblockd_plane *plane_ptr,
- const uint32_t scale_horz,
- const uint32_t scale_vert) {
- const MB_MODE_INFO *mbmi = &mi->mbmi;
- TX_SIZE tx_size = (plane == AOM_PLANE_Y)
- ? mbmi->tx_size
- : av1_get_uv_tx_size(mbmi, plane_ptr);
+#endif // LOOP_FILTER_BITMASK
+
+static TX_SIZE get_transform_size(const MACROBLOCKD *const xd,
+ const MB_MODE_INFO *const mbmi,
+ const EDGE_DIR edge_dir, const int mi_row,
+ const int mi_col, const int plane,
+ const struct macroblockd_plane *plane_ptr) {
+ assert(mbmi != NULL);
+ if (xd && xd->lossless[mbmi->segment_id]) return TX_4X4;
+
+ TX_SIZE tx_size =
+ (plane == AOM_PLANE_Y)
+ ? mbmi->tx_size
+ : av1_get_max_uv_txsize(mbmi->sb_type, plane_ptr->subsampling_x,
+ plane_ptr->subsampling_y);
assert(tx_size < TX_SIZES_ALL);
-
-#if CONFIG_VAR_TX
- // mi_row and mi_col is the absolute position of the MI block.
- // idx_c and idx_r is the relative offset of the MI within the super block
- // c and r is the relative offset of the 8x8 block within the supert block
- // blk_row and block_col is the relative offset of the current 8x8 block
- // within the current partition.
- const int idx_c = mi_col & MAX_MIB_MASK;
- const int idx_r = mi_row & MAX_MIB_MASK;
- const int c = idx_c >> mi_width_log2_lookup[BLOCK_8X8];
- const int r = idx_r >> mi_height_log2_lookup[BLOCK_8X8];
- const BLOCK_SIZE sb_type = mi->mbmi.sb_type;
- const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1);
- const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1);
-
- if (is_inter_block(mbmi) && !mbmi->skip) {
- const int tx_row_idx =
- (blk_row * mi_size_high[BLOCK_8X8] << TX_UNIT_HIGH_LOG2) >> 1;
- const int tx_col_idx =
- (blk_col * mi_size_wide[BLOCK_8X8] << TX_UNIT_WIDE_LOG2) >> 1;
-
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- const BLOCK_SIZE bsize =
- AOMMAX(BLOCK_4X4, ss_size_lookup[sb_type][scale_horz][scale_vert]);
-#else
- const BLOCK_SIZE bsize = ss_size_lookup[sb_type][scale_horz][scale_vert];
-#endif
- const TX_SIZE mb_tx_size = mbmi->inter_tx_size[tx_row_idx][tx_col_idx];
-
+ if ((plane == AOM_PLANE_Y) && is_inter_block(mbmi) && !mbmi->skip) {
+ const BLOCK_SIZE sb_type = mbmi->sb_type;
+ const int blk_row = mi_row & (mi_size_high[sb_type] - 1);
+ const int blk_col = mi_col & (mi_size_wide[sb_type] - 1);
+ const TX_SIZE mb_tx_size =
+ mbmi->inter_tx_size[av1_get_txb_size_index(sb_type, blk_row, blk_col)];
assert(mb_tx_size < TX_SIZES_ALL);
-
- tx_size = (plane == AOM_PLANE_Y)
- ? mb_tx_size
- : uv_txsize_lookup[bsize][mb_tx_size][0][0];
- assert(tx_size < TX_SIZES_ALL);
+ tx_size = mb_tx_size;
}
-#else
- (void)mi_row;
- (void)mi_col;
- (void)scale_horz;
- (void)scale_vert;
-#endif // CONFIG_VAR_TX
  // since in the case of chrominance or a non-square transform we need to
  // convert the transform size into the one for the particular direction.
@@ -2926,111 +1524,84 @@ static TX_SIZE av1_get_transform_size(const MODE_INFO *const mi,
typedef struct AV1_DEBLOCKING_PARAMETERS {
// length of the filter applied to the outer edge
uint32_t filter_length;
- // length of the filter applied to the inner edge
- uint32_t filter_length_internal;
// deblocking limits
const uint8_t *lim;
const uint8_t *mblim;
const uint8_t *hev_thr;
} AV1_DEBLOCKING_PARAMETERS;
-static void set_lpf_parameters(
+// Return the TX_SIZE from get_transform_size(), so it is plane and direction
+// aware.
+static TX_SIZE set_lpf_parameters(
AV1_DEBLOCKING_PARAMETERS *const params, const ptrdiff_t mode_step,
- const AV1_COMMON *const cm, const EDGE_DIR edge_dir, const uint32_t x,
- const uint32_t y, const int plane,
- const struct macroblockd_plane *const plane_ptr) {
+ const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
+ const EDGE_DIR edge_dir, const uint32_t x, const uint32_t y,
+ const int plane, const struct macroblockd_plane *const plane_ptr) {
// reset to initial values
params->filter_length = 0;
- params->filter_length_internal = 0;
// no deblocking is required
const uint32_t width = plane_ptr->dst.width;
const uint32_t height = plane_ptr->dst.height;
if ((width <= x) || (height <= y)) {
- return;
+ // just return the smallest transform unit size
+ return TX_4X4;
}
const uint32_t scale_horz = plane_ptr->subsampling_x;
const uint32_t scale_vert = plane_ptr->subsampling_y;
- const int mi_row = (y << scale_vert) >> MI_SIZE_LOG2;
- const int mi_col = (x << scale_horz) >> MI_SIZE_LOG2;
- MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
- const MB_MODE_INFO *mbmi = &mi[0]->mbmi;
+  // For a sub8x8 block, the chroma prediction mode is obtained from the
+  // bottom/right mi structure of the co-located 8x8 luma block. So for the
+  // chroma plane, mi_row and mi_col should map to the bottom/right mi
+  // structure, i.e., both mi_row and mi_col should be odd numbers.
+ const int mi_row = scale_vert | ((y << scale_vert) >> MI_SIZE_LOG2);
+ const int mi_col = scale_horz | ((x << scale_horz) >> MI_SIZE_LOG2);
+ MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
+ const MB_MODE_INFO *mbmi = mi[0];
+  // If the current mbmi is not correctly set up, return an invalid value to
+  // stop filtering. One example: if this tile is not coded, then its mbmi is
+  // not set up.
+ if (mbmi == NULL) return TX_INVALID;
+
+ const TX_SIZE ts =
+ get_transform_size(xd, mi[0], edge_dir, mi_row, mi_col, plane, plane_ptr);
{
- const TX_SIZE ts =
- av1_get_transform_size(mi[0], edge_dir, mi_row, mi_col, plane,
- plane_ptr, scale_horz, scale_vert);
-
-#if CONFIG_EXT_DELTA_Q
-#if CONFIG_LOOPFILTER_LEVEL
- const uint32_t curr_level =
- get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi);
-#else
-#if CONFIG_LPF_SB
- const uint32_t curr_level =
- get_filter_level(cm, &cm->lf_info, mi_row, mi_col, mbmi);
-#else
- const uint32_t curr_level = get_filter_level(cm, &cm->lf_info, mbmi);
-#endif // CONFIG_LPF_SB
-#endif
-#else
- const uint32_t curr_level = get_filter_level(&cm->lf_info, mbmi);
-#endif // CONFIG_EXT_DELTA_Q
-
- const int curr_skipped = mbmi->skip && is_inter_block(mbmi);
const uint32_t coord = (VERT_EDGE == edge_dir) ? (x) : (y);
- uint32_t level = curr_level;
+ const uint32_t transform_masks =
+ edge_dir == VERT_EDGE ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1;
+ const int32_t tu_edge = (coord & transform_masks) ? (0) : (1);
+
+ if (!tu_edge) return ts;
+
// prepare outer edge parameters. deblock the edge if it's an edge of a TU
- if (coord) {
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
- MODE_INFO *const mi_bound = cm->mi + mi_row * cm->mi_stride + mi_col;
- if (!av1_disable_loopfilter_on_tile_boundary(cm) ||
- ((VERT_EDGE == edge_dir) &&
- (0 == (mi_bound->mbmi.boundary_info & TILE_LEFT_BOUNDARY))) ||
- ((HORZ_EDGE == edge_dir) &&
- (0 == (mi_bound->mbmi.boundary_info & TILE_ABOVE_BOUNDARY))))
-#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
- {
- const int32_t tu_edge =
- (coord & av1_transform_masks[edge_dir][ts]) ? (0) : (1);
- if (tu_edge) {
- const MODE_INFO *const mi_prev = *(mi - mode_step);
+ {
+ const uint32_t curr_level =
+ get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi);
+ const int curr_skipped = mbmi->skip && is_inter_block(mbmi);
+ uint32_t level = curr_level;
+ if (coord) {
+ {
+ const MB_MODE_INFO *const mi_prev = *(mi - mode_step);
+ if (mi_prev == NULL) return TX_INVALID;
const int pv_row =
(VERT_EDGE == edge_dir) ? (mi_row) : (mi_row - (1 << scale_vert));
const int pv_col =
(VERT_EDGE == edge_dir) ? (mi_col - (1 << scale_horz)) : (mi_col);
- const TX_SIZE pv_ts =
- av1_get_transform_size(mi_prev, edge_dir, pv_row, pv_col, plane,
- plane_ptr, scale_horz, scale_vert);
-
-#if CONFIG_EXT_DELTA_Q
-#if CONFIG_LOOPFILTER_LEVEL
- const uint32_t pv_lvl = get_filter_level(cm, &cm->lf_info, edge_dir,
- plane, &mi_prev->mbmi);
-#else
-#if CONFIG_LPF_SB
- const uint32_t pv_lvl = get_filter_level(cm, &cm->lf_info, pv_row,
- pv_col, &mi_prev->mbmi);
-#else
- const uint32_t pv_lvl =
- get_filter_level(cm, &cm->lf_info, &mi_prev->mbmi);
-#endif // CONFIG_LPF_SB
-#endif
-#else
+ const TX_SIZE pv_ts = get_transform_size(
+ xd, mi_prev, edge_dir, pv_row, pv_col, plane, plane_ptr);
+
const uint32_t pv_lvl =
- get_filter_level(&cm->lf_info, &mi_prev->mbmi);
-#endif // CONFIG_EXT_DELTA_Q
-
- const int pv_skip =
- mi_prev->mbmi.skip && is_inter_block(&mi_prev->mbmi);
- const int32_t pu_edge =
- (coord &
- av1_prediction_masks[edge_dir]
- [ss_size_lookup[mbmi->sb_type][scale_horz]
- [scale_vert]])
- ? (0)
- : (1);
+ get_filter_level(cm, &cm->lf_info, edge_dir, plane, mi_prev);
+
+ const int pv_skip = mi_prev->skip && is_inter_block(mi_prev);
+ const BLOCK_SIZE bsize =
+ get_plane_block_size(mbmi->sb_type, plane_ptr->subsampling_x,
+ plane_ptr->subsampling_y);
+ const int prediction_masks = edge_dir == VERT_EDGE
+ ? block_size_wide[bsize] - 1
+ : block_size_high[bsize] - 1;
+ const int32_t pu_edge = !(coord & prediction_masks);
// if the current and the previous blocks are skipped,
// deblock the edge if the edge belongs to a PU's edge only.
if ((curr_level || pv_lvl) &&
@@ -3039,41 +1610,26 @@ static void set_lpf_parameters(
if (TX_4X4 >= min_ts) {
params->filter_length = 4;
} else if (TX_8X8 == min_ts) {
- params->filter_length = 8;
+ if (plane != 0)
+ params->filter_length = 6;
+ else
+ params->filter_length = 8;
} else {
- params->filter_length = 16;
-#if PARALLEL_DEBLOCKING_15TAPLUMAONLY
+ params->filter_length = 14;
// No wide filtering for chroma plane
if (plane != 0) {
-#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
params->filter_length = 6;
-#else
- params->filter_length = 8;
-#endif
}
-#endif
}
-#if PARALLEL_DEBLOCKING_DISABLE_15TAP
- params->filter_length = (TX_4X4 >= AOMMIN(ts, pv_ts)) ? (4) : (8);
-#endif // PARALLEL_DEBLOCKING_DISABLE_15TAP
-
// update the level if the current block is skipped,
// but the previous one is not
level = (curr_level) ? (curr_level) : (pv_lvl);
}
}
}
-
-#if !CONFIG_CB4X4
- // prepare internal edge parameters
- if (curr_level && !curr_skipped) {
- params->filter_length_internal = (TX_4X4 >= ts) ? (4) : (0);
- }
-#endif
-
// prepare common parameters
- if (params->filter_length || params->filter_length_internal) {
+ if (params->filter_length) {
const loop_filter_thresh *const limits = cm->lf_info.lfthr + level;
params->lim = limits->lim;
params->mblim = limits->mblim;
@@ -3081,654 +1637,278 @@ static void set_lpf_parameters(
}
}
}
+
+ return ts;
}
-static void av1_filter_block_plane_vert(
- const AV1_COMMON *const cm, const int plane,
- const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
- const uint32_t mi_col) {
- const int col_step = MI_SIZE >> MI_SIZE_LOG2;
+void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
+ const MACROBLOCKD *const xd, const int plane,
+ const MACROBLOCKD_PLANE *const plane_ptr,
+ const uint32_t mi_row, const uint32_t mi_col) {
const int row_step = MI_SIZE >> MI_SIZE_LOG2;
const uint32_t scale_horz = plane_ptr->subsampling_x;
const uint32_t scale_vert = plane_ptr->subsampling_y;
uint8_t *const dst_ptr = plane_ptr->dst.buf;
const int dst_stride = plane_ptr->dst.stride;
-#if CONFIG_LPF_SB
- int y_range = mi_row ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
- y_range = AOMMIN(y_range, cm->mi_rows);
- y_range >>= scale_vert;
-
- int x_range = mi_col ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
- x_range = AOMMIN(x_range, cm->mi_cols);
- x_range >>= scale_horz;
-#else
const int y_range = (MAX_MIB_SIZE >> scale_vert);
const int x_range = (MAX_MIB_SIZE >> scale_horz);
-#endif // CONFIG_LPF_SB
for (int y = 0; y < y_range; y += row_step) {
uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
- for (int x = 0; x < x_range; x += col_step) {
+ for (int x = 0; x < x_range;) {
      // The inner loop always filters vertical edges in an MI block. If the MI
      // size is 8x8, it will filter the vertical edge aligned with an 8x8
      // block. If a 4x4 transform is used, it will then filter the internal
      // edge aligned with a 4x4 block.
const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
+ uint32_t advance_units;
+ TX_SIZE tx_size;
AV1_DEBLOCKING_PARAMETERS params;
memset(&params, 0, sizeof(params));
- set_lpf_parameters(&params, ((ptrdiff_t)1 << scale_horz), cm, VERT_EDGE,
- curr_x, curr_y, plane, plane_ptr);
-
-#if CONFIG_LPF_DIRECT
- uint8_t *const src = plane_ptr->dst.buf0;
- const int width = cm->width >> scale_horz;
- const int height = cm->height >> scale_vert;
- const int pivot = 8;
- const int line_length = 16;
- uint8_t block[128];
- int orig_pos[128];
- const int vert_or_horz = 0; // 0: vertical
- const int unit = 1;
- int i;
- for (i = 0; i < 128; ++i) {
- block[i] = 0;
- orig_pos[i] = -1;
+ tx_size =
+ set_lpf_parameters(&params, ((ptrdiff_t)1 << scale_horz), cm, xd,
+ VERT_EDGE, curr_x, curr_y, plane, plane_ptr);
+ if (tx_size == TX_INVALID) {
+ params.filter_length = 0;
+ tx_size = TX_4X4;
}
- if (params.filter_length) {
- const int filt_len = params.filter_length == 16 ? 8 : 4;
- const int direct =
- pick_min_grad_direct(src, filt_len, curr_y, curr_x, width, height,
- dst_stride, unit, vert_or_horz);
-
- pick_filter_block_vert(src, block, orig_pos, filt_len, curr_y, curr_x,
- width, height, dst_stride, pivot, line_length,
- unit, direct);
- uint8_t *const filt_start = block + pivot;
- switch (params.filter_length) {
- // apply 4-tap filtering
- case 4:
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(filt_start),
- line_length, params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
- else
-#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_vertical_4(filt_start, line_length, params.mblim,
- params.lim, params.hev_thr);
- break;
- // apply 8-tap filtering
- case 8:
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- aom_highbd_lpf_vertical_8(CONVERT_TO_SHORTPTR(filt_start),
- line_length, params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
- else
-#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_vertical_8(filt_start, line_length, params.mblim,
- params.lim, params.hev_thr);
- break;
- // apply 16-tap filtering
- case 16:
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- aom_highbd_lpf_vertical_16(CONVERT_TO_SHORTPTR(filt_start),
- line_length, params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
- else
-#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_vertical_16(filt_start, line_length, params.mblim,
- params.lim, params.hev_thr);
- break;
- // no filtering
- default: break;
- }
-
- for (i = 0; i < 128; ++i) {
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
- }
- }
-
- if (params.filter_length_internal) {
- for (i = 0; i < 128; ++i) {
- block[i] = 0;
- orig_pos[i] = -1;
- }
-
- const int direct =
- pick_min_grad_direct(src, 4, curr_y, curr_x + 4, width, height,
- dst_stride, unit, vert_or_horz);
-
- pick_filter_block_vert(src, block, orig_pos, 4, curr_y, curr_x + 4,
- width, height, dst_stride, pivot, line_length,
- unit, direct);
-
- uint8_t *const filt_start = block + pivot;
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(filt_start),
- line_length, params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
- else
-#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_vertical_4(filt_start, line_length, params.mblim, params.lim,
- params.hev_thr);
-
- for (i = 0; i < 128; ++i) {
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
- }
- }
-#else // !CONFIG_LPF_DIRECT
switch (params.filter_length) {
// apply 4-tap filtering
case 4:
-#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim, params.hev_thr,
cm->bit_depth);
else
-#endif // CONFIG_HIGHBITDEPTH
aom_lpf_vertical_4(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
-#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
case 6: // apply 6-tap filter for chroma plane only
assert(plane != 0);
-#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
- aom_highbd_lpf_vertical_6_c(CONVERT_TO_SHORTPTR(p), dst_stride,
- params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
+ aom_highbd_lpf_vertical_6(CONVERT_TO_SHORTPTR(p), dst_stride,
+ params.mblim, params.lim, params.hev_thr,
+ cm->bit_depth);
else
-#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_vertical_6_c(p, dst_stride, params.mblim, params.lim,
- params.hev_thr);
+ aom_lpf_vertical_6(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr);
break;
-#endif
// apply 8-tap filtering
case 8:
-#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
aom_highbd_lpf_vertical_8(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim, params.hev_thr,
cm->bit_depth);
else
-#endif // CONFIG_HIGHBITDEPTH
aom_lpf_vertical_8(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
- // apply 16-tap filtering
- case 16:
-#if CONFIG_HIGHBITDEPTH
+ // apply 14-tap filtering
+ case 14:
if (cm->use_highbitdepth)
-#if CONFIG_DEBLOCK_13TAP
- // TODO(olah): Remove _c once SIMD for 13-tap is available
- aom_highbd_lpf_vertical_16_c(CONVERT_TO_SHORTPTR(p), dst_stride,
- params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
-#else
- aom_highbd_lpf_vertical_16(CONVERT_TO_SHORTPTR(p), dst_stride,
+ aom_highbd_lpf_vertical_14(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim, params.hev_thr,
cm->bit_depth);
-#endif
else
-#endif // CONFIG_HIGHBITDEPTH
-#if CONFIG_DEBLOCK_13TAP
- aom_lpf_vertical_16_c(p, dst_stride, params.mblim, params.lim,
- params.hev_thr);
-#else
- aom_lpf_vertical_16(p, dst_stride, params.mblim, params.lim,
- params.hev_thr);
-#endif
+ aom_lpf_vertical_14(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr);
break;
// no filtering
default: break;
}
- // process the internal edge
- if (params.filter_length_internal) {
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(p + 4), dst_stride,
- params.mblim, params.lim, params.hev_thr,
- cm->bit_depth);
- else
-#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_vertical_4(p + 4, dst_stride, params.mblim, params.lim,
- params.hev_thr);
- }
-#endif // CONFIG_LPF_DIRECT
// advance the destination pointer
- p += MI_SIZE;
+ advance_units = tx_size_wide_unit[tx_size];
+ x += advance_units;
+ p += advance_units * MI_SIZE;
}
}
}
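
Note that the rewritten vertical pass above advances by the width of the transform block it just handled (tx_size_wide_unit[tx_size] 4x4 units), so each transform edge is visited once instead of stepping a fixed 4 pixels. A small, purely illustrative calculation of the per-iteration advance:

/* Illustrative sketch only (not part of the patch). MI_SIZE is 4 pixels; a
 * transform that is tx_w pixels wide spans tx_w / 4 of those 4x4 units. */
#include <stdio.h>

int main(void) {
  const int MI_SIZE = 4;
  const int tx_widths[] = { 4, 8, 16, 32, 64 }; /* example transform widths in pixels */
  for (int i = 0; i < 5; ++i) {
    const int advance_units = tx_widths[i] / MI_SIZE; /* analogous to tx_size_wide_unit[] */
    printf("tx width %2d -> advance %d units (%d pixels)\n", tx_widths[i],
           advance_units, advance_units * MI_SIZE);
  }
  return 0;
}
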
-static void av1_filter_block_plane_horz(
- const AV1_COMMON *const cm, const int plane,
- const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
- const uint32_t mi_col) {
+void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
+ const MACROBLOCKD *const xd, const int plane,
+ const MACROBLOCKD_PLANE *const plane_ptr,
+ const uint32_t mi_row, const uint32_t mi_col) {
const int col_step = MI_SIZE >> MI_SIZE_LOG2;
- const int row_step = MI_SIZE >> MI_SIZE_LOG2;
const uint32_t scale_horz = plane_ptr->subsampling_x;
const uint32_t scale_vert = plane_ptr->subsampling_y;
uint8_t *const dst_ptr = plane_ptr->dst.buf;
const int dst_stride = plane_ptr->dst.stride;
-#if CONFIG_LPF_SB
- int y_range = mi_row ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
- y_range = AOMMIN(y_range, cm->mi_rows);
- y_range >>= scale_vert;
-
- int x_range = mi_col ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
- x_range = AOMMIN(x_range, cm->mi_cols);
- x_range >>= scale_horz;
-#else
const int y_range = (MAX_MIB_SIZE >> scale_vert);
const int x_range = (MAX_MIB_SIZE >> scale_horz);
-#endif // CONFIG_LPF_SB
- for (int y = 0; y < y_range; y += row_step) {
- uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
- for (int x = 0; x < x_range; x += col_step) {
+ for (int x = 0; x < x_range; x += col_step) {
+ uint8_t *p = dst_ptr + x * MI_SIZE;
+ for (int y = 0; y < y_range;) {
      // The inner loop always filters horizontal edges in an MI block. If the
      // MI size is 8x8, it will first filter the horizontal edge aligned with
      // an 8x8 block. If a 4x4 transform is used, it will then filter the
      // internal edge aligned with a 4x4 block.
const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
+ uint32_t advance_units;
+ TX_SIZE tx_size;
AV1_DEBLOCKING_PARAMETERS params;
memset(&params, 0, sizeof(params));
- set_lpf_parameters(&params, (cm->mi_stride << scale_vert), cm, HORZ_EDGE,
- curr_x, curr_y, plane, plane_ptr);
-
-#if CONFIG_LPF_DIRECT
- uint8_t *const src = plane_ptr->dst.buf0;
- const int width = cm->width >> scale_horz;
- const int height = cm->height >> scale_vert;
- const int pivot = 8;
- const int line_length = 16;
- uint8_t block[256];
- int orig_pos[256];
- const int vert_or_horz = 1; // 1: horizontal
- const int unit = 1;
- int i;
- for (i = 0; i < 256; ++i) {
- block[i] = 0;
- orig_pos[i] = -1;
- }
-
- if (params.filter_length) {
- const int filt_len = params.filter_length == 16 ? 8 : 4;
- const int direct =
- pick_min_grad_direct(src, filt_len, curr_y, curr_x, width, height,
- dst_stride, unit, vert_or_horz);
-
- pick_filter_block_horz(src, block, orig_pos, filt_len, curr_y, curr_x,
- width, height, dst_stride, pivot, line_length,
- unit, direct);
- uint8_t *const filt_start = block + pivot * line_length;
- switch (params.filter_length) {
- // apply 4-tap filtering
- case 4:
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(filt_start),
- line_length, params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
- else
-#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_horizontal_4(filt_start, line_length, params.mblim,
- params.lim, params.hev_thr);
- break;
- // apply 8-tap filtering
- case 8:
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- aom_highbd_lpf_horizontal_8(CONVERT_TO_SHORTPTR(filt_start),
- line_length, params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
- else
-#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_horizontal_8(filt_start, line_length, params.mblim,
- params.lim, params.hev_thr);
- break;
- // apply 16-tap filtering
- case 16:
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- aom_highbd_lpf_horizontal_edge_16(
- CONVERT_TO_SHORTPTR(filt_start), line_length, params.mblim,
- params.lim, params.hev_thr, cm->bit_depth);
- else
-#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_horizontal_edge_16(filt_start, line_length, params.mblim,
- params.lim, params.hev_thr);
- break;
- // no filtering
- default: break;
- }
-
- for (i = 0; i < 256; ++i) {
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
- }
+ tx_size =
+ set_lpf_parameters(&params, (cm->mi_stride << scale_vert), cm, xd,
+ HORZ_EDGE, curr_x, curr_y, plane, plane_ptr);
+ if (tx_size == TX_INVALID) {
+ params.filter_length = 0;
+ tx_size = TX_4X4;
}
- if (params.filter_length_internal) {
- for (i = 0; i < 256; ++i) {
- block[i] = 0;
- orig_pos[i] = -1;
- }
- const int direct =
- pick_min_grad_direct(src, 4, curr_y + 4, curr_x, width, height,
- dst_stride, unit, vert_or_horz);
-
- pick_filter_block_horz(src, block, orig_pos, 4, curr_y + 4, curr_x,
- width, height, dst_stride, pivot, line_length,
- unit, direct);
-
- uint8_t *const filt_start = block + pivot * line_length;
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(filt_start),
- line_length, params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
- else
-#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_horizontal_4(filt_start, line_length, params.mblim,
- params.lim, params.hev_thr);
-
- for (i = 0; i < 256; ++i) {
- if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
- }
- }
-#else // !CONFIG_LPF_DIRECT
switch (params.filter_length) {
// apply 4-tap filtering
case 4:
-#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim,
params.hev_thr, cm->bit_depth);
else
-#endif // CONFIG_HIGHBITDEPTH
aom_lpf_horizontal_4(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
-#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
// apply 6-tap filtering
- case 6: assert(plane != 0);
-#if CONFIG_HIGHBITDEPTH
+ case 6:
+ assert(plane != 0);
if (cm->use_highbitdepth)
- aom_highbd_lpf_horizontal_6_c(CONVERT_TO_SHORTPTR(p), dst_stride,
- params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
+ aom_highbd_lpf_horizontal_6(CONVERT_TO_SHORTPTR(p), dst_stride,
+ params.mblim, params.lim,
+ params.hev_thr, cm->bit_depth);
else
-#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_horizontal_6_c(p, dst_stride, params.mblim, params.lim,
- params.hev_thr);
+ aom_lpf_horizontal_6(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr);
break;
-#endif
// apply 8-tap filtering
case 8:
-#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
aom_highbd_lpf_horizontal_8(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim,
params.hev_thr, cm->bit_depth);
else
-#endif // CONFIG_HIGHBITDEPTH
aom_lpf_horizontal_8(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
- // apply 16-tap filtering
- case 16:
-#if CONFIG_HIGHBITDEPTH
+ // apply 14-tap filtering
+ case 14:
if (cm->use_highbitdepth)
-#if CONFIG_DEBLOCK_13TAP
- // TODO(olah): Remove _c once SIMD for 13-tap is available
- aom_highbd_lpf_horizontal_edge_16_c(
- CONVERT_TO_SHORTPTR(p), dst_stride, params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
-#else
- aom_highbd_lpf_horizontal_edge_16(
- CONVERT_TO_SHORTPTR(p), dst_stride, params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
-#endif
+ aom_highbd_lpf_horizontal_14(CONVERT_TO_SHORTPTR(p), dst_stride,
+ params.mblim, params.lim,
+ params.hev_thr, cm->bit_depth);
else
-#endif // CONFIG_HIGHBITDEPTH
-#if CONFIG_DEBLOCK_13TAP
- aom_lpf_horizontal_edge_16_c(p, dst_stride, params.mblim,
- params.lim, params.hev_thr);
-#else
- aom_lpf_horizontal_edge_16(p, dst_stride, params.mblim, params.lim,
- params.hev_thr);
-#endif
+ aom_lpf_horizontal_14(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr);
break;
// no filtering
default: break;
}
- // process the internal edge
- if (params.filter_length_internal) {
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(p + 4 * dst_stride),
- dst_stride, params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
- else
-#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_horizontal_4(p + 4 * dst_stride, dst_stride, params.mblim,
- params.lim, params.hev_thr);
- }
-#endif // CONFIG_LPF_DIRECT
+
// advance the destination pointer
- p += MI_SIZE;
+ advance_units = tx_size_high_unit[tx_size];
+ y += advance_units;
+ p += advance_units * dst_stride * MI_SIZE;
}
}
}
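
For context, a worked example of the pointer-advance step at the end of the inner loop above, assuming MI_SIZE == 4 (pixels per 4x4 mode-info unit) and that tx_size_high_unit[] gives a transform's height in 4x4 units; the variable names simply restate the loop locals shown in the hunk:

    /* e.g. after filtering the top edge of an 8x8 transform block */
    advance_units = tx_size_high_unit[TX_8X8];  /* == 2 (two 4x4 units)      */
    y += advance_units;                         /* next edge is 2 MI rows on */
    p += advance_units * dst_stride * MI_SIZE;  /* move down 8 pixel rows    */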
-#endif // CONFIG_PARALLEL_DEBLOCKING
-void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
- struct macroblockd_plane *planes, int start, int stop,
-#if CONFIG_LPF_SB
- int col_start, int col_end,
-#endif
- int y_only) {
-#if CONFIG_LOOPFILTER_LEVEL
- // y_only no longer has its original meaning.
- // Here it means which plane to filter
- // when y_only = {0, 1, 2}, it means we are searching for filter level for
- // Y/U/V plane individually.
- const int plane_start = y_only;
- const int plane_end = plane_start + 1;
-#else
- const int num_planes = y_only ? 1 : MAX_MB_PLANE;
- const int plane_start = 0;
- const int plane_end = num_planes;
-#endif // CONFIG_LOOPFILTER_LEVEL
-#if !CONFIG_LPF_SB
+static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
+ MACROBLOCKD *xd, int start, int stop,
+ int plane_start, int plane_end) {
+ struct macroblockd_plane *pd = xd->plane;
const int col_start = 0;
const int col_end = cm->mi_cols;
-#endif // CONFIG_LPF_SB
int mi_row, mi_col;
int plane;
-#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES || \
- CONFIG_CB4X4
-
-#if !CONFIG_PARALLEL_DEBLOCKING
-#if CONFIG_VAR_TX
- for (int i = 0; i < MAX_MB_PLANE; ++i)
- memset(cm->top_txfm_context[i], TX_32X32, cm->mi_cols << TX_UNIT_WIDE_LOG2);
-#endif // CONFIG_VAR_TX
- for (mi_row = start; mi_row < stop; mi_row += cm->mib_size) {
- MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
-#if CONFIG_VAR_TX
- for (int i = 0; i < MAX_MB_PLANE; ++i)
- memset(cm->left_txfm_context[i], TX_32X32,
- MAX_MIB_SIZE << TX_UNIT_HIGH_LOG2);
-#endif // CONFIG_VAR_TX
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) {
- av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
-
- for (plane = plane_start; plane < plane_end; ++plane) {
- av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
- mi_row, mi_col, plane);
- av1_filter_block_plane_non420_hor(cm, &planes[plane], mi + mi_col,
- mi_row, mi_col, plane);
- }
- }
- }
-#else
-
- // filter all vertical edges in every 64x64 super block
- for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
- for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
- av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
- for (plane = plane_start; plane < plane_end; ++plane) {
- av1_filter_block_plane_vert(cm, plane, &planes[plane], mi_row, mi_col);
- }
- }
- }
+ for (plane = plane_start; plane < plane_end; plane++) {
+ if (plane == 0 && !(cm->lf.filter_level[0]) && !(cm->lf.filter_level[1]))
+ break;
+ else if (plane == 1 && !(cm->lf.filter_level_u))
+ continue;
+ else if (plane == 2 && !(cm->lf.filter_level_v))
+ continue;
- // filter all horizontal edges in every 64x64 super block
- for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
- for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
- av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
- for (plane = plane_start; plane < plane_end; ++plane) {
- av1_filter_block_plane_horz(cm, plane, &planes[plane], mi_row, mi_col);
+#if LOOP_FILTER_BITMASK
+    // filter all vertical edges in every superblock (could be 128x128 or 64x64)
+ for (mi_row = start; mi_row < stop; mi_row += cm->seq_params.mib_size) {
+ for (mi_col = col_start; mi_col < col_end;
+ mi_col += cm->seq_params.mib_size) {
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
+ mi_col, plane, plane + 1);
+
+ av1_setup_bitmask(cm, mi_row, mi_col, plane, pd[plane].subsampling_x,
+ pd[plane].subsampling_y, stop, col_end);
+ av1_filter_block_plane_ver(cm, &pd[plane], plane, mi_row, mi_col);
}
}
- }
-#endif // CONFIG_PARALLEL_DEBLOCKING
-#else // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
+    // filter all horizontal edges in every superblock
+ for (mi_row = start; mi_row < stop; mi_row += cm->seq_params.mib_size) {
+ for (mi_col = col_start; mi_col < col_end;
+ mi_col += cm->seq_params.mib_size) {
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
+ mi_col, plane, plane + 1);
-#if CONFIG_PARALLEL_DEBLOCKING
- for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
- av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
- // filter all vertical edges in every 64x64 super block
- for (plane = plane_start; plane < plane_end; plane += 1) {
- av1_filter_block_plane_vert(cm, plane, &planes[plane], mi_row, mi_col);
+ av1_filter_block_plane_hor(cm, &pd[plane], plane, mi_row, mi_col);
}
}
- }
- for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
- av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
- // filter all horizontal edges in every 64x64 super block
- for (plane = plane_start; plane < plane_end; plane += 1) {
- av1_filter_block_plane_horz(cm, plane, &planes[plane], mi_row, mi_col);
+#else
+ if (cm->lf.combine_vert_horz_lf) {
+ // filter all vertical and horizontal edges in every 128x128 super block
+ for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
+ for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
+ // filter vertical edges
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
+ mi_col, plane, plane + 1);
+ av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
+ mi_col);
+ // filter horizontal edges
+ if (mi_col - MAX_MIB_SIZE >= 0) {
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer,
+ mi_row, mi_col - MAX_MIB_SIZE, plane,
+ plane + 1);
+ av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
+ mi_col - MAX_MIB_SIZE);
+ }
+ }
+ // filter horizontal edges
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
+ mi_col - MAX_MIB_SIZE, plane, plane + 1);
+ av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
+ mi_col - MAX_MIB_SIZE);
}
- }
- }
-#else // CONFIG_PARALLEL_DEBLOCKING
- enum lf_path path;
- LOOP_FILTER_MASK lfm;
-
- if (y_only)
- path = LF_PATH_444;
- else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
- path = LF_PATH_420;
- else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
- path = LF_PATH_444;
- else
- path = LF_PATH_SLOW;
-
- for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
- MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
- av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
-
- // TODO(JBB): Make setup_mask work for non 420.
- av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
-
- av1_filter_block_plane_ss00_ver(cm, &planes[0], mi_row, &lfm);
- av1_filter_block_plane_ss00_hor(cm, &planes[0], mi_row, &lfm);
- for (plane = 1; plane < num_planes; ++plane) {
- switch (path) {
- case LF_PATH_420:
- av1_filter_block_plane_ss11_ver(cm, &planes[plane], mi_row, &lfm);
- av1_filter_block_plane_ss11_hor(cm, &planes[plane], mi_row, &lfm);
- break;
- case LF_PATH_444:
- av1_filter_block_plane_ss00_ver(cm, &planes[plane], mi_row, &lfm);
- av1_filter_block_plane_ss00_hor(cm, &planes[plane], mi_row, &lfm);
- break;
- case LF_PATH_SLOW:
- av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
- mi_row, mi_col, plane);
- av1_filter_block_plane_non420_hor(cm, &planes[plane], mi + mi_col,
- mi_row, mi_col, plane);
-
- break;
+ } else {
+ // filter all vertical edges in every 128x128 super block
+ for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
+ for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
+ mi_col, plane, plane + 1);
+ av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
+ mi_col);
+ }
+ }
+
+ // filter all horizontal edges in every 128x128 super block
+ for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
+ for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
+ av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
+ mi_col, plane, plane + 1);
+ av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
+ mi_col);
}
}
}
+#endif // LOOP_FILTER_BITMASK
}
-#endif // CONFIG_PARALLEL_DEBLOCKING
-#endif // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
}
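
To make the combined vertical/horizontal pass above concrete, here is a short trace for one row of three superblock columns (mi_col = 0, 32, 64), assuming MAX_MIB_SIZE == 32 MI units:

    mi_col = 0  : vert(0)
    mi_col = 32 : vert(32), horz(0)
    mi_col = 64 : vert(64), horz(32)
    after loop  : horz(64)

Horizontal filtering of a superblock therefore starts only after vertical filtering has completed on that superblock and on the one to its right.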
void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
- MACROBLOCKD *xd, int frame_filter_level,
-#if CONFIG_LOOPFILTER_LEVEL
- int frame_filter_level_r,
-#endif
- int y_only, int partial_frame
-#if CONFIG_LPF_SB
- ,
- int mi_row, int mi_col
-#endif
- ) {
+ MACROBLOCKD *xd, int plane_start, int plane_end,
+ int partial_frame) {
int start_mi_row, end_mi_row, mi_rows_to_filter;
-#if CONFIG_EXT_DELTA_Q
-#if CONFIG_LOOPFILTER_LEVEL
- int orig_filter_level[2] = { cm->lf.filter_level[0], cm->lf.filter_level[1] };
-#else
- int orig_filter_level = cm->lf.filter_level;
-#endif
-#endif
-#if CONFIG_LPF_SB
- if (partial_frame && !frame_filter_level) return;
-#else
-#if CONFIG_LOOPFILTER_LEVEL
- if (!frame_filter_level && !frame_filter_level_r) return;
-#else
- if (!frame_filter_level) return;
-#endif
-#endif // CONFIG_LPF_SB
-#if CONFIG_LPF_SB
- int start_mi_col;
- int end_mi_col;
-
- // In the experiment of deblocking filtering per superblock.
- // When partial_frame is 1, it indicates we are searching for the best filter
- // level for current superblock. We reuse frame_filter_level as filter level
- // for superblock, no longer for the whole frame.
- // When partial_frame is 0, it's in the actual filtering stage for the frame
- if (partial_frame) {
- start_mi_row = AOMMAX(0, mi_row - FILT_BOUNDARY_MI_OFFSET);
- start_mi_col = AOMMAX(0, mi_col - FILT_BOUNDARY_MI_OFFSET);
- const int mi_row_range = mi_row - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
- const int mi_col_range = mi_col - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
- end_mi_row = AOMMIN(mi_row_range, cm->mi_rows);
- end_mi_col = AOMMIN(mi_col_range, cm->mi_cols);
-
- av1_loop_filter_sb_level_init(cm, mi_row, mi_col, frame_filter_level);
- } else {
- start_mi_row = 0;
- mi_rows_to_filter = cm->mi_rows;
- end_mi_row = start_mi_row + mi_rows_to_filter;
- start_mi_col = 0;
- end_mi_col = cm->mi_cols;
- }
-#else
start_mi_row = 0;
mi_rows_to_filter = cm->mi_rows;
if (partial_frame && cm->mi_rows > 8) {
@@ -3737,61 +1917,7 @@ void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
}
end_mi_row = start_mi_row + mi_rows_to_filter;
-#if CONFIG_LOOPFILTER_LEVEL
- // TODO(chengchen): refactor the code such that y_only has its matching
- // meaning. Now it means the plane to be filtered in this experiment.
- av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level_r,
- y_only);
-#else
- av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level);
-#endif
-#endif // CONFIG_LPF_SB
-
-#if CONFIG_EXT_DELTA_Q
-#if CONFIG_LOOPFILTER_LEVEL
- cm->lf.filter_level[0] = frame_filter_level;
- cm->lf.filter_level[1] = frame_filter_level_r;
-#else
- cm->lf.filter_level = frame_filter_level;
-#endif
-#endif
-
-#if CONFIG_LPF_SB
- av1_loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row,
- start_mi_col, end_mi_col, y_only);
-#else
- av1_loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only);
-#endif // CONFIG_LPF_SB
-
-#if CONFIG_EXT_DELTA_Q
-#if CONFIG_LOOPFILTER_LEVEL
- cm->lf.filter_level[0] = orig_filter_level[0];
- cm->lf.filter_level[1] = orig_filter_level[1];
-#else
- cm->lf.filter_level = orig_filter_level;
-#endif
-#endif
-}
-
-void av1_loop_filter_data_reset(LFWorkerData *lf_data,
- YV12_BUFFER_CONFIG *frame_buffer,
- struct AV1Common *cm,
- const struct macroblockd_plane *planes) {
- lf_data->frame_buffer = frame_buffer;
- lf_data->cm = cm;
- lf_data->start = 0;
- lf_data->stop = 0;
- lf_data->y_only = 0;
- memcpy(lf_data->planes, planes, sizeof(lf_data->planes));
-}
-
-int av1_loop_filter_worker(LFWorkerData *const lf_data, void *unused) {
- (void)unused;
-#if !CONFIG_LPF_SB
- av1_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
- lf_data->start, lf_data->stop, lf_data->y_only);
-#else
- (void)lf_data;
-#endif // CONFIG_LPF_SB
- return 1;
+ av1_loop_filter_frame_init(cm, plane_start, plane_end);
+ loop_filter_rows(frame, cm, xd, start_mi_row, end_mi_row, plane_start,
+ plane_end);
}
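
A hypothetical call site for the new signature, assuming frame, cm and xd have been set up by the caller and MAX_MB_PLANE == 3, so plane_start = 0 and plane_end = MAX_MB_PLANE select Y, U and V (plane_end is exclusive), while partial_frame = 0 filters every row:

    /* illustrative only; not part of the patch */
    av1_loop_filter_frame(frame, cm, xd, 0, MAX_MB_PLANE, 0);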
diff --git a/third_party/aom/av1/common/av1_loopfilter.h b/third_party/aom/av1/common/av1_loopfilter.h
index ee32c368c..c35c3b2dc 100644
--- a/third_party/aom/av1/common/av1_loopfilter.h
+++ b/third_party/aom/av1/common/av1_loopfilter.h
@@ -12,9 +12,9 @@
#ifndef AV1_COMMON_LOOPFILTER_H_
#define AV1_COMMON_LOOPFILTER_H_
-#include "aom_ports/mem.h"
-#include "./aom_config.h"
+#include "config/aom_config.h"
+#include "aom_ports/mem.h"
#include "av1/common/blockd.h"
#include "av1/common/seg_common.h"
@@ -27,37 +27,111 @@ extern "C" {
#define SIMD_WIDTH 16
-#define MAX_MODE_LF_DELTAS 2
-
enum lf_path {
LF_PATH_420,
LF_PATH_444,
LF_PATH_SLOW,
};
+#if LOOP_FILTER_BITMASK
+typedef struct {
+ uint64_t bits[4];
+} FilterMask;
+
+// This structure holds bit masks for all 4x4 blocks in a 64x64 region.
+// Each 1 bit represents a position in which we want to apply the loop filter.
+// For the Y plane, the 4x4 blocks in a 64x64 region require 16x16 = 256 bits,
+// so we use 4 uint64_t values. For the U and V planes in 4:2:0 format, the
+// plane size is 32x32, so a single uint64_t holds the bitmask.
+// Left_ entries refer to whether we apply a filter on the border to the
+// left of the block. Above_ entries refer to whether or not to apply a
+// filter on the above border.
+// Since each transform is accompanied by a potentially different type of
+// loop filter there is a different entry in the array for each transform size.
+typedef struct {
+ FilterMask left_y[TX_SIZES];
+ FilterMask above_y[TX_SIZES];
+ FilterMask left_u[TX_SIZES];
+ FilterMask above_u[TX_SIZES];
+ FilterMask left_v[TX_SIZES];
+ FilterMask above_v[TX_SIZES];
+
+ // Y plane vertical edge and horizontal edge filter level
+ uint8_t lfl_y_hor[MI_SIZE_64X64][MI_SIZE_64X64];
+ uint8_t lfl_y_ver[MI_SIZE_64X64][MI_SIZE_64X64];
+
+ // U plane vertical edge and horizontal edge filter level
+ uint8_t lfl_u_hor[MI_SIZE_64X64][MI_SIZE_64X64];
+ uint8_t lfl_u_ver[MI_SIZE_64X64][MI_SIZE_64X64];
+
+ // V plane vertical edge and horizontal edge filter level
+ uint8_t lfl_v_hor[MI_SIZE_64X64][MI_SIZE_64X64];
+ uint8_t lfl_v_ver[MI_SIZE_64X64][MI_SIZE_64X64];
+} LoopFilterMask;
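
As a reading aid, a minimal sketch of one plausible bit layout for FilterMask, assuming the 4x4 units of the 64x64 region are stored row-major (MI_SIZE_64X64 == 16, so 16 * 16 = 256 bits spread across bits[0..3]); set_filter_bit is a hypothetical helper, not part of the patch:

    static INLINE void set_filter_bit(FilterMask *mask, int row4x4, int col4x4) {
      const int idx = row4x4 * MI_SIZE_64X64 + col4x4;      /* 0..255 */
      mask->bits[idx >> 6] |= ((uint64_t)1 << (idx & 63));  /* word, bit */
    }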
+
+// To determine whether to apply loop filtering at one transform block edge,
+// we need information of the neighboring transform block. Specifically,
+// in determining a vertical edge, we need the information of the tx block
+// to its left. For a horizontal edge, we need info of the tx block above it.
+// Thus, we need to record the info of the right column and bottom row of
+// tx blocks. This information is recorded for a superblock once its bitmask
+// building is finished, and it is then used when building the bitmask of the
+// next superblock.
+// Information includes:
+// ------------------------------------------------------------
+// MI_SIZE_64X64
+// Y tx_size above |--------------|
+// Y tx_size left |--------------|
+// UV tx_size above |--------------|
+// UV tx_size left |--------------|
+// Y level above |--------------|
+// Y level left |--------------|
+// U level above |--------------|
+// U level left |--------------|
+// V level above |--------------|
+// V level left |--------------|
+// skip |--------------|
+// ------------------------------------------------------------
+typedef struct {
+ TX_SIZE tx_size_y_above[MI_SIZE_64X64];
+ TX_SIZE tx_size_y_left[MI_SIZE_64X64];
+ TX_SIZE tx_size_uv_above[MI_SIZE_64X64];
+ TX_SIZE tx_size_uv_left[MI_SIZE_64X64];
+ uint8_t y_level_above[MI_SIZE_64X64];
+ uint8_t y_level_left[MI_SIZE_64X64];
+ uint8_t u_level_above[MI_SIZE_64X64];
+ uint8_t u_level_left[MI_SIZE_64X64];
+ uint8_t v_level_above[MI_SIZE_64X64];
+ uint8_t v_level_left[MI_SIZE_64X64];
+ uint8_t skip[MI_SIZE_64X64];
+} LpfSuperblockInfo;
+#endif // LOOP_FILTER_BITMASK
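
A rough sketch of how the bottom row of Y transform sizes and filter levels might be copied into this context once a superblock's bitmask is built, so the superblock below can use them as "above" information; cur_tx_size_y and cur_y_level are hypothetical per-superblock arrays indexed [row][col] in 4x4 units, and the actual update code in the patch may differ:

    LpfSuperblockInfo *const sb_info = &cm->lf.neighbor_sb_lpf_info;
    for (int c = 0; c < MI_SIZE_64X64; ++c) {
      sb_info->tx_size_y_above[c] = cur_tx_size_y[MI_SIZE_64X64 - 1][c];
      sb_info->y_level_above[c] = cur_y_level[MI_SIZE_64X64 - 1][c];
    }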
+
struct loopfilter {
-#if CONFIG_LOOPFILTER_LEVEL
int filter_level[2];
int filter_level_u;
int filter_level_v;
-#else
- int filter_level;
-#endif
int sharpness_level;
- int last_sharpness_level;
uint8_t mode_ref_delta_enabled;
uint8_t mode_ref_delta_update;
- // 0 = Intra, Last, Last2+Last3(CONFIG_EXT_REFS),
- // GF, BRF(CONFIG_EXT_REFS), ARF2(CONFIG_EXT_REFS), ARF
- int8_t ref_deltas[TOTAL_REFS_PER_FRAME];
- int8_t last_ref_deltas[TOTAL_REFS_PER_FRAME];
+ // 0 = Intra, Last, Last2+Last3,
+ // GF, BRF, ARF2, ARF
+ int8_t ref_deltas[REF_FRAMES];
// 0 = ZERO_MV, MV
int8_t mode_deltas[MAX_MODE_LF_DELTAS];
- int8_t last_mode_deltas[MAX_MODE_LF_DELTAS];
+
+ int combine_vert_horz_lf;
+
+#if LOOP_FILTER_BITMASK
+ LoopFilterMask *lfm;
+ size_t lfm_num;
+ int lfm_stride;
+ LpfSuperblockInfo neighbor_sb_lpf_info;
+#endif // LOOP_FILTER_BITMASK
};
// Need to align this structure so when it is declared and
@@ -70,127 +144,56 @@ typedef struct {
typedef struct {
loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1];
-#if CONFIG_LOOPFILTER_LEVEL
- uint8_t lvl[MAX_SEGMENTS][2][TOTAL_REFS_PER_FRAME][MAX_MODE_LF_DELTAS];
-#else
- uint8_t lvl[MAX_SEGMENTS][TOTAL_REFS_PER_FRAME][MAX_MODE_LF_DELTAS];
-#endif
+ uint8_t lvl[MAX_MB_PLANE][MAX_SEGMENTS][2][REF_FRAMES][MAX_MODE_LF_DELTAS];
} loop_filter_info_n;
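
The lvl[] table is now indexed per plane first. Following the declaration order above, a lookup would read roughly as below; the meaning of the third index (0 for vertical edges, 1 for horizontal edges) is an assumption here, as is the mode_delta name:

    /* illustrative indexing only */
    const uint8_t level =
        lf_info->lvl[plane][segment_id][dir][ref_frame][mode_delta];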
-// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
-// Each 1 bit represents a position in which we want to apply the loop filter.
-// Left_ entries refer to whether we apply a filter on the border to the
-// left of the block. Above_ entries refer to whether or not to apply a
-// filter on the above border. Int_ entries refer to whether or not to
-// apply borders on the 4x4 edges within the 8x8 block that each bit
-// represents.
-// Since each transform is accompanied by a potentially different type of
-// loop filter there is a different entry in the array for each transform size.
-typedef struct {
- uint64_t left_y[TX_SIZES];
- uint64_t above_y[TX_SIZES];
- uint64_t int_4x4_y;
- uint16_t left_uv[TX_SIZES];
- uint16_t above_uv[TX_SIZES];
- uint16_t left_int_4x4_uv;
- uint16_t above_int_4x4_uv;
- uint8_t lfl_y[MAX_MIB_SIZE][MAX_MIB_SIZE];
- uint8_t lfl_uv[MAX_MIB_SIZE / 2][MAX_MIB_SIZE / 2];
-} LOOP_FILTER_MASK;
-
/* assorted loopfilter functions which get used elsewhere */
struct AV1Common;
struct macroblockd;
struct AV1LfSyncData;
-// This function sets up the bit masks for the entire 64x64 region represented
-// by mi_row, mi_col.
-void av1_setup_mask(struct AV1Common *const cm, const int mi_row,
- const int mi_col, MODE_INFO **mi_8x8,
- const int mode_info_stride, LOOP_FILTER_MASK *lfm);
-
-void av1_filter_block_plane_ss00_ver(struct AV1Common *const cm,
- struct macroblockd_plane *const plane,
- int mi_row, LOOP_FILTER_MASK *lfm);
-void av1_filter_block_plane_ss00_hor(struct AV1Common *const cm,
- struct macroblockd_plane *const plane,
- int mi_row, LOOP_FILTER_MASK *lfm);
-void av1_filter_block_plane_ss11_ver(struct AV1Common *const cm,
- struct macroblockd_plane *const plane,
- int mi_row, LOOP_FILTER_MASK *lfm);
-void av1_filter_block_plane_ss11_hor(struct AV1Common *const cm,
- struct macroblockd_plane *const plane,
- int mi_row, LOOP_FILTER_MASK *lfm);
-
-void av1_filter_block_plane_non420_ver(struct AV1Common *const cm,
- struct macroblockd_plane *plane,
- MODE_INFO **mi_8x8, int mi_row,
- int mi_col, int pl);
-void av1_filter_block_plane_non420_hor(struct AV1Common *const cm,
- struct macroblockd_plane *plane,
- MODE_INFO **mi_8x8, int mi_row,
- int mi_col, int pl);
-
void av1_loop_filter_init(struct AV1Common *cm);
-// Update the loop filter for the current frame.
-// This should be called before av1_loop_filter_rows(),
-// av1_loop_filter_frame()
-// calls this function directly.
-void av1_loop_filter_frame_init(struct AV1Common *cm, int default_filt_lvl,
- int default_filt_lvl_r
-#if CONFIG_LOOPFILTER_LEVEL
- ,
- int plane
-#endif
- );
+void av1_loop_filter_frame_init(struct AV1Common *cm, int plane_start,
+ int plane_end);
-#if CONFIG_LPF_SB
-void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
- struct macroblockd *mbd, int filter_level,
- int y_only, int partial_frame, int mi_row,
- int mi_col);
-
-// Apply the loop filter to [start, stop) macro block rows in frame_buffer.
-void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
- struct AV1Common *cm,
- struct macroblockd_plane *planes, int start, int stop,
- int col_start, int col_end, int y_only);
-
-void av1_loop_filter_sb_level_init(struct AV1Common *cm, int mi_row, int mi_col,
- int lvl);
-#else
void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
- struct macroblockd *mbd, int filter_level,
-#if CONFIG_LOOPFILTER_LEVEL
- int filter_level_r,
-#endif
- int y_only, int partial_frame);
+ struct macroblockd *mbd, int plane_start,
+ int plane_end, int partial_frame);
+
+void av1_filter_block_plane_vert(const struct AV1Common *const cm,
+ const MACROBLOCKD *const xd, const int plane,
+ const MACROBLOCKD_PLANE *const plane_ptr,
+ const uint32_t mi_row, const uint32_t mi_col);
-// Apply the loop filter to [start, stop) macro block rows in frame_buffer.
-void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
- struct AV1Common *cm,
- struct macroblockd_plane *planes, int start, int stop,
- int y_only);
-#endif // CONFIG_LPF_SB
+void av1_filter_block_plane_horz(const struct AV1Common *const cm,
+ const MACROBLOCKD *const xd, const int plane,
+ const MACROBLOCKD_PLANE *const plane_ptr,
+ const uint32_t mi_row, const uint32_t mi_col);
typedef struct LoopFilterWorkerData {
YV12_BUFFER_CONFIG *frame_buffer;
struct AV1Common *cm;
struct macroblockd_plane planes[MAX_MB_PLANE];
-
- int start;
- int stop;
- int y_only;
+ // TODO(Ranjit): When the filter functions are modified to use xd->lossless
+ // add lossless as a member here.
+ MACROBLOCKD *xd;
} LFWorkerData;
-void av1_loop_filter_data_reset(LFWorkerData *lf_data,
- YV12_BUFFER_CONFIG *frame_buffer,
- struct AV1Common *cm,
- const struct macroblockd_plane *planes);
+#if LOOP_FILTER_BITMASK
+void av1_setup_bitmask(struct AV1Common *const cm, int mi_row, int mi_col,
+ int plane, int subsampling_x, int subsampling_y,
+ int row_end, int col_end);
+
+void av1_filter_block_plane_ver(struct AV1Common *const cm,
+ struct macroblockd_plane *const plane_ptr,
+ int pl, int mi_row, int mi_col);
+
+void av1_filter_block_plane_hor(struct AV1Common *const cm,
+ struct macroblockd_plane *const plane, int pl,
+ int mi_row, int mi_col);
+#endif
-// Operates on the rows described by 'lf_data'.
-int av1_loop_filter_worker(LFWorkerData *const lf_data, void *unused);
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/av1_rtcd.c b/third_party/aom/av1/common/av1_rtcd.c
index f9ccd1979..38e26bee1 100644
--- a/third_party/aom/av1/common/av1_rtcd.c
+++ b/third_party/aom/av1/common/av1_rtcd.c
@@ -8,9 +8,11 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
#define RTCD_C
-#include "./av1_rtcd.h"
+#include "config/av1_rtcd.h"
+
#include "aom_ports/aom_once.h"
void av1_rtcd() {
diff --git a/third_party/aom/av1/common/av1_rtcd_defs.pl b/third_party/aom/av1/common/av1_rtcd_defs.pl
index 203426e59..6aa925515 100755
--- a/third_party/aom/av1/common/av1_rtcd_defs.pl
+++ b/third_party/aom/av1/common/av1_rtcd_defs.pl
@@ -1,3 +1,13 @@
+##
+## Copyright (c) 2017, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
sub av1_common_forward_decls() {
print <<EOF
/*
@@ -13,6 +23,7 @@ print <<EOF
#include "av1/common/convolve.h"
#include "av1/common/av1_txfm.h"
#include "av1/common/odintrin.h"
+#include "av1/common/restoration.h"
struct macroblockd;
@@ -21,9 +32,22 @@ struct macroblock;
struct txfm_param;
struct aom_variance_vtable;
struct search_site_config;
-struct mv;
-union int_mv;
struct yv12_buffer_config;
+
+/* Function pointers returned by CfL functions */
+typedef void (*cfl_subsample_lbd_fn)(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+
+typedef void (*cfl_subsample_hbd_fn)(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+
+typedef void (*cfl_subtract_average_fn)(const uint16_t *src, int16_t *dst);
+
+typedef void (*cfl_predict_lbd_fn)(const int16_t *src, uint8_t *dst,
+ int dst_stride, int alpha_q3);
+
+typedef void (*cfl_predict_hbd_fn)(const int16_t *src, uint16_t *dst,
+ int dst_stride, int alpha_q3, int bd);
EOF
}
forward_decls qw/av1_common_forward_decls/;
@@ -38,233 +62,55 @@ if ($opts{arch} eq "x86_64") {
$avx2_x86_64 = 'avx2';
}
-#
-# 10/12-tap convolution filters
-#
-add_proto qw/void av1_lowbd_convolve_init/, "void";
-specialize qw/av1_lowbd_convolve_init ssse3/;
-
-add_proto qw/void av1_convolve_horiz/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params";
-specialize qw/av1_convolve_horiz ssse3/;
-
-add_proto qw/void av1_convolve_vert/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params";
-specialize qw/av1_convolve_vert ssse3/;
-
-if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void av1_highbd_convolve_init/, "void";
- specialize qw/av1_highbd_convolve_init sse4_1/;
- add_proto qw/void av1_highbd_convolve_horiz/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd";
- specialize qw/av1_highbd_convolve_horiz sse4_1/;
- add_proto qw/void av1_highbd_convolve_vert/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd";
- specialize qw/av1_highbd_convolve_vert sse4_1/;
-}
-
-#
-# Inverse dct
-#
-if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void av1_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht4x4_16_add sse2/;
-
- add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht4x8_32_add sse2/;
-
- add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht8x4_32_add sse2/;
-
- add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht8x16_128_add sse2/;
-
- add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht16x8_128_add sse2/;
-
- add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht16x32_512_add sse2/;
-
- add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht32x16_512_add sse2/;
-
- add_proto qw/void av1_iht4x16_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+add_proto qw/void av1_convolve_horiz_rs/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn";
+specialize qw/av1_convolve_horiz_rs sse4_1/;
- add_proto qw/void av1_iht16x4_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+add_proto qw/void av1_highbd_convolve_horiz_rs/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd";
+specialize qw/av1_highbd_convolve_horiz_rs sse4_1/;
- add_proto qw/void av1_iht8x32_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+add_proto qw/void av1_wiener_convolve_add_src/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params";
- add_proto qw/void av1_iht32x8_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+add_proto qw/void av1_highbd_wiener_convolve_add_src/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps";
- add_proto qw/void av1_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht8x8_64_add sse2/;
+specialize qw/av1_wiener_convolve_add_src sse2 avx2 neon/;
+specialize qw/av1_highbd_wiener_convolve_add_src ssse3/;
+specialize qw/av1_highbd_wiener_convolve_add_src avx2/;
- add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
- specialize qw/av1_iht16x16_256_add sse2 avx2/;
+# directional intra predictor functions
+add_proto qw/void av1_dr_prediction_z1/, "uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy";
+add_proto qw/void av1_dr_prediction_z2/, "uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int upsample_left, int dx, int dy";
+add_proto qw/void av1_dr_prediction_z3/, "uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_left, int dx, int dy";
- add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
-} else {
- add_proto qw/void av1_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- if (aom_config("CONFIG_DAALA_DCT4") ne "yes") {
- specialize qw/av1_iht4x4_16_add sse2 neon/;
- }
-
- add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht4x8_32_add sse2/;
-
- add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht8x4_32_add sse2/;
-
- add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht8x16_128_add sse2/;
-
- add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht16x8_128_add sse2/;
-
- add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht16x32_512_add sse2/;
-
- add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht32x16_512_add sse2/;
-
- add_proto qw/void av1_iht4x16_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-
- add_proto qw/void av1_iht16x4_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-
- add_proto qw/void av1_iht8x32_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-
- add_proto qw/void av1_iht32x8_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-
- add_proto qw/void av1_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- if (aom_config("CONFIG_DAALA_DCT8") ne "yes") {
- specialize qw/av1_iht8x8_64_add sse2 neon/;
- }
-
- add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
- if (aom_config("CONFIG_DAALA_DCT16") ne "yes") {
- specialize qw/av1_iht16x16_256_add sse2 avx2/;
- }
-
- add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
-
- if (aom_config("CONFIG_EXT_TX") ne "yes") {
- if (aom_config("CONFIG_DAALA_DCT4") ne "yes") {
- specialize qw/av1_iht4x4_16_add msa/;
- }
- if (aom_config("CONFIG_DAALA_DCT8") ne "yes") {
- specialize qw/av1_iht8x8_64_add msa/;
- }
- if (aom_config("CONFIG_DAALA_DCT16") ne "yes") {
- specialize qw/av1_iht16x16_256_add msa/;
- }
- }
-}
-
-add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
-
-if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void av1_iht64x64_4096_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
- add_proto qw/void av1_iht32x64_2048_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
- add_proto qw/void av1_iht64x32_2048_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
-}
-
-if (aom_config("CONFIG_NEW_QUANT") eq "yes") {
- add_proto qw/void quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
-
- add_proto qw/void quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
-
- add_proto qw/void quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
-
- add_proto qw/void quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
-
- if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
-
- add_proto qw/void quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
- }
-}
# FILTER_INTRA predictor functions
-if (aom_config("CONFIG_FILTER_INTRA") eq "yes") {
- add_proto qw/void av1_dc_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left";
- add_proto qw/void av1_v_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left";
- add_proto qw/void av1_h_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left";
- add_proto qw/void av1_d45_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left";
- add_proto qw/void av1_d135_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left";
- add_proto qw/void av1_d117_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left";
- add_proto qw/void av1_d153_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left";
- add_proto qw/void av1_d207_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left";
- add_proto qw/void av1_d63_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left";
- add_proto qw/void av1_tm_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left";
- # High bitdepth functions
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void av1_highbd_dc_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint16_t *above, const uint16_t *left, int bd";
- add_proto qw/void av1_highbd_v_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint16_t *above, const uint16_t *left, int bd";
- add_proto qw/void av1_highbd_h_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint16_t *above, const uint16_t *left, int bd";
- add_proto qw/void av1_highbd_d45_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint16_t *above, const uint16_t *left, int bd";
- add_proto qw/void av1_highbd_d135_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint16_t *above, const uint16_t *left, int bd";
- add_proto qw/void av1_highbd_d117_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint16_t *above, const uint16_t *left, int bd";
- add_proto qw/void av1_highbd_d153_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint16_t *above, const uint16_t *left, int bd";
- add_proto qw/void av1_highbd_d207_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint16_t *above, const uint16_t *left, int bd";
- add_proto qw/void av1_highbd_d63_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint16_t *above, const uint16_t *left, int bd";
- add_proto qw/void av1_highbd_tm_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint16_t *above, const uint16_t *left, int bd";
- }
-}
+add_proto qw/void av1_filter_intra_predictor/, "uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode";
+specialize qw/av1_filter_intra_predictor sse4_1/;
# High bitdepth functions
-if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- #
- # Sub Pixel Filters
- #
- add_proto qw/void av1_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
-
- add_proto qw/void av1_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
-
- add_proto qw/void av1_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/av1_highbd_convolve8/, "$sse2_x86_64";
-
- add_proto qw/void av1_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/av1_highbd_convolve8_horiz/, "$sse2_x86_64";
-
- add_proto qw/void av1_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/av1_highbd_convolve8_vert/, "$sse2_x86_64";
- add_proto qw/void av1_highbd_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/av1_highbd_convolve8_avg/, "$sse2_x86_64";
-
- add_proto qw/void av1_highbd_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/av1_highbd_convolve8_avg_horiz/, "$sse2_x86_64";
-
- add_proto qw/void av1_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/av1_highbd_convolve8_avg_vert/, "$sse2_x86_64";
-
- #
- # dct
- #
- add_proto qw/void av1_highbd_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-
- add_proto qw/void av1_highbd_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-
- add_proto qw/void av1_highbd_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-
- add_proto qw/void av1_highbd_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-
- add_proto qw/void av1_highbd_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-
- add_proto qw/void av1_highbd_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-
- add_proto qw/void av1_highbd_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+#
+# Sub Pixel Filters
+#
+add_proto qw/void av1_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- add_proto qw/void av1_highbd_iht4x16_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+add_proto qw/void av1_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- add_proto qw/void av1_highbd_iht16x4_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+add_proto qw/void av1_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+specialize qw/av1_highbd_convolve8/, "$sse2_x86_64";
- add_proto qw/void av1_highbd_iht8x32_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+add_proto qw/void av1_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+specialize qw/av1_highbd_convolve8_horiz/, "$sse2_x86_64";
- add_proto qw/void av1_highbd_iht32x8_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+add_proto qw/void av1_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+specialize qw/av1_highbd_convolve8_vert/, "$sse2_x86_64";
- add_proto qw/void av1_highbd_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+#inv txfm
+add_proto qw/void av1_inv_txfm_add/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
+specialize qw/av1_inv_txfm_add ssse3 avx2/;
- add_proto qw/void av1_highbd_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
-}
+add_proto qw/void av1_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+add_proto qw/void av1_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-#inv txfm
add_proto qw/void av1_inv_txfm2d_add_4x8/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
add_proto qw/void av1_inv_txfm2d_add_8x4/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
add_proto qw/void av1_inv_txfm2d_add_8x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
@@ -272,227 +118,128 @@ add_proto qw/void av1_inv_txfm2d_add_16x8/, "const int32_t *input, uint16_t *out
add_proto qw/void av1_inv_txfm2d_add_16x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
add_proto qw/void av1_inv_txfm2d_add_32x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
add_proto qw/void av1_inv_txfm2d_add_4x4/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-if (aom_config("CONFIG_DAALA_DCT4") ne "yes") {
- specialize qw/av1_inv_txfm2d_add_4x4 sse4_1/;
-}
+specialize qw/av1_inv_txfm2d_add_4x4 sse4_1/;
add_proto qw/void av1_inv_txfm2d_add_8x8/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-if (aom_config("CONFIG_DAALA_DCT8") ne "yes") {
- specialize qw/av1_inv_txfm2d_add_8x8 sse4_1/;
-}
+specialize qw/av1_inv_txfm2d_add_8x8 sse4_1/;
add_proto qw/void av1_inv_txfm2d_add_16x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-if (aom_config("CONFIG_DAALA_DCT16") ne "yes") {
- specialize qw/av1_inv_txfm2d_add_16x16 sse4_1/;
-}
+specialize qw/av1_inv_txfm2d_add_16x16 sse4_1/;
add_proto qw/void av1_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-if (aom_config("CONFIG_DAALA_DCT32") ne "yes") {
- specialize qw/av1_inv_txfm2d_add_32x32 avx2/;
-}
-if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void av1_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_inv_txfm2d_add_64x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_inv_txfm2d_add_32x64/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-}
+specialize qw/av1_inv_txfm2d_add_32x32 avx2/;
-#
-# Encoder functions below this point.
-#
-if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
-
- # ENCODEMB INVOKE
-
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- # the transform coefficients are held in 32-bit
- # values, so the assembler code for av1_block_error can no longer be used.
- add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
- specialize qw/av1_block_error avx2/;
-
- add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/av1_quantize_fp sse2 avx2/;
-
- add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/av1_quantize_fp_32x32 avx2/;
-
- if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- }
- } else {
- add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
- specialize qw/av1_block_error sse2 avx2 msa/;
-
- add_proto qw/int64_t av1_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
- specialize qw/av1_block_error_fp neon sse2/;
-
- add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/av1_quantize_fp neon sse2 avx2/, "$ssse3_x86_64";
-
- add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/av1_quantize_fp_32x32 avx2/, "$ssse3_x86_64";
-
- if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- }
-
- }
-
- # fdct functions
-
- add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- if (aom_config("CONFIG_DAALA_DCT4") ne "yes") {
- specialize qw/av1_fht4x4 sse2/;
- }
+add_proto qw/void av1_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+add_proto qw/void av1_inv_txfm2d_add_32x64/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+add_proto qw/void av1_inv_txfm2d_add_64x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+add_proto qw/void av1_inv_txfm2d_add_16x64/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+add_proto qw/void av1_inv_txfm2d_add_64x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
-
- add_proto qw/void av1_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- if (aom_config("CONFIG_DAALA_DCT8") ne "yes") {
- specialize qw/av1_fht8x8 sse2/;
- }
-
- add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- if (aom_config("CONFIG_DAALA_DCT16") ne "yes") {
- specialize qw/av1_fht16x16 sse2 avx2/;
- }
+specialize qw/av1_inv_txfm2d_add_64x64 sse4_1/;
- add_proto qw/void av1_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- if (aom_config("CONFIG_DAALA_DCT32") ne "yes") {
- specialize qw/av1_fht32x32 sse2 avx2/;
- }
+add_proto qw/void av1_inv_txfm2d_add_4x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+add_proto qw/void av1_inv_txfm2d_add_16x4/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+add_proto qw/void av1_inv_txfm2d_add_8x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+add_proto qw/void av1_inv_txfm2d_add_32x8/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
- if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void av1_fht64x64/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- add_proto qw/void av1_fht64x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- add_proto qw/void av1_fht32x64/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- }
+# directional intra predictor functions
+add_proto qw/void av1_highbd_dr_prediction_z1/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd";
+add_proto qw/void av1_highbd_dr_prediction_z2/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int upsample_left, int dx, int dy, int bd";
+add_proto qw/void av1_highbd_dr_prediction_z3/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_left, int dx, int dy, int bd";
- add_proto qw/void av1_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht4x8 sse2/;
+# build compound seg mask functions
+add_proto qw/void av1_build_compound_diffwtd_mask/, "uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w";
+specialize qw/av1_build_compound_diffwtd_mask sse4_1/;
- add_proto qw/void av1_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht8x4 sse2/;
+add_proto qw/void av1_build_compound_diffwtd_mask_highbd/, "uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd";
+specialize qw/av1_build_compound_diffwtd_mask_highbd ssse3 avx2/;
- add_proto qw/void av1_fht8x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht8x16 sse2/;
+add_proto qw/void av1_build_compound_diffwtd_mask_d16/, "uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd";
+specialize qw/av1_build_compound_diffwtd_mask_d16 sse4_1 neon/;
- add_proto qw/void av1_fht16x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht16x8 sse2/;
-
- add_proto qw/void av1_fht16x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht16x32 sse2/;
+#
+# Encoder functions below this point.
+#
+if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
- add_proto qw/void av1_fht32x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht32x16 sse2/;
+ # ENCODEMB INVOKE
- add_proto qw/void av1_fht4x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ # the transform coefficients are held in 32-bit
+ # values, so the assembler code for av1_block_error can no longer be used.
+ add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+ specialize qw/av1_block_error avx2/;
- add_proto qw/void av1_fht16x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/av1_quantize_fp sse2 avx2/;
- add_proto qw/void av1_fht8x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/av1_quantize_fp_32x32 avx2/;
- add_proto qw/void av1_fht32x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/av1_quantize_fp_64x64 avx2/;
- if (aom_config("CONFIG_HIGHBITDEPTH") ne "yes") {
- if (aom_config("CONFIG_EXT_TX") ne "yes") {
- if (aom_config("CONFIG_DAALA_DCT4") ne "yes") {
- specialize qw/av1_fht4x4 msa/;
- }
- if (aom_config("CONFIG_DAALA_DCT8") ne "yes") {
- specialize qw/av1_fht8x8 msa/;
- }
- if (aom_config("CONFIG_DAALA_DCT16") ne "yes") {
- specialize qw/av1_fht16x16 msa/;
- }
- }
- }
+ # fdct functions
- add_proto qw/void av1_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bsx, int bsy, TX_TYPE tx_type";
+ add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
#fwd txfm
+ add_proto qw/void av1_lowbd_fwd_txfm/, "const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TxfmParam *txfm_param";
+ specialize qw/av1_lowbd_fwd_txfm sse2 sse4_1/;
+
add_proto qw/void av1_fwd_txfm2d_4x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_8x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_8x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_16x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_16x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_32x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_4x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_16x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_8x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_32x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- if (aom_config("CONFIG_DAALA_DCT4") ne "yes") {
- specialize qw/av1_fwd_txfm2d_4x4 sse4_1/;
- }
+ specialize qw/av1_fwd_txfm2d_4x4 sse4_1/;
add_proto qw/void av1_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- if (aom_config("CONFIG_DAALA_DCT8") ne "yes") {
- specialize qw/av1_fwd_txfm2d_8x8 sse4_1/;
- }
+ specialize qw/av1_fwd_txfm2d_8x8 sse4_1/;
add_proto qw/void av1_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- if (aom_config("CONFIG_DAALA_DCT16") ne "yes") {
- specialize qw/av1_fwd_txfm2d_16x16 sse4_1/;
- }
+ specialize qw/av1_fwd_txfm2d_16x16 sse4_1/;
add_proto qw/void av1_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- if (aom_config("CONFIG_DAALA_DCT32") ne "yes") {
- specialize qw/av1_fwd_txfm2d_32x32 sse4_1/;
- }
-
- if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void av1_fwd_txfm2d_32x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_fwd_txfm2d_64x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- }
+ specialize qw/av1_fwd_txfm2d_32x32 sse4_1/;
+
+ add_proto qw/void av1_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_32x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_64x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_16x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_64x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+
#
# Motion search
#
- add_proto qw/int av1_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv";
- specialize qw/av1_full_search_sad sse3 sse4_1/;
- $av1_full_search_sad_sse3=av1_full_search_sadx3;
- $av1_full_search_sad_sse4_1=av1_full_search_sadx8;
+ add_proto qw/int av1_diamond_search_sad/, "struct macroblock *x, const struct search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const MV *center_mv";
- add_proto qw/int av1_diamond_search_sad/, "struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv";
-
- add_proto qw/int av1_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv";
+ add_proto qw/int av1_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const MV *center_mv";
add_proto qw/void av1_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
specialize qw/av1_temporal_filter_apply sse2 msa/;
- if (aom_config("CONFIG_AOM_QM") eq "yes") {
- add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
- } else {
- add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
- }
-
- if (aom_config("CONFIG_LGT_FROM_PRED") eq "yes") {
- add_proto qw/void flgt2d_from_pred/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- }
-
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
-
- # ENCODEMB INVOKE
- if (aom_config("CONFIG_NEW_QUANT") eq "yes") {
- add_proto qw/void highbd_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
- add_proto qw/void highbd_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
-
- add_proto qw/void highbd_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
-
- add_proto qw/void highbd_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
-
- if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void highbd_quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
-
- add_proto qw/void highbd_quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
- }
- }
-
- add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
- specialize qw/av1_highbd_block_error sse2/;
+ # ENCODEMB INVOKE
- add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
+ add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
+ specialize qw/av1_highbd_block_error sse2/;
- }
+ add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
- add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
+ add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
specialize qw/av1_highbd_quantize_fp sse4_1 avx2/;
add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
# End av1_high encoder functions
+ # txb
+ add_proto qw/void av1_get_nz_map_contexts/, "const uint8_t *const levels, const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, const TX_CLASS tx_class, int8_t *const coeff_contexts";
+ specialize qw/av1_get_nz_map_contexts sse2/;
+ add_proto qw/void av1_txb_init_levels/, "const tran_low_t *const coeff, const int width, const int height, uint8_t *const levels";
+ specialize qw/av1_txb_init_levels sse4_1/;
+
add_proto qw/uint64_t av1_wedge_sse_from_residuals/, "const int16_t *r1, const int16_t *d, const uint8_t *m, int N";
specialize qw/av1_wedge_sse_from_residuals sse2/;
add_proto qw/int av1_wedge_sign_from_residuals/, "const int16_t *ds, const uint8_t *m, int N, int64_t limit";
@@ -500,179 +247,132 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/void av1_wedge_compute_delta_squares/, "int16_t *d, const int16_t *a, const int16_t *b, int N";
specialize qw/av1_wedge_compute_delta_squares sse2/;
-}
-# end encoder functions
-
-# If PVQ is enabled, fwd transforms are required by decoder
-if (aom_config("CONFIG_PVQ") eq "yes") {
- # fdct functions
-
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht4x4 sse2/;
-
- add_proto qw/void av1_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht8x8 sse2/;
-
- add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht16x16 sse2/;
-
- add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/av1_fwht4x4 sse2/;
- } else {
- add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht4x4 sse2 msa/;
-
- add_proto qw/void av1_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht8x8 sse2 msa/;
-
- add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht16x16 sse2 msa/;
-
- add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/av1_fwht4x4 msa sse2/;
- }
+ # hash
+ add_proto qw/uint32_t av1_get_crc32c_value/, "void *crc_calculator, uint8_t *p, int length";
+ specialize qw/av1_get_crc32c_value sse4_2/;
}
+# end encoder functions
# Deringing Functions
-if (aom_config("CONFIG_CDEF") eq "yes") {
- add_proto qw/int cdef_find_dir/, "const uint16_t *img, int stride, int32_t *var, int coeff_shift";
- if (aom_config("CONFIG_CDEF_SINGLEPASS") ne "yes") {
- add_proto qw/void aom_clpf_block_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
- add_proto qw/void aom_clpf_hblock_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
- add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
- add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
- add_proto qw/void cdef_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
- add_proto qw/void cdef_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
- add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
- add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
- add_proto qw/void copy_8x8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
- add_proto qw/void copy_4x4_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
- } else {
- add_proto qw/void cdef_filter_block/, "uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max";
- }
-
- add_proto qw/void copy_rect8_8bit_to_16bit/, "uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h";
- add_proto qw/void copy_rect8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h";
-
- # VS compiling for 32 bit targets does not support vector types in
- # structs as arguments, which makes the v256 type of the intrinsics
- # hard to support, so optimizations for this target are disabled.
- if ($opts{config} !~ /libs-x86-win32-vs.*/) {
- if (aom_config("CONFIG_CDEF_SINGLEPASS") eq "yes") {
- specialize qw/cdef_find_dir sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/cdef_filter_block sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
- } else {
- specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
- specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
- specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
- specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
- specialize qw/aom_clpf_hblock sse2 ssse3 sse4_1 neon/;
- specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
- specialize qw/cdef_direction_4x4 sse2 ssse3 sse4_1 neon/;
- specialize qw/cdef_direction_8x8 sse2 ssse3 sse4_1 neon/;
-
- specialize qw/copy_8x8_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
- specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
- specialize qw/copy_8x8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
- specialize qw/copy_4x4_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
- specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 neon/;
- specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
- }
- }
-}
+add_proto qw/int cdef_find_dir/, "const uint16_t *img, int stride, int32_t *var, int coeff_shift";
+add_proto qw/void cdef_filter_block/, "uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift";
-# PVQ Functions
+add_proto qw/void copy_rect8_8bit_to_16bit/, "uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h";
+add_proto qw/void copy_rect8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h";
-if (aom_config("CONFIG_PVQ") eq "yes") {
- add_proto qw/double pvq_search_rdo_double/, "const od_val16 *xcoeff, int n, int k, int *ypulse, double g2, double pvq_norm_lambda, int prev_k";
- specialize qw/pvq_search_rdo_double sse4_1/;
+# VS compiling for 32 bit targets does not support vector types in
+# structs as arguments, which makes the v256 type of the intrinsics
+# hard to support, so optimizations for this target are disabled.
+if ($opts{config} !~ /libs-x86-win32-vs.*/) {
+ specialize qw/cdef_find_dir sse2 ssse3 sse4_1 avx2 neon/;
+ specialize qw/cdef_filter_block sse2 ssse3 sse4_1 avx2 neon/;
+ specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
+ specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
}
# WARPED_MOTION / GLOBAL_MOTION functions
-if ((aom_config("CONFIG_WARPED_MOTION") eq "yes") ||
- (aom_config("CONFIG_GLOBAL_MOTION") eq "yes")) {
- add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
- specialize qw/av1_warp_affine sse2 ssse3/;
+add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
+specialize qw/av1_warp_affine sse4_1/;
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
- specialize qw/av1_highbd_warp_affine ssse3/;
- }
-}
+add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
+specialize qw/av1_highbd_warp_affine sse4_1/;
-if (aom_config("CONFIG_GLOBAL_MOTION") eq "yes" &&
- aom_config("CONFIG_AV1_ENCODER") eq "yes") {
+if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/double compute_cross_correlation/, "unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2";
specialize qw/compute_cross_correlation sse4_1/;
}
# LOOP_RESTORATION functions
-if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
- add_proto qw/void apply_selfguided_restoration/, "uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf";
- specialize qw/apply_selfguided_restoration sse4_1/;
+add_proto qw/void apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd";
+specialize qw/apply_selfguided_restoration sse4_1 avx2/;
- add_proto qw/void av1_selfguided_restoration/, "uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps";
- specialize qw/av1_selfguided_restoration sse4_1/;
+add_proto qw/void av1_selfguided_restoration/, "const uint8_t *dgd8, int width, int height,
+ int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
+ int sgr_params_idx, int bit_depth, int highbd";
+specialize qw/av1_selfguided_restoration sse4_1 avx2/;
- add_proto qw/void av1_highpass_filter/, "uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps";
- specialize qw/av1_highpass_filter sse4_1/;
+# CONVOLVE_ROUND/COMPOUND_ROUND functions
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void apply_selfguided_restoration_highbd/, "uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf";
- specialize qw/apply_selfguided_restoration_highbd sse4_1/;
+add_proto qw/void av1_convolve_2d_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_convolve_2d_copy_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_convolve_x_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_convolve_y_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_jnt_convolve_x/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_jnt_convolve_y/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+add_proto qw/void av1_highbd_convolve_2d_copy_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
+add_proto qw/void av1_highbd_convolve_2d_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
+add_proto qw/void av1_highbd_convolve_x_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
+add_proto qw/void av1_highbd_convolve_y_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
+add_proto qw/void av1_highbd_jnt_convolve_2d/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
+add_proto qw/void av1_highbd_jnt_convolve_x/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
+add_proto qw/void av1_highbd_jnt_convolve_y/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
+add_proto qw/void av1_highbd_jnt_convolve_2d_copy/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
+
+ add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
+ add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd";
+
+ specialize qw/av1_convolve_2d_sr sse2 avx2 neon/;
+ specialize qw/av1_convolve_2d_copy_sr sse2 avx2 neon/;
+ specialize qw/av1_convolve_x_sr sse2 avx2 neon/;
+ specialize qw/av1_convolve_y_sr sse2 avx2 neon/;
+ specialize qw/av1_convolve_2d_scale sse4_1/;
+ specialize qw/av1_jnt_convolve_2d ssse3 avx2 neon/;
+ specialize qw/av1_jnt_convolve_2d_copy sse2 avx2 neon/;
+ specialize qw/av1_jnt_convolve_x sse2 avx2 neon/;
+ specialize qw/av1_jnt_convolve_y sse2 avx2 neon/;
+ specialize qw/av1_highbd_convolve_2d_copy_sr sse2 avx2/;
+ specialize qw/av1_highbd_convolve_2d_sr ssse3 avx2/;
+ specialize qw/av1_highbd_convolve_x_sr ssse3 avx2/;
+ specialize qw/av1_highbd_convolve_y_sr ssse3 avx2/;
+ specialize qw/av1_highbd_convolve_2d_scale sse4_1/;
+ specialize qw/av1_highbd_jnt_convolve_2d sse4_1 avx2/;
+ specialize qw/av1_highbd_jnt_convolve_x sse4_1 avx2/;
+ specialize qw/av1_highbd_jnt_convolve_y sse4_1 avx2/;
+ specialize qw/av1_highbd_jnt_convolve_2d_copy sse4_1 avx2/;
- add_proto qw/void av1_selfguided_restoration_highbd/, "uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps";
- specialize qw/av1_selfguided_restoration_highbd sse4_1/;
+# INTRA_EDGE functions
+add_proto qw/void av1_filter_intra_edge/, "uint8_t *p, int sz, int strength";
+specialize qw/av1_filter_intra_edge sse4_1/;
+add_proto qw/void av1_upsample_intra_edge/, "uint8_t *p, int sz";
+specialize qw/av1_upsample_intra_edge sse4_1/;
- add_proto qw/void av1_highpass_filter_highbd/, "uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps";
- specialize qw/av1_highpass_filter_highbd sse4_1/;
- }
-}
+add_proto qw/void av1_filter_intra_edge_high/, "uint16_t *p, int sz, int strength";
+specialize qw/av1_filter_intra_edge_high sse4_1/;
+add_proto qw/void av1_upsample_intra_edge_high/, "uint16_t *p, int sz, int bd";
+specialize qw/av1_upsample_intra_edge_high sse4_1/;
-# CONVOLVE_ROUND/COMPOUND_ROUND functions
+# CFL
+add_proto qw/cfl_subtract_average_fn get_subtract_average_fn/, "TX_SIZE tx_size";
+specialize qw/get_subtract_average_fn sse2 avx2 neon vsx/;
-if (aom_config("CONFIG_CONVOLVE_ROUND") eq "yes") {
- add_proto qw/void av1_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
- specialize qw/av1_convolve_2d sse2/;
- add_proto qw/void av1_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits";
- specialize qw/av1_convolve_rounding avx2/;
-
- add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
- if (aom_config("CONFIG_COMPOUND_ROUND") ne "yes") {
- specialize qw/av1_convolve_2d_scale sse4_1/;
- }
-
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void av1_highbd_convolve_2d/, "const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
- specialize qw/av1_highbd_convolve_2d ssse3/;
- add_proto qw/void av1_highbd_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd";
- specialize qw/av1_highbd_convolve_rounding avx2/;
-
- add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd";
- if (aom_config("CONFIG_COMPOUND_ROUND") ne "yes") {
- specialize qw/av1_highbd_convolve_2d_scale sse4_1/;
- }
- }
-}
+add_proto qw/cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd/, "TX_SIZE tx_size";
+specialize qw/cfl_get_luma_subsampling_420_lbd ssse3 avx2 neon/;
-# INTRA_EDGE functions
-if (aom_config("CONFIG_INTRA_EDGE") eq "yes") {
- add_proto qw/void av1_filter_intra_edge/, "uint8_t *p, int sz, int strength";
- specialize qw/av1_filter_intra_edge sse4_1/;
- add_proto qw/void av1_upsample_intra_edge/, "uint8_t *p, int sz";
- specialize qw/av1_upsample_intra_edge sse4_1/;
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void av1_filter_intra_edge_high/, "uint16_t *p, int sz, int strength";
- specialize qw/av1_filter_intra_edge_high sse4_1/;
- add_proto qw/void av1_upsample_intra_edge_high/, "uint16_t *p, int sz, int bd";
- specialize qw/av1_upsample_intra_edge_high sse4_1/;
- }
-}
+add_proto qw/cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd/, "TX_SIZE tx_size";
+specialize qw/cfl_get_luma_subsampling_422_lbd ssse3 avx2 neon/;
+
+add_proto qw/cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd/, "TX_SIZE tx_size";
+specialize qw/cfl_get_luma_subsampling_444_lbd ssse3 avx2 neon/;
+
+add_proto qw/cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd/, "TX_SIZE tx_size";
+specialize qw/cfl_get_luma_subsampling_420_hbd ssse3 avx2 neon/;
+
+add_proto qw/cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd/, "TX_SIZE tx_size";
+specialize qw/cfl_get_luma_subsampling_422_hbd ssse3 avx2 neon/;
+
+add_proto qw/cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd/, "TX_SIZE tx_size";
+specialize qw/cfl_get_luma_subsampling_444_hbd ssse3 avx2 neon/;
+
+add_proto qw/cfl_predict_lbd_fn get_predict_lbd_fn/, "TX_SIZE tx_size";
+specialize qw/get_predict_lbd_fn ssse3 avx2 neon/;
+
+add_proto qw/cfl_predict_hbd_fn get_predict_hbd_fn/, "TX_SIZE tx_size";
+specialize qw/get_predict_hbd_fn ssse3 avx2 neon/;
+1;
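
The add_proto/specialize pairs above feed AV1's run-time CPU detection (RTCD): every prototype gets a plain C reference implementation, and specialize lists the SIMD flavours (sse2, ssse3, sse4_1, avx2, neon, msa, vsx, ...) that may replace it on capable hardware. Below is a minimal sketch of that dispatch pattern; sketch_fn, sketch_c, sketch_sse4_1 and HAS_SSE4_1 are hypothetical placeholders for illustration, not the literal code the generator emits.

    #include <stdint.h>

    /* Hypothetical illustration of the dispatch implied by add_proto/specialize. */
    typedef void (*sketch_fn)(const int16_t *src, int32_t *dst, int n);

    static void sketch_c(const int16_t *src, int32_t *dst, int n) {
      for (int i = 0; i < n; ++i) dst[i] = src[i]; /* reference C version */
    }

    /* Stand-in for a hand-written SIMD kernel with the same contract. */
    static void sketch_sse4_1(const int16_t *src, int32_t *dst, int n) {
      sketch_c(src, dst, n);
    }

    static sketch_fn sketch = sketch_c; /* default to the C version */

    #define HAS_SSE4_1 0x01 /* placeholder CPU-feature bit */

    static void sketch_rtcd_init(int cpu_flags) {
      /* "specialize qw/sketch sse4_1/" would translate to an override like: */
      if (cpu_flags & HAS_SSE4_1) sketch = sketch_sse4_1;
    }

Call sites then go through the pointer, so the same code transparently picks up whichever variant was enabled for the running CPU.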
diff --git a/third_party/aom/av1/common/av1_txfm.c b/third_party/aom/av1/common/av1_txfm.c
new file mode 100644
index 000000000..1e6654121
--- /dev/null
+++ b/third_party/aom/av1/common/av1_txfm.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "av1/common/av1_txfm.h"
+
+// av1_cospi_arr[i][j] = (int)round(cos(M_PI*j/128) * (1<<(cos_bit_min+i)));
+const int32_t av1_cospi_arr_data[7][64] = {
+ { 1024, 1024, 1023, 1021, 1019, 1016, 1013, 1009, 1004, 999, 993, 987, 980,
+ 972, 964, 955, 946, 936, 926, 915, 903, 891, 878, 865, 851, 837,
+ 822, 807, 792, 775, 759, 742, 724, 706, 688, 669, 650, 630, 610,
+ 590, 569, 548, 526, 505, 483, 460, 438, 415, 392, 369, 345, 321,
+ 297, 273, 249, 224, 200, 175, 150, 125, 100, 75, 50, 25 },
+ { 2048, 2047, 2046, 2042, 2038, 2033, 2026, 2018, 2009, 1998, 1987,
+ 1974, 1960, 1945, 1928, 1911, 1892, 1872, 1851, 1829, 1806, 1782,
+ 1757, 1730, 1703, 1674, 1645, 1615, 1583, 1551, 1517, 1483, 1448,
+ 1412, 1375, 1338, 1299, 1260, 1220, 1179, 1138, 1096, 1053, 1009,
+ 965, 921, 876, 830, 784, 737, 690, 642, 595, 546, 498,
+ 449, 400, 350, 301, 251, 201, 151, 100, 50 },
+ { 4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036, 4017, 3996, 3973,
+ 3948, 3920, 3889, 3857, 3822, 3784, 3745, 3703, 3659, 3612, 3564,
+ 3513, 3461, 3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967, 2896,
+ 2824, 2751, 2675, 2598, 2520, 2440, 2359, 2276, 2191, 2106, 2019,
+ 1931, 1842, 1751, 1660, 1567, 1474, 1380, 1285, 1189, 1092, 995,
+ 897, 799, 700, 601, 501, 401, 301, 201, 101 },
+ { 8192, 8190, 8182, 8170, 8153, 8130, 8103, 8071, 8035, 7993, 7946,
+ 7895, 7839, 7779, 7713, 7643, 7568, 7489, 7405, 7317, 7225, 7128,
+ 7027, 6921, 6811, 6698, 6580, 6458, 6333, 6203, 6070, 5933, 5793,
+ 5649, 5501, 5351, 5197, 5040, 4880, 4717, 4551, 4383, 4212, 4038,
+ 3862, 3683, 3503, 3320, 3135, 2948, 2760, 2570, 2378, 2185, 1990,
+ 1795, 1598, 1401, 1202, 1003, 803, 603, 402, 201 },
+ { 16384, 16379, 16364, 16340, 16305, 16261, 16207, 16143, 16069, 15986, 15893,
+ 15791, 15679, 15557, 15426, 15286, 15137, 14978, 14811, 14635, 14449, 14256,
+ 14053, 13842, 13623, 13395, 13160, 12916, 12665, 12406, 12140, 11866, 11585,
+ 11297, 11003, 10702, 10394, 10080, 9760, 9434, 9102, 8765, 8423, 8076,
+ 7723, 7366, 7005, 6639, 6270, 5897, 5520, 5139, 4756, 4370, 3981,
+ 3590, 3196, 2801, 2404, 2006, 1606, 1205, 804, 402 },
+ { 32768, 32758, 32729, 32679, 32610, 32522, 32413, 32286, 32138, 31972, 31786,
+ 31581, 31357, 31114, 30853, 30572, 30274, 29957, 29622, 29269, 28899, 28511,
+ 28106, 27684, 27246, 26791, 26320, 25833, 25330, 24812, 24279, 23732, 23170,
+ 22595, 22006, 21403, 20788, 20160, 19520, 18868, 18205, 17531, 16846, 16151,
+ 15447, 14733, 14010, 13279, 12540, 11793, 11039, 10279, 9512, 8740, 7962,
+ 7180, 6393, 5602, 4808, 4011, 3212, 2411, 1608, 804 },
+ { 65536, 65516, 65457, 65358, 65220, 65043, 64827, 64571, 64277, 63944, 63572,
+ 63162, 62714, 62228, 61705, 61145, 60547, 59914, 59244, 58538, 57798, 57022,
+ 56212, 55368, 54491, 53581, 52639, 51665, 50660, 49624, 48559, 47464, 46341,
+ 45190, 44011, 42806, 41576, 40320, 39040, 37736, 36410, 35062, 33692, 32303,
+ 30893, 29466, 28020, 26558, 25080, 23586, 22078, 20557, 19024, 17479, 15924,
+ 14359, 12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608 }
+};
+
+// av1_sinpi_arr_data[i][j] = (int)round((sqrt(2) * sin(j*Pi/9) * 2 / 3) * (1
+// << (cos_bit_min + i))) modified so that elements j=1,2 sum to element j=4.
+const int32_t av1_sinpi_arr_data[7][5] = {
+ { 0, 330, 621, 836, 951 }, { 0, 660, 1241, 1672, 1901 },
+ { 0, 1321, 2482, 3344, 3803 }, { 0, 2642, 4964, 6689, 7606 },
+ { 0, 5283, 9929, 13377, 15212 }, { 0, 10566, 19858, 26755, 30424 },
+ { 0, 21133, 39716, 53510, 60849 }
+};
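+
+// As a quick check of the formulas in the two comments above: in the first
+// cospi row (i = 0, scale 2^10), entry j = 32 is
+// round(cos(32*pi/128) * 1024) = round(0.7071 * 1024) = 724, matching the
+// table; and every sinpi row satisfies the stated constraint that the entries
+// at j = 1 and j = 2 sum to the entry at j = 4 (330 + 621 = 951,
+// 660 + 1241 = 1901, and so on).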
+
+void av1_round_shift_array_c(int32_t *arr, int size, int bit) {
+ int i;
+ if (bit == 0) {
+ return;
+ } else {
+ if (bit > 0) {
+ for (i = 0; i < size; i++) {
+ arr[i] = round_shift(arr[i], bit);
+ }
+ } else {
+ for (i = 0; i < size; i++) {
+ arr[i] = (int32_t)clamp64(((int64_t)1 << (-bit)) * arr[i], INT32_MIN,
+ INT32_MAX);
+ }
+ }
+ }
+}
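+
+// Read directly from the function above: a positive bit divides with rounding
+// to nearest, so with bit = 2 an entry of 10 becomes (10 + 2) >> 2 = 3, while
+// a negative bit scales up with saturation, so with bit = -2 an entry of 3
+// becomes 3 * 4 = 12, clamped to the int32 range by clamp64.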
+
+const TXFM_TYPE av1_txfm_type_ls[5][TX_TYPES_1D] = {
+ { TXFM_TYPE_DCT4, TXFM_TYPE_ADST4, TXFM_TYPE_ADST4, TXFM_TYPE_IDENTITY4 },
+ { TXFM_TYPE_DCT8, TXFM_TYPE_ADST8, TXFM_TYPE_ADST8, TXFM_TYPE_IDENTITY8 },
+ { TXFM_TYPE_DCT16, TXFM_TYPE_ADST16, TXFM_TYPE_ADST16, TXFM_TYPE_IDENTITY16 },
+ { TXFM_TYPE_DCT32, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID,
+ TXFM_TYPE_IDENTITY32 },
+ { TXFM_TYPE_DCT64, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID }
+};
+
+const int8_t av1_txfm_stage_num_list[TXFM_TYPES] = {
+ 4, // TXFM_TYPE_DCT4
+ 6, // TXFM_TYPE_DCT8
+ 8, // TXFM_TYPE_DCT16
+ 10, // TXFM_TYPE_DCT32
+ 12, // TXFM_TYPE_DCT64
+ 7, // TXFM_TYPE_ADST4
+ 8, // TXFM_TYPE_ADST8
+ 10, // TXFM_TYPE_ADST16
+ 1, // TXFM_TYPE_IDENTITY4
+ 1, // TXFM_TYPE_IDENTITY8
+ 1, // TXFM_TYPE_IDENTITY16
+ 1, // TXFM_TYPE_IDENTITY32
+};
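
The two tables above work together: av1_txfm_type_ls selects the 1-D transform for a given 1-D size (rows for 4, 8, 16, 32 and 64 points) and 1-D type, and av1_txfm_stage_num_list gives the number of stages that transform runs. A 16-point ADST, for instance, maps to TXFM_TYPE_ADST16 and runs in 10 stages; ADST is marked TXFM_TYPE_INVALID above 16 points, and at 64 points only the DCT is available.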
diff --git a/third_party/aom/av1/common/av1_txfm.h b/third_party/aom/av1/common/av1_txfm.h
index bd365de59..5db3233f5 100644
--- a/third_party/aom/av1/common/av1_txfm.h
+++ b/third_party/aom/av1/common/av1_txfm.h
@@ -16,6 +16,8 @@
#include <math.h>
#include <stdio.h>
+#include "config/aom_config.h"
+
#include "av1/common/enums.h"
#include "av1/common/blockd.h"
#include "aom/aom_integer.h"
@@ -25,100 +27,73 @@
extern "C" {
#endif
+#if !defined(DO_RANGE_CHECK_CLAMP)
+#define DO_RANGE_CHECK_CLAMP 0
+#endif
+
+extern const int32_t av1_cospi_arr_data[7][64];
+extern const int32_t av1_sinpi_arr_data[7][5];
+
#define MAX_TXFM_STAGE_NUM 12
static const int cos_bit_min = 10;
static const int cos_bit_max = 16;
-// cospi_arr[i][j] = (int)round(cos(M_PI*j/128) * (1<<(cos_bit_min+i)));
-static const int32_t cospi_arr_data[7][64] = {
- { 1024, 1024, 1023, 1021, 1019, 1016, 1013, 1009, 1004, 999, 993, 987, 980,
- 972, 964, 955, 946, 936, 926, 915, 903, 891, 878, 865, 851, 837,
- 822, 807, 792, 775, 759, 742, 724, 706, 688, 669, 650, 630, 610,
- 590, 569, 548, 526, 505, 483, 460, 438, 415, 392, 369, 345, 321,
- 297, 273, 249, 224, 200, 175, 150, 125, 100, 75, 50, 25 },
- { 2048, 2047, 2046, 2042, 2038, 2033, 2026, 2018, 2009, 1998, 1987,
- 1974, 1960, 1945, 1928, 1911, 1892, 1872, 1851, 1829, 1806, 1782,
- 1757, 1730, 1703, 1674, 1645, 1615, 1583, 1551, 1517, 1483, 1448,
- 1412, 1375, 1338, 1299, 1260, 1220, 1179, 1138, 1096, 1053, 1009,
- 965, 921, 876, 830, 784, 737, 690, 642, 595, 546, 498,
- 449, 400, 350, 301, 251, 201, 151, 100, 50 },
- { 4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036, 4017, 3996, 3973,
- 3948, 3920, 3889, 3857, 3822, 3784, 3745, 3703, 3659, 3612, 3564,
- 3513, 3461, 3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967, 2896,
- 2824, 2751, 2675, 2598, 2520, 2440, 2359, 2276, 2191, 2106, 2019,
- 1931, 1842, 1751, 1660, 1567, 1474, 1380, 1285, 1189, 1092, 995,
- 897, 799, 700, 601, 501, 401, 301, 201, 101 },
- { 8192, 8190, 8182, 8170, 8153, 8130, 8103, 8071, 8035, 7993, 7946,
- 7895, 7839, 7779, 7713, 7643, 7568, 7489, 7405, 7317, 7225, 7128,
- 7027, 6921, 6811, 6698, 6580, 6458, 6333, 6203, 6070, 5933, 5793,
- 5649, 5501, 5351, 5197, 5040, 4880, 4717, 4551, 4383, 4212, 4038,
- 3862, 3683, 3503, 3320, 3135, 2948, 2760, 2570, 2378, 2185, 1990,
- 1795, 1598, 1401, 1202, 1003, 803, 603, 402, 201 },
- { 16384, 16379, 16364, 16340, 16305, 16261, 16207, 16143, 16069, 15986, 15893,
- 15791, 15679, 15557, 15426, 15286, 15137, 14978, 14811, 14635, 14449, 14256,
- 14053, 13842, 13623, 13395, 13160, 12916, 12665, 12406, 12140, 11866, 11585,
- 11297, 11003, 10702, 10394, 10080, 9760, 9434, 9102, 8765, 8423, 8076,
- 7723, 7366, 7005, 6639, 6270, 5897, 5520, 5139, 4756, 4370, 3981,
- 3590, 3196, 2801, 2404, 2006, 1606, 1205, 804, 402 },
- { 32768, 32758, 32729, 32679, 32610, 32522, 32413, 32286, 32138, 31972, 31786,
- 31581, 31357, 31114, 30853, 30572, 30274, 29957, 29622, 29269, 28899, 28511,
- 28106, 27684, 27246, 26791, 26320, 25833, 25330, 24812, 24279, 23732, 23170,
- 22595, 22006, 21403, 20788, 20160, 19520, 18868, 18205, 17531, 16846, 16151,
- 15447, 14733, 14010, 13279, 12540, 11793, 11039, 10279, 9512, 8740, 7962,
- 7180, 6393, 5602, 4808, 4011, 3212, 2411, 1608, 804 },
- { 65536, 65516, 65457, 65358, 65220, 65043, 64827, 64571, 64277, 63944, 63572,
- 63162, 62714, 62228, 61705, 61145, 60547, 59914, 59244, 58538, 57798, 57022,
- 56212, 55368, 54491, 53581, 52639, 51665, 50660, 49624, 48559, 47464, 46341,
- 45190, 44011, 42806, 41576, 40320, 39040, 37736, 36410, 35062, 33692, 32303,
- 30893, 29466, 28020, 26558, 25080, 23586, 22078, 20557, 19024, 17479, 15924,
- 14359, 12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608 }
-};
+static const int NewSqrt2Bits = 12;
+// 2^12 * sqrt(2)
+static const int32_t NewSqrt2 = 5793;
+// 2^12 / sqrt(2)
+static const int32_t NewInvSqrt2 = 2896;
static INLINE const int32_t *cospi_arr(int n) {
- return cospi_arr_data[n - cos_bit_min];
+ return av1_cospi_arr_data[n - cos_bit_min];
}
-static INLINE int32_t round_shift(int32_t value, int bit) {
- assert(bit >= 1);
- return (value + (1 << (bit - 1))) >> bit;
+static INLINE const int32_t *sinpi_arr(int n) {
+ return av1_sinpi_arr_data[n - cos_bit_min];
}
-static INLINE void round_shift_array(int32_t *arr, int size, int bit) {
- int i;
- if (bit == 0) {
- return;
- } else {
- if (bit > 0) {
- for (i = 0; i < size; i++) {
- arr[i] = round_shift(arr[i], bit);
- }
- } else {
- for (i = 0; i < size; i++) {
- arr[i] = arr[i] * (1 << (-bit));
- }
- }
+static INLINE int32_t range_check_value(int32_t value, int8_t bit) {
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+ const int64_t max_value = (1LL << (bit - 1)) - 1;
+ const int64_t min_value = -(1LL << (bit - 1));
+ if (value < min_value || value > max_value) {
+ fprintf(stderr, "coeff out of bit range, value: %d bit %d\n", value, bit);
+ assert(0);
}
+#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
+#if DO_RANGE_CHECK_CLAMP
+ bit = AOMMIN(bit, 31);
+ return clamp(value, (1 << (bit - 1)) - 1, -(1 << (bit - 1)));
+#endif // DO_RANGE_CHECK_CLAMP
+ (void)bit;
+ return value;
+}
+
+static INLINE int32_t round_shift(int64_t value, int bit) {
+ assert(bit >= 1);
+ return (int32_t)((value + (1ll << (bit - 1))) >> bit);
}
static INLINE int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1,
int bit) {
- int32_t result_32 = w0 * in0 + w1 * in1;
+ int64_t result_64 = (int64_t)(w0 * in0) + (int64_t)(w1 * in1);
#if CONFIG_COEFFICIENT_RANGE_CHECKING
- int64_t result_64 = (int64_t)w0 * (int64_t)in0 + (int64_t)w1 * (int64_t)in1;
- if (result_64 < INT32_MIN || result_64 > INT32_MAX) {
- printf("%s %d overflow result_32: %d result_64: %" PRId64
- " w0: %d in0: %d w1: %d in1: "
- "%d\n",
- __FILE__, __LINE__, result_32, result_64, w0, in0, w1, in1);
- assert(0 && "half_btf overflow");
- }
+ assert(result_64 >= INT32_MIN && result_64 <= INT32_MAX);
#endif
- return round_shift(result_32, bit);
+ return round_shift(result_64, bit);
}
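
half_btf is the half-butterfly primitive used by the 1-D transform stages: it forms the weighted sum w0*in0 + w1*in1 in 64-bit arithmetic and then rounds and shifts it, i.e. (w0*in0 + w1*in1 + 2^(bit-1)) >> bit, with the weights normally drawn from the cospi/sinpi tables declared earlier in this header.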
-typedef void (*TxfmFunc)(const int32_t *input, int32_t *output,
- const int8_t *cos_bit, const int8_t *stage_range);
+static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
+ int bd) {
+ return clip_pixel_highbd(dest + (int)trans, bd);
+}
+
+typedef void (*TxfmFunc)(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range);
+
+typedef void (*FwdTxfm2dFunc)(const int16_t *input, int32_t *output, int stride,
+ TX_TYPE tx_type, int bd);
typedef enum TXFM_TYPE {
TXFM_TYPE_DCT4,
@@ -129,88 +104,82 @@ typedef enum TXFM_TYPE {
TXFM_TYPE_ADST4,
TXFM_TYPE_ADST8,
TXFM_TYPE_ADST16,
- TXFM_TYPE_ADST32,
TXFM_TYPE_IDENTITY4,
TXFM_TYPE_IDENTITY8,
TXFM_TYPE_IDENTITY16,
TXFM_TYPE_IDENTITY32,
- TXFM_TYPE_IDENTITY64,
+ TXFM_TYPES,
+ TXFM_TYPE_INVALID,
} TXFM_TYPE;
-typedef struct TXFM_1D_CFG {
- const int txfm_size;
- const int stage_num;
-
- const int8_t *shift;
- const int8_t *stage_range;
- const int8_t *cos_bit;
- const TXFM_TYPE txfm_type;
-} TXFM_1D_CFG;
-
typedef struct TXFM_2D_FLIP_CFG {
+ TX_SIZE tx_size;
int ud_flip; // flip upside down
int lr_flip; // flip left to right
- const TXFM_1D_CFG *col_cfg;
- const TXFM_1D_CFG *row_cfg;
+ const int8_t *shift;
+ int8_t cos_bit_col;
+ int8_t cos_bit_row;
+ int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
+ int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
+ TXFM_TYPE txfm_type_col;
+ TXFM_TYPE txfm_type_row;
+ int stage_num_col;
+ int stage_num_row;
} TXFM_2D_FLIP_CFG;
-static INLINE void set_flip_cfg(TX_TYPE tx_type, TXFM_2D_FLIP_CFG *cfg) {
+static INLINE void get_flip_cfg(TX_TYPE tx_type, int *ud_flip, int *lr_flip) {
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
- cfg->ud_flip = 0;
- cfg->lr_flip = 0;
+ *ud_flip = 0;
+ *lr_flip = 0;
break;
-#if CONFIG_EXT_TX
case IDTX:
case V_DCT:
case H_DCT:
case V_ADST:
case H_ADST:
- cfg->ud_flip = 0;
- cfg->lr_flip = 0;
+ *ud_flip = 0;
+ *lr_flip = 0;
break;
case FLIPADST_DCT:
case FLIPADST_ADST:
case V_FLIPADST:
- cfg->ud_flip = 1;
- cfg->lr_flip = 0;
+ *ud_flip = 1;
+ *lr_flip = 0;
break;
case DCT_FLIPADST:
case ADST_FLIPADST:
case H_FLIPADST:
- cfg->ud_flip = 0;
- cfg->lr_flip = 1;
+ *ud_flip = 0;
+ *lr_flip = 1;
break;
case FLIPADST_FLIPADST:
- cfg->ud_flip = 1;
- cfg->lr_flip = 1;
+ *ud_flip = 1;
+ *lr_flip = 1;
break;
-#endif // CONFIG_EXT_TX
default:
- cfg->ud_flip = 0;
- cfg->lr_flip = 0;
+ *ud_flip = 0;
+ *lr_flip = 0;
assert(0);
}
}
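
The switch simply encodes which 2-D types flip the block: a FLIPADST in the first position of the name (FLIPADST_DCT, FLIPADST_ADST, V_FLIPADST) sets ud_flip, one in the second position (DCT_FLIPADST, ADST_FLIPADST, H_FLIPADST) sets lr_flip, and FLIPADST_FLIPADST sets both; every other type leaves the block unflipped.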
-#if CONFIG_TXMG
+static INLINE void set_flip_cfg(TX_TYPE tx_type, TXFM_2D_FLIP_CFG *cfg) {
+ get_flip_cfg(tx_type, &cfg->ud_flip, &cfg->lr_flip);
+}
+
static INLINE TX_SIZE av1_rotate_tx_size(TX_SIZE tx_size) {
switch (tx_size) {
-#if CONFIG_CHROMA_2X2
- case TX_2X2: return TX_2X2;
-#endif
case TX_4X4: return TX_4X4;
case TX_8X8: return TX_8X8;
case TX_16X16: return TX_16X16;
case TX_32X32: return TX_32X32;
-#if CONFIG_TX64X64
case TX_64X64: return TX_64X64;
case TX_32X64: return TX_64X32;
case TX_64X32: return TX_32X64;
-#endif
case TX_4X8: return TX_8X4;
case TX_8X4: return TX_4X8;
case TX_8X16: return TX_16X8;
@@ -221,6 +190,8 @@ static INLINE TX_SIZE av1_rotate_tx_size(TX_SIZE tx_size) {
case TX_16X4: return TX_4X16;
case TX_8X32: return TX_32X8;
case TX_32X8: return TX_8X32;
+ case TX_16X64: return TX_64X16;
+ case TX_64X16: return TX_16X64;
default: assert(0); return TX_INVALID;
}
}
@@ -231,7 +202,6 @@ static INLINE TX_TYPE av1_rotate_tx_type(TX_TYPE tx_type) {
case ADST_DCT: return DCT_ADST;
case DCT_ADST: return ADST_DCT;
case ADST_ADST: return ADST_ADST;
-#if CONFIG_EXT_TX
case FLIPADST_DCT: return DCT_FLIPADST;
case DCT_FLIPADST: return FLIPADST_DCT;
case FLIPADST_FLIPADST: return FLIPADST_FLIPADST;
@@ -244,123 +214,46 @@ static INLINE TX_TYPE av1_rotate_tx_type(TX_TYPE tx_type) {
case H_ADST: return V_ADST;
case V_FLIPADST: return H_FLIPADST;
case H_FLIPADST: return V_FLIPADST;
-#endif // CONFIG_EXT_TX
-#if CONFIG_MRC_TX
- case MRC_DCT: return MRC_DCT;
-#endif // CONFIG_MRC_TX
default: assert(0); return TX_TYPES;
}
}
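
Paired with av1_rotate_tx_size above, this swaps the row and column roles of a transform type, e.g. ADST_DCT becomes DCT_ADST, FLIPADST_DCT becomes DCT_FLIPADST and H_ADST becomes V_ADST, which appears intended to let a block be processed through its transposed counterpart.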
-#endif // CONFIG_TXMG
-
-#if CONFIG_MRC_TX
-static INLINE int get_mrc_diff_mask_inter(const int16_t *diff, int diff_stride,
- uint8_t *mask, int mask_stride,
- int width, int height) {
- // placeholder mask generation function
- assert(SIGNAL_MRC_MASK_INTER);
- int n_masked_vals = 0;
- for (int i = 0; i < height; ++i) {
- for (int j = 0; j < width; ++j) {
- mask[i * mask_stride + j] = diff[i * diff_stride + j] > 100 ? 1 : 0;
- n_masked_vals += mask[i * mask_stride + j];
- }
- }
- return n_masked_vals;
-}
-
-static INLINE int get_mrc_pred_mask_inter(const uint8_t *pred, int pred_stride,
- uint8_t *mask, int mask_stride,
- int width, int height) {
- // placeholder mask generation function
- int n_masked_vals = 0;
- for (int i = 0; i < height; ++i) {
- for (int j = 0; j < width; ++j) {
- mask[i * mask_stride + j] = pred[i * pred_stride + j] > 100 ? 1 : 0;
- n_masked_vals += mask[i * mask_stride + j];
- }
- }
- return n_masked_vals;
-}
-
-static INLINE int get_mrc_diff_mask_intra(const int16_t *diff, int diff_stride,
- uint8_t *mask, int mask_stride,
- int width, int height) {
- // placeholder mask generation function
- assert(SIGNAL_MRC_MASK_INTRA);
- int n_masked_vals = 0;
- for (int i = 0; i < height; ++i) {
- for (int j = 0; j < width; ++j) {
- mask[i * mask_stride + j] = diff[i * diff_stride + j] > 100 ? 1 : 0;
- n_masked_vals += mask[i * mask_stride + j];
- }
- }
- return n_masked_vals;
-}
-static INLINE int get_mrc_pred_mask_intra(const uint8_t *pred, int pred_stride,
- uint8_t *mask, int mask_stride,
- int width, int height) {
- // placeholder mask generation function
- int n_masked_vals = 0;
- for (int i = 0; i < height; ++i) {
- for (int j = 0; j < width; ++j) {
- mask[i * mask_stride + j] = pred[i * pred_stride + j] > 100 ? 1 : 0;
- n_masked_vals += mask[i * mask_stride + j];
- }
- }
- return n_masked_vals;
-}
-
-static INLINE int get_mrc_diff_mask(const int16_t *diff, int diff_stride,
- uint8_t *mask, int mask_stride, int width,
- int height, int is_inter) {
- if (is_inter) {
- assert(USE_MRC_INTER && "MRC invalid for inter blocks");
- assert(SIGNAL_MRC_MASK_INTER);
- return get_mrc_diff_mask_inter(diff, diff_stride, mask, mask_stride, width,
- height);
+// Utility function that returns the log of the ratio of the col and row
+// sizes.
+static INLINE int get_rect_tx_log_ratio(int col, int row) {
+ if (col == row) return 0;
+ if (col > row) {
+ if (col == row * 2) return 1;
+ if (col == row * 4) return 2;
+ assert(0 && "Unsupported transform size");
} else {
- assert(USE_MRC_INTRA && "MRC invalid for intra blocks");
- assert(SIGNAL_MRC_MASK_INTRA);
- return get_mrc_diff_mask_intra(diff, diff_stride, mask, mask_stride, width,
- height);
+ if (row == col * 2) return -1;
+ if (row == col * 4) return -2;
+ assert(0 && "Unsupported transform size");
}
+ return 0; // Invalid
}
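As an aside, the ratio rule above is easy to sanity-check in isolation. A minimal standalone sketch (plain ints; rect_log_ratio and check_rect_log_ratio are hypothetical names, not part of the library):

#include <assert.h>

/* Same col/row rule as get_rect_tx_log_ratio(), restricted to the supported
 * 1:1, 2:1 and 4:1 shapes. */
static int rect_log_ratio(int col, int row) {
  if (col == row) return 0;
  if (col > row) return (col == 2 * row) ? 1 : 2;
  return (row == 2 * col) ? -1 : -2;
}

static void check_rect_log_ratio(void) {
  assert(rect_log_ratio(16, 16) == 0);  /* e.g. a 16x16 transform */
  assert(rect_log_ratio(32, 16) == 1);  /* 32x16 */
  assert(rect_log_ratio(8, 32) == -2);  /* 8x32 */
}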
-static INLINE int get_mrc_pred_mask(const uint8_t *pred, int pred_stride,
- uint8_t *mask, int mask_stride, int width,
- int height, int is_inter) {
- if (is_inter) {
- assert(USE_MRC_INTER && "MRC invalid for inter blocks");
- return get_mrc_pred_mask_inter(pred, pred_stride, mask, mask_stride, width,
- height);
- } else {
- assert(USE_MRC_INTRA && "MRC invalid for intra blocks");
- return get_mrc_pred_mask_intra(pred, pred_stride, mask, mask_stride, width,
- height);
- }
-}
-
-static INLINE int is_valid_mrc_mask(int n_masked_vals, int width, int height) {
- return !(n_masked_vals == 0 || n_masked_vals == (width * height));
-}
-#endif // CONFIG_MRC_TX
-
void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
const TXFM_2D_FLIP_CFG *cfg, int bd);
void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
- const TXFM_2D_FLIP_CFG *cfg, int8_t fwd_shift,
+ const TXFM_2D_FLIP_CFG *cfg, TX_SIZE tx_size,
int bd);
-TXFM_2D_FLIP_CFG av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size);
-#if CONFIG_TX64X64
-TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x64_cfg(TX_TYPE tx_type);
-TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x32_cfg(TX_TYPE tx_type);
-TXFM_2D_FLIP_CFG av1_get_fwd_txfm_32x64_cfg(TX_TYPE tx_type);
-#endif // CONFIG_TX64X64
-TXFM_2D_FLIP_CFG av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size);
+void av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
+ TXFM_2D_FLIP_CFG *cfg);
+void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
+ TXFM_2D_FLIP_CFG *cfg);
+extern const TXFM_TYPE av1_txfm_type_ls[5][TX_TYPES_1D];
+extern const int8_t av1_txfm_stage_num_list[TXFM_TYPES];
+static INLINE int get_txw_idx(TX_SIZE tx_size) {
+ return tx_size_wide_log2[tx_size] - tx_size_wide_log2[0];
+}
+static INLINE int get_txh_idx(TX_SIZE tx_size) {
+ return tx_size_high_log2[tx_size] - tx_size_high_log2[0];
+}
+#define MAX_TXWH_IDX 5
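For illustration, the two index helpers above reduce to log2(dimension) - log2(4), so the values 0..4 cover transform dimensions 4..64 and MAX_TXWH_IDX is 5. A hedged standalone sketch (tx_dim_idx is a hypothetical name; it assumes the 4x4 transform is entry 0, i.e. tx_size_wide_log2[0] == 2):

static int tx_dim_idx(int dim) {
  int log2 = 0;
  while ((1 << log2) < dim) ++log2;
  return log2 - 2;  /* 4 -> 0, 8 -> 1, 16 -> 2, 32 -> 3, 64 -> 4 */
}
/* A 32x16 transform would index per-size tables at (txw_idx, txh_idx) = (3, 2). */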
#ifdef __cplusplus
}
#endif // __cplusplus
diff --git a/third_party/aom/av1/common/blockd.c b/third_party/aom/av1/common/blockd.c
index 7bada8bb1..86b4b5d6c 100644
--- a/third_party/aom/av1/common/blockd.c
+++ b/third_party/aom/av1/common/blockd.c
@@ -16,109 +16,17 @@
#include "av1/common/blockd.h"
#include "av1/common/onyxc_int.h"
-PREDICTION_MODE av1_left_block_mode(const MODE_INFO *cur_mi,
- const MODE_INFO *left_mi, int b) {
- if (b == 0 || b == 2) {
- if (!left_mi || is_inter_block(&left_mi->mbmi)) return DC_PRED;
-
- return get_y_mode(left_mi, b + 1);
- } else {
- assert(b == 1 || b == 3);
- return cur_mi->bmi[b - 1].as_mode;
- }
-}
-
-PREDICTION_MODE av1_above_block_mode(const MODE_INFO *cur_mi,
- const MODE_INFO *above_mi, int b) {
- if (b == 0 || b == 1) {
- if (!above_mi || is_inter_block(&above_mi->mbmi)) return DC_PRED;
-
- return get_y_mode(above_mi, b + 2);
- } else {
- assert(b == 2 || b == 3);
- return cur_mi->bmi[b - 2].as_mode;
- }
+PREDICTION_MODE av1_left_block_mode(const MB_MODE_INFO *left_mi) {
+ if (!left_mi) return DC_PRED;
+ assert(!is_inter_block(left_mi) || is_intrabc_block(left_mi));
+ return left_mi->mode;
}
-#if CONFIG_COEF_INTERLEAVE
-void av1_foreach_transformed_block_interleave(
- const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
- foreach_transformed_block_visitor visit, void *arg) {
- const struct macroblockd_plane *const pd_y = &xd->plane[0];
- const struct macroblockd_plane *const pd_c = &xd->plane[1];
- const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-
- const TX_SIZE tx_log2_y = mbmi->tx_size;
- const TX_SIZE tx_log2_c = av1_get_uv_tx_size(mbmi, pd_c);
- const int tx_sz_y = (1 << tx_log2_y);
- const int tx_sz_c = (1 << tx_log2_c);
-
- const BLOCK_SIZE plane_bsize_y = get_plane_block_size(bsize, pd_y);
- const BLOCK_SIZE plane_bsize_c = get_plane_block_size(bsize, pd_c);
-
- const int num_4x4_w_y = num_4x4_blocks_wide_lookup[plane_bsize_y];
- const int num_4x4_w_c = num_4x4_blocks_wide_lookup[plane_bsize_c];
- const int num_4x4_h_y = num_4x4_blocks_high_lookup[plane_bsize_y];
- const int num_4x4_h_c = num_4x4_blocks_high_lookup[plane_bsize_c];
-
- const int step_y = 1 << (tx_log2_y << 1);
- const int step_c = 1 << (tx_log2_c << 1);
-
- const int max_4x4_w_y =
- get_max_4x4_size(num_4x4_w_y, xd->mb_to_right_edge, pd_y->subsampling_x);
- const int max_4x4_h_y =
- get_max_4x4_size(num_4x4_h_y, xd->mb_to_bottom_edge, pd_y->subsampling_y);
-
- const int extra_step_y = ((num_4x4_w_y - max_4x4_w_y) >> tx_log2_y) * step_y;
-
- const int max_4x4_w_c =
- get_max_4x4_size(num_4x4_w_c, xd->mb_to_right_edge, pd_c->subsampling_x);
- const int max_4x4_h_c =
- get_max_4x4_size(num_4x4_h_c, xd->mb_to_bottom_edge, pd_c->subsampling_y);
-
- const int extra_step_c = ((num_4x4_w_c - max_4x4_w_c) >> tx_log2_c) * step_c;
-
- // The max_4x4_w/h may be smaller than tx_sz under some corner cases,
- // i.e. when the SB is splitted by tile boundaries.
- const int tu_num_w_y = (max_4x4_w_y + tx_sz_y - 1) / tx_sz_y;
- const int tu_num_h_y = (max_4x4_h_y + tx_sz_y - 1) / tx_sz_y;
- const int tu_num_w_c = (max_4x4_w_c + tx_sz_c - 1) / tx_sz_c;
- const int tu_num_h_c = (max_4x4_h_c + tx_sz_c - 1) / tx_sz_c;
- const int tu_num_c = tu_num_w_c * tu_num_h_c;
-
- int tu_idx_c = 0;
- int offset_y, row_y, col_y;
- int offset_c, row_c, col_c;
-
- for (row_y = 0; row_y < tu_num_h_y; row_y++) {
- for (col_y = 0; col_y < tu_num_w_y; col_y++) {
- // luma
- offset_y = (row_y * tu_num_w_y + col_y) * step_y + row_y * extra_step_y;
- visit(0, offset_y, row_y * tx_sz_y, col_y * tx_sz_y, plane_bsize_y,
- tx_log2_y, arg);
- // chroma
- if (tu_idx_c < tu_num_c) {
- row_c = (tu_idx_c / tu_num_w_c) * tx_sz_c;
- col_c = (tu_idx_c % tu_num_w_c) * tx_sz_c;
- offset_c = tu_idx_c * step_c + (tu_idx_c / tu_num_w_c) * extra_step_c;
- visit(1, offset_c, row_c, col_c, plane_bsize_c, tx_log2_c, arg);
- visit(2, offset_c, row_c, col_c, plane_bsize_c, tx_log2_c, arg);
- tu_idx_c++;
- }
- }
- }
-
- // In 422 case, it's possible that Chroma has more TUs than Luma
- while (tu_idx_c < tu_num_c) {
- row_c = (tu_idx_c / tu_num_w_c) * tx_sz_c;
- col_c = (tu_idx_c % tu_num_w_c) * tx_sz_c;
- offset_c = tu_idx_c * step_c + row_c * extra_step_c;
- visit(1, offset_c, row_c, col_c, plane_bsize_c, tx_log2_c, arg);
- visit(2, offset_c, row_c, col_c, plane_bsize_c, tx_log2_c, arg);
- tu_idx_c++;
- }
+PREDICTION_MODE av1_above_block_mode(const MB_MODE_INFO *above_mi) {
+ if (!above_mi) return DC_PRED;
+ assert(!is_inter_block(above_mi) || is_intrabc_block(above_mi));
+ return above_mi->mode;
}
-#endif
void av1_foreach_transformed_block_in_plane(
const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
@@ -128,12 +36,8 @@ void av1_foreach_transformed_block_in_plane(
// 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
// transform size varies per plane, look it up in a common way.
const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
-#if CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE plane_bsize =
- AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
-#else
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
-#endif
+ get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
const uint8_t txw_unit = tx_size_wide_unit[tx_size];
const uint8_t txh_unit = tx_size_high_unit[tx_size];
const int step = txw_unit * txh_unit;
@@ -147,7 +51,8 @@ void av1_foreach_transformed_block_in_plane(
int blk_row, blk_col;
- const BLOCK_SIZE max_unit_bsize = get_plane_block_size(BLOCK_64X64, pd);
+ const BLOCK_SIZE max_unit_bsize =
+ get_plane_block_size(BLOCK_64X64, pd->subsampling_x, pd->subsampling_y);
int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
mu_blocks_wide = AOMMIN(max_blocks_wide, mu_blocks_wide);
@@ -170,95 +75,60 @@ void av1_foreach_transformed_block_in_plane(
}
}
-#if CONFIG_LV_MAP
void av1_foreach_transformed_block(const MACROBLOCKD *const xd,
BLOCK_SIZE bsize, int mi_row, int mi_col,
foreach_transformed_block_visitor visit,
- void *arg) {
- int plane;
-
- for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-#if CONFIG_CB4X4
+ void *arg, const int num_planes) {
+ for (int plane = 0; plane < num_planes; ++plane) {
if (!is_chroma_reference(mi_row, mi_col, bsize,
xd->plane[plane].subsampling_x,
xd->plane[plane].subsampling_y))
continue;
-#else
- (void)mi_row;
- (void)mi_col;
-#endif
av1_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
}
}
-#endif
-#if !CONFIG_PVQ || CONFIG_VAR_TX
void av1_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
- int plane, TX_SIZE tx_size, int has_eob, int aoff,
- int loff) {
+ int plane, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ int has_eob, int aoff, int loff) {
ENTROPY_CONTEXT *const a = pd->above_context + aoff;
ENTROPY_CONTEXT *const l = pd->left_context + loff;
const int txs_wide = tx_size_wide_unit[tx_size];
const int txs_high = tx_size_high_unit[tx_size];
-#if CONFIG_CB4X4
- const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
-#else
- const BLOCK_SIZE bsize = AOMMAX(xd->mi[0]->mbmi.sb_type, BLOCK_8X8);
-#endif
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
// above
if (has_eob && xd->mb_to_right_edge < 0) {
- int i;
const int blocks_wide = max_block_wide(xd, plane_bsize, plane);
- int above_contexts = txs_wide;
- if (above_contexts + aoff > blocks_wide)
- above_contexts = blocks_wide - aoff;
-
- for (i = 0; i < above_contexts; ++i) a[i] = has_eob;
- for (i = above_contexts; i < txs_wide; ++i) a[i] = 0;
+ const int above_contexts = AOMMIN(txs_wide, blocks_wide - aoff);
+ memset(a, has_eob, sizeof(*a) * above_contexts);
+ memset(a + above_contexts, 0, sizeof(*a) * (txs_wide - above_contexts));
} else {
- memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * txs_wide);
+ memset(a, has_eob, sizeof(*a) * txs_wide);
}
// left
if (has_eob && xd->mb_to_bottom_edge < 0) {
- int i;
const int blocks_high = max_block_high(xd, plane_bsize, plane);
- int left_contexts = txs_high;
- if (left_contexts + loff > blocks_high) left_contexts = blocks_high - loff;
-
- for (i = 0; i < left_contexts; ++i) l[i] = has_eob;
- for (i = left_contexts; i < txs_high; ++i) l[i] = 0;
+ const int left_contexts = AOMMIN(txs_high, blocks_high - loff);
+ memset(l, has_eob, sizeof(*l) * left_contexts);
+ memset(l + left_contexts, 0, sizeof(*l) * (txs_high - left_contexts));
} else {
- memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * txs_high);
+ memset(l, has_eob, sizeof(*l) * txs_high);
}
}
-#endif
-
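The clamp-then-memset pattern in av1_set_contexts() can be mirrored in isolation; a minimal sketch with a hypothetical helper name, covering only the clamped branch (a block that crosses the right frame edge):

#include <string.h>

/* Only the columns that fall inside the visible part of the block keep the
 * EOB flag; columns past the clamp are cleared. */
static void set_above_ctx(unsigned char *a, int txs_wide, int blocks_wide,
                          int aoff, int has_eob) {
  const int n = txs_wide < blocks_wide - aoff ? txs_wide : blocks_wide - aoff;
  memset(a, has_eob, (size_t)n);
  memset(a + n, 0, (size_t)(txs_wide - n));
}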
void av1_reset_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col,
- BLOCK_SIZE bsize) {
+ BLOCK_SIZE bsize, const int num_planes) {
int i;
int nplanes;
-#if CONFIG_CB4X4
int chroma_ref;
chroma_ref =
is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
xd->plane[1].subsampling_y);
- nplanes = 1 + (MAX_MB_PLANE - 1) * chroma_ref;
-#else
- (void)mi_row;
- (void)mi_col;
- nplanes = MAX_MB_PLANE;
-#endif
+ nplanes = 1 + (num_planes - 1) * chroma_ref;
for (i = 0; i < nplanes; i++) {
struct macroblockd_plane *const pd = &xd->plane[i];
-#if CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE plane_bsize =
- AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
-#else
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
-#endif
+ get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
const int txs_wide = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
const int txs_high = block_size_high[plane_bsize] >> tx_size_high_log2[0];
memset(pd->above_context, 0, sizeof(ENTROPY_CONTEXT) * txs_wide);
@@ -266,38 +136,61 @@ void av1_reset_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col,
}
}
-void av1_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y) {
+void av1_reset_loop_filter_delta(MACROBLOCKD *xd, int num_planes) {
+ xd->delta_lf_from_base = 0;
+ const int frame_lf_count =
+ num_planes > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
+ for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) xd->delta_lf[lf_id] = 0;
+}
+
+void av1_reset_loop_restoration(MACROBLOCKD *xd, const int num_planes) {
+ for (int p = 0; p < num_planes; ++p) {
+ set_default_wiener(xd->wiener_info + p);
+ set_default_sgrproj(xd->sgrproj_info + p);
+ }
+}
+
+void av1_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y,
+ const int num_planes) {
int i;
- for (i = 0; i < MAX_MB_PLANE; i++) {
+ for (i = 0; i < num_planes; i++) {
xd->plane[i].plane_type = get_plane_type(i);
xd->plane[i].subsampling_x = i ? ss_x : 0;
xd->plane[i].subsampling_y = i ? ss_y : 0;
}
}
-#if CONFIG_EXT_INTRA
const int16_t dr_intra_derivative[90] = {
- 1, 14666, 7330, 4884, 3660, 2926, 2435, 2084, 1821, 1616, 1451, 1317, 1204,
- 1108, 1026, 955, 892, 837, 787, 743, 703, 666, 633, 603, 574, 548,
- 524, 502, 481, 461, 443, 426, 409, 394, 379, 365, 352, 339, 327,
- 316, 305, 294, 284, 274, 265, 256, 247, 238, 230, 222, 214, 207,
- 200, 192, 185, 179, 172, 166, 159, 153, 147, 141, 136, 130, 124,
- 119, 113, 108, 103, 98, 93, 88, 83, 78, 73, 68, 63, 59,
- 54, 49, 45, 40, 35, 31, 26, 22, 17, 13, 8, 4,
+ // More evenly spread out angles and limited to 10-bit
+ // Values that are 0 will never be used
+ // Approx angle
+ 0, 0, 0, //
+ 1023, 0, 0, // 3, ...
+ 547, 0, 0, // 6, ...
+ 372, 0, 0, 0, 0, // 9, ...
+ 273, 0, 0, // 14, ...
+ 215, 0, 0, // 17, ...
+ 178, 0, 0, // 20, ...
+ 151, 0, 0, // 23, ... (113 & 203 are base angles)
+ 132, 0, 0, // 26, ...
+ 116, 0, 0, // 29, ...
+ 102, 0, 0, 0, // 32, ...
+ 90, 0, 0, // 36, ...
+ 80, 0, 0, // 39, ...
+ 71, 0, 0, // 42, ...
+ 64, 0, 0, // 45, ... (45 & 135 are base angles)
+ 57, 0, 0, // 48, ...
+ 51, 0, 0, // 51, ...
+ 45, 0, 0, 0, // 54, ...
+ 40, 0, 0, // 58, ...
+ 35, 0, 0, // 61, ...
+ 31, 0, 0, // 64, ...
+ 27, 0, 0, // 67, ... (67 & 157 are base angles)
+ 23, 0, 0, // 70, ...
+ 19, 0, 0, // 73, ...
+ 15, 0, 0, 0, 0, // 76, ...
+ 11, 0, 0, // 81, ...
+ 7, 0, 0, // 84, ...
+ 3, 0, 0, // 87, ...
};
-
-#if CONFIG_INTRA_INTERP
-int av1_is_intra_filter_switchable(int angle) {
- assert(angle > 0 && angle < 270);
- if (angle % 45 == 0) return 0;
- if (angle > 90 && angle < 180) {
- return 1;
- } else {
- return ((angle < 90 ? dr_intra_derivative[angle]
- : dr_intra_derivative[270 - angle]) &
- 0xFF) > 0;
- }
-}
-#endif // CONFIG_INTRA_INTERP
-#endif // CONFIG_EXT_INTRA
diff --git a/third_party/aom/av1/common/blockd.h b/third_party/aom/av1/common/blockd.h
index 01a449a1c..3e8d1d6c6 100644
--- a/third_party/aom/av1/common/blockd.h
+++ b/third_party/aom/av1/common/blockd.h
@@ -12,7 +12,7 @@
#ifndef AV1_COMMON_BLOCKD_H_
#define AV1_COMMON_BLOCKD_H_
-#include "./aom_config.h"
+#include "config/aom_config.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/mem.h"
@@ -26,104 +26,40 @@
#include "av1/common/scale.h"
#include "av1/common/seg_common.h"
#include "av1/common/tile_common.h"
-#if CONFIG_PVQ
-#include "av1/common/pvq.h"
-#include "av1/common/pvq_state.h"
-#include "av1/decoder/decint.h"
-#endif
+
#ifdef __cplusplus
extern "C" {
#endif
-#if (CONFIG_CHROMA_SUB8X8 || CONFIG_CHROMA_2X2)
-#define SUB8X8_COMP_REF 0
-#else
-#define SUB8X8_COMP_REF 1
-#endif
+#define USE_B_QUANT_NO_TRELLIS 1
#define MAX_MB_PLANE 3
-#if CONFIG_COMPOUND_SEGMENT
-// Set COMPOUND_SEGMENT_TYPE to one of the three
-// 0: Uniform
-// 1: Difference weighted
-#define COMPOUND_SEGMENT_TYPE 1
-#define MAX_SEG_MASK_BITS 1
+#define MAX_DIFFWTD_MASK_BITS 1
-// SEG_MASK_TYPES should not surpass 1 << MAX_SEG_MASK_BITS
+// DIFFWTD_MASK_TYPES should not surpass 1 << MAX_DIFFWTD_MASK_BITS
typedef enum {
-#if COMPOUND_SEGMENT_TYPE == 0
- UNIFORM_45 = 0,
- UNIFORM_45_INV,
-#elif COMPOUND_SEGMENT_TYPE == 1
DIFFWTD_38 = 0,
DIFFWTD_38_INV,
-#endif // COMPOUND_SEGMENT_TYPE
- SEG_MASK_TYPES,
-} SEG_MASK_TYPE;
-
-#endif // CONFIG_COMPOUND_SEGMENT
+ DIFFWTD_MASK_TYPES,
+} DIFFWTD_MASK_TYPE;
typedef enum {
KEY_FRAME = 0,
INTER_FRAME = 1,
-#if CONFIG_OBU
INTRA_ONLY_FRAME = 2, // replaces intra-only
S_FRAME = 3,
-#endif
FRAME_TYPES,
} FRAME_TYPE;
static INLINE int is_comp_ref_allowed(BLOCK_SIZE bsize) {
- (void)bsize;
-#if SUB8X8_COMP_REF
- return 1;
-#else
return AOMMIN(block_size_wide[bsize], block_size_high[bsize]) >= 8;
-#endif // SUB8X8_COMP_REF
}
static INLINE int is_inter_mode(PREDICTION_MODE mode) {
return mode >= NEARESTMV && mode <= NEW_NEWMV;
}
-#if CONFIG_PVQ
-typedef struct PVQ_INFO {
- int theta[PVQ_MAX_PARTITIONS];
- int qg[PVQ_MAX_PARTITIONS];
- int k[PVQ_MAX_PARTITIONS];
- od_coeff y[OD_TXSIZE_MAX * OD_TXSIZE_MAX];
- int nb_bands;
- int off[PVQ_MAX_PARTITIONS];
- int size[PVQ_MAX_PARTITIONS];
- int skip_rest;
- int skip_dir;
- int bs; // log of the block size minus two,
- // i.e. equivalent to aom's TX_SIZE
- // Block skip info, indicating whether DC/AC, is coded.
- PVQ_SKIP_TYPE ac_dc_coded; // bit0: DC coded, bit1 : AC coded (1 means coded)
- tran_low_t dq_dc_residue;
-} PVQ_INFO;
-
-typedef struct PVQ_QUEUE {
- PVQ_INFO *buf; // buffer for pvq info, stored in encoding order
- int curr_pos; // curr position to write PVQ_INFO
- int buf_len; // allocated buffer length
- int last_pos; // last written position of PVQ_INFO in a tile
-} PVQ_QUEUE;
-#endif
-
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
-typedef struct superblock_mi_boundaries {
- int mi_row_begin;
- int mi_col_begin;
- int mi_row_end;
- int mi_col_end;
-} SB_MI_BD;
-
-typedef struct { int16_t KERNEL[4][MAX_SB_SIZE][MAX_SB_SIZE]; } NCOBMC_KERNELS;
-#endif
-
typedef struct {
uint8_t *plane[MAX_MB_PLANE];
int stride[MAX_MB_PLANE];
@@ -135,14 +71,6 @@ static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) {
static INLINE int is_inter_compound_mode(PREDICTION_MODE mode) {
return mode >= NEAREST_NEARESTMV && mode <= NEW_NEWMV;
}
-#if CONFIG_COMPOUND_SINGLEREF
-static INLINE int is_inter_singleref_comp_mode(PREDICTION_MODE mode) {
- return mode >= SR_NEAREST_NEARMV && mode <= SR_NEW_NEWMV;
-}
-static INLINE int is_inter_anyref_comp_mode(PREDICTION_MODE mode) {
- return is_inter_compound_mode(mode) || is_inter_singleref_comp_mode(mode);
-}
-#endif // CONFIG_COMPOUND_SINGLEREF
static INLINE PREDICTION_MODE compound_ref0_mode(PREDICTION_MODE mode) {
static PREDICTION_MODE lut[] = {
@@ -151,42 +79,29 @@ static INLINE PREDICTION_MODE compound_ref0_mode(PREDICTION_MODE mode) {
MB_MODE_COUNT, // H_PRED
MB_MODE_COUNT, // D45_PRED
MB_MODE_COUNT, // D135_PRED
- MB_MODE_COUNT, // D117_PRED
- MB_MODE_COUNT, // D153_PRED
- MB_MODE_COUNT, // D207_PRED
- MB_MODE_COUNT, // D63_PRED
+ MB_MODE_COUNT, // D113_PRED
+ MB_MODE_COUNT, // D157_PRED
+ MB_MODE_COUNT, // D203_PRED
+ MB_MODE_COUNT, // D67_PRED
MB_MODE_COUNT, // SMOOTH_PRED
-#if CONFIG_SMOOTH_HV
MB_MODE_COUNT, // SMOOTH_V_PRED
MB_MODE_COUNT, // SMOOTH_H_PRED
-#endif // CONFIG_SMOOTH_HV
- MB_MODE_COUNT, // TM_PRED
+ MB_MODE_COUNT, // PAETH_PRED
MB_MODE_COUNT, // NEARESTMV
MB_MODE_COUNT, // NEARMV
- MB_MODE_COUNT, // ZEROMV
+ MB_MODE_COUNT, // GLOBALMV
MB_MODE_COUNT, // NEWMV
-#if CONFIG_COMPOUND_SINGLEREF
- NEARESTMV, // SR_NEAREST_NEARMV
- // NEARESTMV, // SR_NEAREST_NEWMV
- NEARMV, // SR_NEAR_NEWMV
- ZEROMV, // SR_ZERO_NEWMV
- NEWMV, // SR_NEW_NEWMV
-#endif // CONFIG_COMPOUND_SINGLEREF
- NEARESTMV, // NEAREST_NEARESTMV
- NEARMV, // NEAR_NEARMV
- NEARESTMV, // NEAREST_NEWMV
- NEWMV, // NEW_NEARESTMV
- NEARMV, // NEAR_NEWMV
- NEWMV, // NEW_NEARMV
- ZEROMV, // ZERO_ZEROMV
- NEWMV, // NEW_NEWMV
+ NEARESTMV, // NEAREST_NEARESTMV
+ NEARMV, // NEAR_NEARMV
+ NEARESTMV, // NEAREST_NEWMV
+ NEWMV, // NEW_NEARESTMV
+ NEARMV, // NEAR_NEWMV
+ NEWMV, // NEW_NEARMV
+ GLOBALMV, // GLOBAL_GLOBALMV
+ NEWMV, // NEW_NEWMV
};
assert(NELEMENTS(lut) == MB_MODE_COUNT);
-#if CONFIG_COMPOUND_SINGLEREF
- assert(is_inter_anyref_comp_mode(mode));
-#else // !CONFIG_COMPOUND_SINGLEREF
assert(is_inter_compound_mode(mode));
-#endif // CONFIG_COMPOUND_SINGLEREF
return lut[mode];
}
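Read together with compound_ref1_mode() just below, the two lookup tables simply split a compound inter mode into its per-reference single modes; for example, straight from the tables:

/* NEAREST_NEWMV   -> (ref0, ref1) = (NEARESTMV, NEWMV)
 * NEW_NEARESTMV   -> (NEWMV, NEARESTMV)
 * GLOBAL_GLOBALMV -> (GLOBALMV, GLOBALMV) */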
@@ -197,94 +112,54 @@ static INLINE PREDICTION_MODE compound_ref1_mode(PREDICTION_MODE mode) {
MB_MODE_COUNT, // H_PRED
MB_MODE_COUNT, // D45_PRED
MB_MODE_COUNT, // D135_PRED
- MB_MODE_COUNT, // D117_PRED
- MB_MODE_COUNT, // D153_PRED
- MB_MODE_COUNT, // D207_PRED
- MB_MODE_COUNT, // D63_PRED
+ MB_MODE_COUNT, // D113_PRED
+ MB_MODE_COUNT, // D157_PRED
+ MB_MODE_COUNT, // D203_PRED
+ MB_MODE_COUNT, // D67_PRED
MB_MODE_COUNT, // SMOOTH_PRED
-#if CONFIG_SMOOTH_HV
MB_MODE_COUNT, // SMOOTH_V_PRED
MB_MODE_COUNT, // SMOOTH_H_PRED
-#endif // CONFIG_SMOOTH_HV
- MB_MODE_COUNT, // TM_PRED
+ MB_MODE_COUNT, // PAETH_PRED
MB_MODE_COUNT, // NEARESTMV
MB_MODE_COUNT, // NEARMV
- MB_MODE_COUNT, // ZEROMV
+ MB_MODE_COUNT, // GLOBALMV
MB_MODE_COUNT, // NEWMV
-#if CONFIG_COMPOUND_SINGLEREF
- NEARMV, // SR_NEAREST_NEARMV
- // NEWMV, // SR_NEAREST_NEWMV
- NEWMV, // SR_NEAR_NEWMV
- NEWMV, // SR_ZERO_NEWMV
- NEWMV, // SR_NEW_NEWMV
-#endif // CONFIG_COMPOUND_SINGLEREF
- NEARESTMV, // NEAREST_NEARESTMV
- NEARMV, // NEAR_NEARMV
- NEWMV, // NEAREST_NEWMV
- NEARESTMV, // NEW_NEARESTMV
- NEWMV, // NEAR_NEWMV
- NEARMV, // NEW_NEARMV
- ZEROMV, // ZERO_ZEROMV
- NEWMV, // NEW_NEWMV
+ NEARESTMV, // NEAREST_NEARESTMV
+ NEARMV, // NEAR_NEARMV
+ NEWMV, // NEAREST_NEWMV
+ NEARESTMV, // NEW_NEARESTMV
+ NEWMV, // NEAR_NEWMV
+ NEARMV, // NEW_NEARMV
+ GLOBALMV, // GLOBAL_GLOBALMV
+ NEWMV, // NEW_NEWMV
};
assert(NELEMENTS(lut) == MB_MODE_COUNT);
-#if CONFIG_COMPOUND_SINGLEREF
- assert(is_inter_anyref_comp_mode(mode));
-#else // !CONFIG_COMPOUND_SINGLEREF
assert(is_inter_compound_mode(mode));
-#endif // CONFIG_COMPOUND_SINGLEREF
return lut[mode];
}
static INLINE int have_nearmv_in_inter_mode(PREDICTION_MODE mode) {
return (mode == NEARMV || mode == NEAR_NEARMV || mode == NEAR_NEWMV ||
-#if CONFIG_COMPOUND_SINGLEREF
- mode == SR_NEAREST_NEARMV || mode == SR_NEAR_NEWMV ||
-#endif // CONFIG_COMPOUND_SINGLEREF
mode == NEW_NEARMV);
}
static INLINE int have_newmv_in_inter_mode(PREDICTION_MODE mode) {
return (mode == NEWMV || mode == NEW_NEWMV || mode == NEAREST_NEWMV ||
-#if CONFIG_COMPOUND_SINGLEREF
- /* mode == SR_NEAREST_NEWMV || */ mode == SR_NEAR_NEWMV ||
- mode == SR_ZERO_NEWMV || mode == SR_NEW_NEWMV ||
-#endif // CONFIG_COMPOUND_SINGLEREF
mode == NEW_NEARESTMV || mode == NEAR_NEWMV || mode == NEW_NEARMV);
}
static INLINE int use_masked_motion_search(COMPOUND_TYPE type) {
-#if CONFIG_WEDGE
return (type == COMPOUND_WEDGE);
-#else
- (void)type;
- return 0;
-#endif
}
static INLINE int is_masked_compound_type(COMPOUND_TYPE type) {
-#if CONFIG_COMPOUND_SEGMENT && CONFIG_WEDGE
- return (type == COMPOUND_WEDGE || type == COMPOUND_SEG);
-#elif !CONFIG_COMPOUND_SEGMENT && CONFIG_WEDGE
- return (type == COMPOUND_WEDGE);
-#elif CONFIG_COMPOUND_SEGMENT && !CONFIG_WEDGE
- return (type == COMPOUND_SEG);
-#endif // CONFIG_COMPOUND_SEGMENT
- (void)type;
- return 0;
+ return (type == COMPOUND_WEDGE || type == COMPOUND_DIFFWTD);
}
/* For keyframes, intra block modes are predicted by the (already decoded)
modes for the Y blocks to the left and above us; for interframes, there
is a single probability table. */
-typedef struct {
- PREDICTION_MODE as_mode;
- int_mv as_mv[2]; // first, second inter predictor motion vectors
- int_mv pred_mv[2];
- int_mv ref_mv[2];
-} b_mode_info;
-
typedef int8_t MV_REFERENCE_FRAME;
typedef struct {
@@ -294,19 +169,17 @@ typedef struct {
uint16_t palette_colors[3 * PALETTE_MAX_SIZE];
} PALETTE_MODE_INFO;
-#if CONFIG_FILTER_INTRA
-#define USE_3TAP_INTRA_FILTER 1 // 0: 4-tap; 1: 3-tap
typedef struct {
- // 1: an ext intra mode is used; 0: otherwise.
- uint8_t use_filter_intra_mode[PLANE_TYPES];
- FILTER_INTRA_MODE filter_intra_mode[PLANE_TYPES];
+ uint8_t use_filter_intra;
+ FILTER_INTRA_MODE filter_intra_mode;
} FILTER_INTRA_MODE_INFO;
-#endif // CONFIG_FILTER_INTRA
-#if CONFIG_VAR_TX
+static const PREDICTION_MODE fimode_to_intradir[FILTER_INTRA_MODES] = {
+ DC_PRED, V_PRED, H_PRED, D157_PRED, DC_PRED
+};
+
#if CONFIG_RD_DEBUG
-#define TXB_COEFF_COST_MAP_SIZE (2 * MAX_MIB_SIZE)
-#endif
+#define TXB_COEFF_COST_MAP_SIZE (MAX_MIB_SIZE)
#endif
typedef struct RD_STATS {
@@ -325,213 +198,122 @@ typedef struct RD_STATS {
uint8_t invalid_rate;
#if CONFIG_RD_DEBUG
int txb_coeff_cost[MAX_MB_PLANE];
-#if CONFIG_VAR_TX
int txb_coeff_cost_map[MAX_MB_PLANE][TXB_COEFF_COST_MAP_SIZE]
[TXB_COEFF_COST_MAP_SIZE];
-#endif // CONFIG_VAR_TX
#endif // CONFIG_RD_DEBUG
} RD_STATS;
// This struct is used to group function args that are commonly
// sent together in functions related to interinter compound modes
typedef struct {
-#if CONFIG_WEDGE
int wedge_index;
int wedge_sign;
-#endif // CONFIG_WEDGE
-#if CONFIG_COMPOUND_SEGMENT
- SEG_MASK_TYPE mask_type;
+ DIFFWTD_MASK_TYPE mask_type;
uint8_t *seg_mask;
-#endif // CONFIG_COMPOUND_SEGMENT
- COMPOUND_TYPE interinter_compound_type;
+ COMPOUND_TYPE type;
} INTERINTER_COMPOUND_DATA;
-// This structure now relates to 8x8 block regions.
+#define INTER_TX_SIZE_BUF_LEN 16
+#define TXK_TYPE_BUF_LEN 64
+// This structure now relates to 4x4 block regions.
typedef struct MB_MODE_INFO {
// Common for both INTER and INTRA blocks
BLOCK_SIZE sb_type;
PREDICTION_MODE mode;
TX_SIZE tx_size;
-#if CONFIG_VAR_TX
- // TODO(jingning): This effectively assigned a separate entry for each
- // 8x8 block. Apparently it takes much more space than needed.
- TX_SIZE inter_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
- TX_SIZE min_tx_size;
-#endif
+ uint8_t inter_tx_size[INTER_TX_SIZE_BUF_LEN];
int8_t skip;
+ int8_t skip_mode;
int8_t segment_id;
-#if CONFIG_SUPERTX
- // Minimum of all segment IDs under the current supertx block.
- int8_t segment_id_supertx;
-#endif // CONFIG_SUPERTX
int8_t seg_id_predicted; // valid only when temporal_update is enabled
-#if CONFIG_MRC_TX
- int valid_mrc_mask;
-#endif // CONFIG_MRC_TX
-
// Only for INTRA blocks
UV_PREDICTION_MODE uv_mode;
PALETTE_MODE_INFO palette_mode_info;
-#if CONFIG_INTRABC
uint8_t use_intrabc;
-#endif // CONFIG_INTRABC
// Only for INTER blocks
InterpFilters interp_filters;
MV_REFERENCE_FRAME ref_frame[2];
- TX_TYPE tx_type;
-#if CONFIG_TXK_SEL
- TX_TYPE txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
-#endif
-#if CONFIG_LGT_FROM_PRED
- int use_lgt;
-#endif
-#if CONFIG_FILTER_INTRA
+ TX_TYPE txk_type[TXK_TYPE_BUF_LEN];
+
FILTER_INTRA_MODE_INFO filter_intra_mode_info;
-#endif // CONFIG_FILTER_INTRA
-#if CONFIG_EXT_INTRA
+
// The actual prediction angle is the base angle + (angle_delta * step).
- int8_t angle_delta[2];
-#if CONFIG_INTRA_INTERP
- // To-Do (huisu): this may be replaced by interp_filter
- INTRA_FILTER intra_filter;
-#endif // CONFIG_INTRA_INTERP
-#endif // CONFIG_EXT_INTRA
-
-#if CONFIG_INTERINTRA
+ int8_t angle_delta[PLANE_TYPES];
+
// interintra members
INTERINTRA_MODE interintra_mode;
-#endif
// TODO(debargha): Consolidate these flags
int use_wedge_interintra;
int interintra_wedge_index;
int interintra_wedge_sign;
// interinter members
- COMPOUND_TYPE interinter_compound_type;
-#if CONFIG_WEDGE
- int wedge_index;
- int wedge_sign;
-#endif // CONFIG_WEDGE
-#if CONFIG_COMPOUND_SEGMENT
- SEG_MASK_TYPE mask_type;
-#endif // CONFIG_COMPOUND_SEGMENT
+ INTERINTER_COMPOUND_DATA interinter_comp;
MOTION_MODE motion_mode;
-#if CONFIG_MOTION_VAR
int overlappable_neighbors[2];
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- // Applying different weighting kernels in ncobmc
- // In current implementation, interpolation modes only defined for squared
- // blocks. A rectangular block is divided into two squared blocks and each
- // squared block has an interpolation mode.
- NCOBMC_MODE ncobmc_mode[2];
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
-#endif // CONFIG_MOTION_VAR
int_mv mv[2];
- int_mv pred_mv[2];
uint8_t ref_mv_idx;
-#if CONFIG_EXT_PARTITION_TYPES
PARTITION_TYPE partition;
-#endif
-#if CONFIG_NEW_QUANT
- int dq_off_index;
- int send_dq_bit;
-#endif // CONFIG_NEW_QUANT
/* deringing gain *per-superblock* */
int8_t cdef_strength;
- int current_q_index;
-#if CONFIG_EXT_DELTA_Q
- int current_delta_lf_from_base;
-#if CONFIG_LOOPFILTER_LEVEL
- int curr_delta_lf[FRAME_LF_COUNT];
-#endif // CONFIG_LOOPFILTER_LEVEL
-#endif
+ int current_qindex;
+ int delta_lf_from_base;
+ int delta_lf[FRAME_LF_COUNT];
#if CONFIG_RD_DEBUG
RD_STATS rd_stats;
int mi_row;
int mi_col;
#endif
-#if CONFIG_WARPED_MOTION
int num_proj_ref[2];
WarpedMotionParams wm_params[2];
-#endif // CONFIG_WARPED_MOTION
-#if CONFIG_CFL
// Index of the alpha Cb and alpha Cr combination
int cfl_alpha_idx;
// Joint sign of alpha Cb and alpha Cr
int cfl_alpha_signs;
-#endif
- BOUNDARY_TYPE boundary_info;
-#if CONFIG_LPF_SB
- uint8_t filt_lvl;
- int reuse_sb_lvl;
- int sign;
- int delta;
-#endif
+ int compound_idx;
+ int comp_group_idx;
} MB_MODE_INFO;
-typedef struct MODE_INFO {
- MB_MODE_INFO mbmi;
- b_mode_info bmi[4];
-} MODE_INFO;
-
-#if CONFIG_INTRABC
static INLINE int is_intrabc_block(const MB_MODE_INFO *mbmi) {
return mbmi->use_intrabc;
}
-#endif
-
-static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) {
-#if CONFIG_CB4X4
- (void)block;
- return mi->mbmi.mode;
-#else
- return mi->mbmi.sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode : mi->mbmi.mode;
-#endif
-}
-#if CONFIG_CFL
static INLINE PREDICTION_MODE get_uv_mode(UV_PREDICTION_MODE mode) {
- static const PREDICTION_MODE uv2y[UV_INTRA_MODES] = {
- DC_PRED, // UV_DC_PRED
- V_PRED, // UV_V_PRED
- H_PRED, // UV_H_PRED
- D45_PRED, // UV_D45_PRED
- D135_PRED, // UV_D135_PRED
- D117_PRED, // UV_D117_PRED
- D153_PRED, // UV_D153_PRED
- D207_PRED, // UV_D207_PRED
- D63_PRED, // UV_D63_PRED
- SMOOTH_PRED, // UV_SMOOTH_PRED
-#if CONFIG_SMOOTH_HV
+ assert(mode < UV_INTRA_MODES);
+ static const PREDICTION_MODE uv2y[] = {
+ DC_PRED, // UV_DC_PRED
+ V_PRED, // UV_V_PRED
+ H_PRED, // UV_H_PRED
+ D45_PRED, // UV_D45_PRED
+ D135_PRED, // UV_D135_PRED
+ D113_PRED, // UV_D113_PRED
+ D157_PRED, // UV_D157_PRED
+ D203_PRED, // UV_D203_PRED
+ D67_PRED, // UV_D67_PRED
+ SMOOTH_PRED, // UV_SMOOTH_PRED
SMOOTH_V_PRED, // UV_SMOOTH_V_PRED
SMOOTH_H_PRED, // UV_SMOOTH_H_PRED
-#endif // CONFIG_SMOOTH_HV
- TM_PRED, // UV_TM_PRED
- DC_PRED, // CFL_PRED
+ PAETH_PRED, // UV_PAETH_PRED
+ DC_PRED, // UV_CFL_PRED
+ INTRA_INVALID, // UV_INTRA_MODES
+ INTRA_INVALID, // UV_MODE_INVALID
};
return uv2y[mode];
}
-#else
-static INLINE PREDICTION_MODE get_uv_mode(PREDICTION_MODE mode) { return mode; }
-#endif // CONFIG_CFL
static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) {
-#if CONFIG_INTRABC
- if (is_intrabc_block(mbmi)) return 1;
-#endif
- return mbmi->ref_frame[0] > INTRA_FRAME;
+ return is_intrabc_block(mbmi) || mbmi->ref_frame[0] > INTRA_FRAME;
}
static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) {
return mbmi->ref_frame[1] > INTRA_FRAME;
}
-#if CONFIG_EXT_COMP_REFS
static INLINE int has_uni_comp_refs(const MB_MODE_INFO *mbmi) {
return has_second_ref(mbmi) && (!((mbmi->ref_frame[0] >= BWDREF_FRAME) ^
(mbmi->ref_frame[1] >= BWDREF_FRAME)));
@@ -539,48 +321,60 @@ static INLINE int has_uni_comp_refs(const MB_MODE_INFO *mbmi) {
static INLINE MV_REFERENCE_FRAME comp_ref0(int ref_idx) {
static const MV_REFERENCE_FRAME lut[] = {
- LAST_FRAME, // LAST_LAST2_FRAMES,
- LAST_FRAME, // LAST_LAST3_FRAMES,
- LAST_FRAME, // LAST_GOLDEN_FRAMES,
- BWDREF_FRAME, // BWDREF_ALTREF_FRAMES,
+ LAST_FRAME, // LAST_LAST2_FRAMES,
+ LAST_FRAME, // LAST_LAST3_FRAMES,
+ LAST_FRAME, // LAST_GOLDEN_FRAMES,
+ BWDREF_FRAME, // BWDREF_ALTREF_FRAMES,
+ LAST2_FRAME, // LAST2_LAST3_FRAMES
+ LAST2_FRAME, // LAST2_GOLDEN_FRAMES,
+ LAST3_FRAME, // LAST3_GOLDEN_FRAMES,
+ BWDREF_FRAME, // BWDREF_ALTREF2_FRAMES,
+ ALTREF2_FRAME, // ALTREF2_ALTREF_FRAMES,
};
- assert(NELEMENTS(lut) == UNIDIR_COMP_REFS);
+ assert(NELEMENTS(lut) == TOTAL_UNIDIR_COMP_REFS);
return lut[ref_idx];
}
static INLINE MV_REFERENCE_FRAME comp_ref1(int ref_idx) {
static const MV_REFERENCE_FRAME lut[] = {
- LAST2_FRAME, // LAST_LAST2_FRAMES,
- LAST3_FRAME, // LAST_LAST3_FRAMES,
- GOLDEN_FRAME, // LAST_GOLDEN_FRAMES,
- ALTREF_FRAME, // BWDREF_ALTREF_FRAMES,
+ LAST2_FRAME, // LAST_LAST2_FRAMES,
+ LAST3_FRAME, // LAST_LAST3_FRAMES,
+ GOLDEN_FRAME, // LAST_GOLDEN_FRAMES,
+ ALTREF_FRAME, // BWDREF_ALTREF_FRAMES,
+ LAST3_FRAME, // LAST2_LAST3_FRAMES
+ GOLDEN_FRAME, // LAST2_GOLDEN_FRAMES,
+ GOLDEN_FRAME, // LAST3_GOLDEN_FRAMES,
+ ALTREF2_FRAME, // BWDREF_ALTREF2_FRAMES,
+ ALTREF_FRAME, // ALTREF2_ALTREF_FRAMES,
};
- assert(NELEMENTS(lut) == UNIDIR_COMP_REFS);
+ assert(NELEMENTS(lut) == TOTAL_UNIDIR_COMP_REFS);
return lut[ref_idx];
}
-#endif // CONFIG_EXT_COMP_REFS
-PREDICTION_MODE av1_left_block_mode(const MODE_INFO *cur_mi,
- const MODE_INFO *left_mi, int b);
+PREDICTION_MODE av1_left_block_mode(const MB_MODE_INFO *left_mi);
-PREDICTION_MODE av1_above_block_mode(const MODE_INFO *cur_mi,
- const MODE_INFO *above_mi, int b);
+PREDICTION_MODE av1_above_block_mode(const MB_MODE_INFO *above_mi);
-#if CONFIG_GLOBAL_MOTION
-static INLINE int is_global_mv_block(const MODE_INFO *mi, int block,
+static INLINE int is_global_mv_block(const MB_MODE_INFO *const mbmi,
TransformationType type) {
- PREDICTION_MODE mode = get_y_mode(mi, block);
-#if GLOBAL_SUB8X8_USED
- const int block_size_allowed = 1;
-#else
- const BLOCK_SIZE bsize = mi->mbmi.sb_type;
+ const PREDICTION_MODE mode = mbmi->mode;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
const int block_size_allowed =
AOMMIN(block_size_wide[bsize], block_size_high[bsize]) >= 8;
-#endif // GLOBAL_SUB8X8_USED
- return (mode == ZEROMV || mode == ZERO_ZEROMV) && type > TRANSLATION &&
+ return (mode == GLOBALMV || mode == GLOBAL_GLOBALMV) && type > TRANSLATION &&
block_size_allowed;
}
-#endif // CONFIG_GLOBAL_MOTION
+
+#if CONFIG_MISMATCH_DEBUG
+static INLINE void mi_to_pixel_loc(int *pixel_c, int *pixel_r, int mi_col,
+ int mi_row, int tx_blk_col, int tx_blk_row,
+ int subsampling_x, int subsampling_y) {
+ *pixel_c = ((mi_col >> subsampling_x) << MI_SIZE_LOG2) +
+ (tx_blk_col << tx_size_wide_log2[0]);
+ *pixel_r = ((mi_row >> subsampling_y) << MI_SIZE_LOG2) +
+ (tx_blk_row << tx_size_high_log2[0]);
+}
+#endif
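A quick worked example of the conversion above, assuming 4x4 mode-info units (MI_SIZE_LOG2 == 2) and 4x4 minimum transform units (tx_size_wide_log2[0] == 2):

/* mi_col = 8, subsampling_x = 1 (chroma), tx_blk_col = 1:
 *   pixel_c = ((8 >> 1) << 2) + (1 << 2) = 16 + 4 = 20
 * mi_row = 6, subsampling_y = 1, tx_blk_row = 0:
 *   pixel_r = ((6 >> 1) << 2) + (0 << 2) = 12 */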
enum mv_precision { MV_PRECISION_Q3, MV_PRECISION_Q4 };
@@ -592,8 +386,22 @@ struct buf_2d {
int stride;
};
+typedef struct eob_info {
+ uint16_t eob;
+ uint16_t max_scan_line;
+} eob_info;
+
+typedef struct {
+ DECLARE_ALIGNED(32, tran_low_t, dqcoeff[MAX_MB_PLANE][MAX_SB_SQUARE]);
+ eob_info eob_data[MAX_MB_PLANE]
+ [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+ DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
+} CB_BUFFER;
+
typedef struct macroblockd_plane {
tran_low_t *dqcoeff;
+ tran_low_t *dqcoeff_block;
+ eob_info *eob_data;
PLANE_TYPE plane_type;
int subsampling_x;
int subsampling_y;
@@ -601,56 +409,36 @@ typedef struct macroblockd_plane {
struct buf_2d pre[2];
ENTROPY_CONTEXT *above_context;
ENTROPY_CONTEXT *left_context;
- int16_t seg_dequant[MAX_SEGMENTS][2];
-#if CONFIG_NEW_QUANT
- dequant_val_type_nuq seg_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES]
- [COEF_BANDS];
-#endif
+
+  // The dequantizers below are true dequantizers used only in the
+ // dequantization process. They have the same coefficient
+ // shift/scale as TX.
+ int16_t seg_dequant_QTX[MAX_SEGMENTS][2];
uint8_t *color_index_map;
- // number of 4x4s in current block
- uint16_t n4_w, n4_h;
- // log2 of n4_w, n4_h
- uint8_t n4_wl, n4_hl;
// block size in pixels
uint8_t width, height;
-#if CONFIG_AOM_QM
- qm_val_t *seg_iqmatrix[MAX_SEGMENTS][2][TX_SIZES_ALL];
- qm_val_t *seg_qmatrix[MAX_SEGMENTS][2][TX_SIZES_ALL];
-#endif
- // encoder
- const int16_t *dequant;
-#if CONFIG_NEW_QUANT
- const dequant_val_type_nuq *dequant_val_nuq[QUANT_PROFILES];
-#endif // CONFIG_NEW_QUANT
-
-#if CONFIG_PVQ || CONFIG_DIST_8X8
- DECLARE_ALIGNED(16, int16_t, pred[MAX_SB_SQUARE]);
-#endif
-#if CONFIG_PVQ
- // PVQ: forward transformed predicted image, a reference for PVQ.
- tran_low_t *pvq_ref_coeff;
-#endif
+ qm_val_t *seg_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL];
+ qm_val_t *seg_qmatrix[MAX_SEGMENTS][TX_SIZES_ALL];
+
+ // the 'dequantizers' below are not literal dequantizer values.
+ // They're used by encoder RDO to generate ad-hoc lambda values.
+ // They use a hardwired Q3 coeff shift and do not necessarily match
+ // the TX scale in use.
+ const int16_t *dequant_Q3;
} MACROBLOCKD_PLANE;
#define BLOCK_OFFSET(x, i) \
((x) + (i) * (1 << (tx_size_wide_log2[0] + tx_size_high_log2[0])))
typedef struct RefBuffer {
- int idx;
+ int idx; // frame buf idx
+ int map_idx; // frame map idx
YV12_BUFFER_CONFIG *buf;
struct scale_factors sf;
-#if CONFIG_VAR_REFS
- int is_valid;
-#endif // CONFIG_VAR_REFS
} RefBuffer;
-#if CONFIG_ADAPT_SCAN
-typedef int16_t EobThresholdMD[TX_TYPES][EOB_THRESHOLD_NUM];
-#endif
-
-#if CONFIG_LOOP_RESTORATION
typedef struct {
DECLARE_ALIGNED(16, InterpKernel, vfilter);
DECLARE_ALIGNED(16, InterpKernel, hfilter);
@@ -660,77 +448,75 @@ typedef struct {
int ep;
int xqd[2];
} SgrprojInfo;
-#endif // CONFIG_LOOP_RESTORATION
-#if CONFIG_CFL
-#if CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+#if CONFIG_DEBUG
#define CFL_SUB8X8_VAL_MI_SIZE (4)
#define CFL_SUB8X8_VAL_MI_SQUARE \
(CFL_SUB8X8_VAL_MI_SIZE * CFL_SUB8X8_VAL_MI_SIZE)
-#endif // CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+#endif // CONFIG_DEBUG
+#define CFL_MAX_BLOCK_SIZE (BLOCK_32X32)
+#define CFL_BUF_LINE (32)
+#define CFL_BUF_LINE_I128 (CFL_BUF_LINE >> 3)
+#define CFL_BUF_LINE_I256 (CFL_BUF_LINE >> 4)
+#define CFL_BUF_SQUARE (CFL_BUF_LINE * CFL_BUF_LINE)
typedef struct cfl_ctx {
- // The CfL prediction buffer is used in two steps:
- // 1. Stores Q3 reconstructed luma pixels
- // (only Q2 is required, but Q3 is used to avoid shifts)
- // 2. Stores Q3 AC contributions (step1 - tx block avg)
- int16_t pred_buf_q3[MAX_SB_SQUARE];
+ // Q3 reconstructed luma pixels (only Q2 is required, but Q3 is used to avoid
+ // shifts)
+ uint16_t recon_buf_q3[CFL_BUF_SQUARE];
+ // Q3 AC contributions (reconstructed luma pixels - tx block avg)
+ int16_t ac_buf_q3[CFL_BUF_SQUARE];
+
+ // Cache the DC_PRED when performing RDO, so it does not have to be recomputed
+ // for every scaling parameter
+ int dc_pred_is_cached[CFL_PRED_PLANES];
+  // The DC_PRED cache is disabled when decoding
+ int use_dc_pred_cache;
+ // Only cache the first row of the DC_PRED
+ int16_t dc_pred_cache[CFL_PRED_PLANES][CFL_BUF_LINE];
// Height and width currently used in the CfL prediction buffer.
int buf_height, buf_width;
- // Height and width of the chroma prediction block currently associated with
- // this context
- int uv_height, uv_width;
-
int are_parameters_computed;
// Chroma subsampling
int subsampling_x, subsampling_y;
- // Block level DC_PRED for each chromatic plane
- int dc_pred[CFL_PRED_PLANES];
-
int mi_row, mi_col;
// Whether the reconstructed luma pixels need to be stored
int store_y;
-#if CONFIG_CB4X4
+#if CONFIG_DEBUG
+ int rate;
+#endif // CONFIG_DEBUG
+
int is_chroma_reference;
-#if CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
- // The prediction used for sub8x8 blocks originates from multiple luma blocks,
- // this array is used to validate that cfl_store() is called only once for
- // each luma block
- uint8_t sub8x8_val[CFL_SUB8X8_VAL_MI_SQUARE];
-#endif // CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
-#endif // CONFIG_CB4X4
} CFL_CTX;
-#endif // CONFIG_CFL
+
+typedef struct jnt_comp_params {
+ int use_jnt_comp_avg;
+ int fwd_offset;
+ int bck_offset;
+} JNT_COMP_PARAMS;
typedef struct macroblockd {
struct macroblockd_plane plane[MAX_MB_PLANE];
- uint8_t bmode_blocks_wl;
- uint8_t bmode_blocks_hl;
- FRAME_COUNTS *counts;
TileInfo tile;
int mi_stride;
- MODE_INFO **mi;
- MODE_INFO *left_mi;
- MODE_INFO *above_mi;
+ MB_MODE_INFO **mi;
MB_MODE_INFO *left_mbmi;
MB_MODE_INFO *above_mbmi;
+ MB_MODE_INFO *chroma_left_mbmi;
+ MB_MODE_INFO *chroma_above_mbmi;
int up_available;
int left_available;
-#if CONFIG_CHROMA_SUB8X8
int chroma_up_available;
int chroma_left_available;
-#endif
-
- const aom_prob (*partition_probs)[PARTITION_TYPES - 1];
/* Distance of MB away from frame edges in subpixels (1/8th pixel) */
int mb_to_left_edge;
@@ -738,40 +524,24 @@ typedef struct macroblockd {
int mb_to_top_edge;
int mb_to_bottom_edge;
- FRAME_CONTEXT *fc;
-
/* pointers to reference frames */
const RefBuffer *block_refs[2];
/* pointer to current frame */
const YV12_BUFFER_CONFIG *cur_buf;
-#if CONFIG_INTRABC
- /* Scale of the current frame with respect to itself */
- struct scale_factors sf_identity;
-#endif
-
ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
- ENTROPY_CONTEXT left_context[MAX_MB_PLANE][2 * MAX_MIB_SIZE];
+ ENTROPY_CONTEXT left_context[MAX_MB_PLANE][MAX_MIB_SIZE];
PARTITION_CONTEXT *above_seg_context;
PARTITION_CONTEXT left_seg_context[MAX_MIB_SIZE];
-#if CONFIG_VAR_TX
TXFM_CONTEXT *above_txfm_context;
TXFM_CONTEXT *left_txfm_context;
- TXFM_CONTEXT left_txfm_context_buffer[2 * MAX_MIB_SIZE];
-
- TX_SIZE max_tx_size;
-#if CONFIG_SUPERTX
- TX_SIZE supertx_size;
-#endif
-#endif
+ TXFM_CONTEXT left_txfm_context_buffer[MAX_MIB_SIZE];
-#if CONFIG_LOOP_RESTORATION
WienerInfo wiener_info[MAX_MB_PLANE];
SgrprojInfo sgrproj_info[MAX_MB_PLANE];
-#endif // CONFIG_LOOP_RESTORATION
// block dimension in the unit of mode_info.
uint8_t n8_w, n8_h;
@@ -780,9 +550,10 @@ typedef struct macroblockd {
CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
uint8_t is_sec_rect;
-#if CONFIG_PVQ
- daala_dec_ctx daala_dec;
-#endif
+ // Counts of each reference frame in the above and left neighboring blocks.
+ // NOTE: Take into account both single and comp references.
+ uint8_t neighbors_ref_counts[REF_FRAMES];
+
FRAME_CONTEXT *tile_ctx;
/* Bit depth: 8, 10, 12 */
int bd;
@@ -790,27 +561,19 @@ typedef struct macroblockd {
int qindex[MAX_SEGMENTS];
int lossless[MAX_SEGMENTS];
int corrupted;
-#if CONFIG_AMVR
- int cur_frame_mv_precision_level;
-// same with that in AV1_COMMON
-#endif
+ int cur_frame_force_integer_mv;
+  // same as the corresponding field in AV1_COMMON
struct aom_internal_error_info *error_info;
-#if CONFIG_GLOBAL_MOTION
- WarpedMotionParams *global_motion;
-#endif // CONFIG_GLOBAL_MOTION
- int prev_qindex;
+ const WarpedMotionParams *global_motion;
int delta_qindex;
int current_qindex;
-#if CONFIG_EXT_DELTA_Q
  // Since the actual frame-level loop filter level is not available at the
  // beginning of the tile (it only becomes available during actual filtering)
  // at the encoder side, we record the delta_lf (against the frame-level loop
  // filter level) and code the delta between the previous superblock's delta
  // lf and the current delta lf. It is equivalent to the delta between the
  // previous superblock's actual lf and the current lf.
- int prev_delta_lf_from_base;
- int current_delta_lf_from_base;
-#if CONFIG_LOOPFILTER_LEVEL
+ int delta_lf_from_base;
// For this experiment, we have four frame filter levels for different plane
// and direction. So, to support the per superblock update, we need to add
// a few more params as below.
@@ -824,420 +587,151 @@ typedef struct macroblockd {
// SEG_LVL_ALT_LF_Y_H = 2;
// SEG_LVL_ALT_LF_U = 3;
// SEG_LVL_ALT_LF_V = 4;
- int prev_delta_lf[FRAME_LF_COUNT];
- int curr_delta_lf[FRAME_LF_COUNT];
-#endif // CONFIG_LOOPFILTER_LEVEL
-#endif
-#if CONFIG_ADAPT_SCAN
- const EobThresholdMD *eob_threshold_md;
-#endif
+ int delta_lf[FRAME_LF_COUNT];
+ int cdef_preset[4];
-#if CONFIG_COMPOUND_SEGMENT
DECLARE_ALIGNED(16, uint8_t, seg_mask[2 * MAX_SB_SQUARE]);
-#endif // CONFIG_COMPOUND_SEGMENT
+ uint8_t *mc_buf[2];
+ CFL_CTX cfl;
-#if CONFIG_MRC_TX
- uint8_t *mrc_mask;
-#endif // CONFIG_MRC_TX
+ JNT_COMP_PARAMS jcp_param;
-#if CONFIG_CFL
- CFL_CTX *cfl;
-#endif
-
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- uint8_t *ncobmc_pred_buf[MAX_MB_PLANE];
- int ncobmc_pred_buf_stride[MAX_MB_PLANE];
- SB_MI_BD sb_mi_bd;
-#endif
+ uint16_t cb_offset[MAX_MB_PLANE];
+ uint16_t txb_offset[MAX_MB_PLANE];
+ uint16_t color_index_map_offset[2];
} MACROBLOCKD;
static INLINE int get_bitdepth_data_path_index(const MACROBLOCKD *xd) {
return xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? 1 : 0;
}
-static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize,
- PARTITION_TYPE partition) {
- if (partition == PARTITION_INVALID)
- return BLOCK_INVALID;
- else
- return subsize_lookup[partition][bsize];
-}
-
-static const TX_TYPE intra_mode_to_tx_type_context[INTRA_MODES] = {
- DCT_DCT, // DC
- ADST_DCT, // V
- DCT_ADST, // H
- DCT_DCT, // D45
- ADST_ADST, // D135
- ADST_DCT, // D117
- DCT_ADST, // D153
- DCT_ADST, // D207
- ADST_DCT, // D63
- ADST_ADST, // SMOOTH
-#if CONFIG_SMOOTH_HV
- ADST_DCT, // SMOOTH_V
- DCT_ADST, // SMOOTH_H
-#endif // CONFIG_SMOOTH_HV
- ADST_ADST, // TM
-};
+static INLINE int get_sqr_bsize_idx(BLOCK_SIZE bsize) {
+ switch (bsize) {
+ case BLOCK_4X4: return 0;
+ case BLOCK_8X8: return 1;
+ case BLOCK_16X16: return 2;
+ case BLOCK_32X32: return 3;
+ case BLOCK_64X64: return 4;
+ case BLOCK_128X128: return 5;
+ default: return SQR_BLOCK_SIZES;
+ }
+}
-#if CONFIG_SUPERTX
-static INLINE int supertx_enabled(const MB_MODE_INFO *mbmi) {
- TX_SIZE max_tx_size = txsize_sqr_map[mbmi->tx_size];
- return tx_size_wide[max_tx_size] >
- AOMMIN(block_size_wide[mbmi->sb_type], block_size_high[mbmi->sb_type]);
+// Note: the input block size should be square.
+// Otherwise it's considered invalid.
+static INLINE BLOCK_SIZE get_partition_subsize(BLOCK_SIZE bsize,
+ PARTITION_TYPE partition) {
+ if (partition == PARTITION_INVALID) {
+ return BLOCK_INVALID;
+ } else {
+ const int sqr_bsize_idx = get_sqr_bsize_idx(bsize);
+ return sqr_bsize_idx >= SQR_BLOCK_SIZES
+ ? BLOCK_INVALID
+ : subsize_lookup[partition][sqr_bsize_idx];
+ }
}
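As an illustration of the square-only rule (the concrete subsize values below are hedged from the usual AV1 partition semantics, not spelled out in this header):

/* BLOCK_64X64 + PARTITION_NONE  -> BLOCK_64X64
 * BLOCK_64X64 + PARTITION_HORZ  -> BLOCK_64X32
 * BLOCK_64X64 + PARTITION_VERT  -> BLOCK_32X64
 * BLOCK_64X64 + PARTITION_SPLIT -> BLOCK_32X32
 * BLOCK_64X32 + any partition   -> BLOCK_INVALID (non-square input) */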
-#endif // CONFIG_SUPERTX
-#define USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4 1
+static TX_TYPE intra_mode_to_tx_type(const MB_MODE_INFO *mbmi,
+ PLANE_TYPE plane_type) {
+ static const TX_TYPE _intra_mode_to_tx_type[INTRA_MODES] = {
+ DCT_DCT, // DC
+ ADST_DCT, // V
+ DCT_ADST, // H
+ DCT_DCT, // D45
+ ADST_ADST, // D135
+ ADST_DCT, // D117
+ DCT_ADST, // D153
+ DCT_ADST, // D207
+ ADST_DCT, // D63
+ ADST_ADST, // SMOOTH
+ ADST_DCT, // SMOOTH_V
+ DCT_ADST, // SMOOTH_H
+ ADST_ADST, // PAETH
+ };
+ const PREDICTION_MODE mode =
+ (plane_type == PLANE_TYPE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
+ assert(mode < INTRA_MODES);
+ return _intra_mode_to_tx_type[mode];
+}
-#if CONFIG_RECT_TX
static INLINE int is_rect_tx(TX_SIZE tx_size) { return tx_size >= TX_SIZES; }
-#endif // CONFIG_RECT_TX
static INLINE int block_signals_txsize(BLOCK_SIZE bsize) {
-#if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_EXT_TX) && CONFIG_RECT_TX
return bsize > BLOCK_4X4;
-#else
- return bsize >= BLOCK_8X8;
-#endif
}
-#if CONFIG_MRC_TX
-#define USE_MRC_INTRA 0
-#define USE_MRC_INTER 1
-#define SIGNAL_MRC_MASK_INTRA (USE_MRC_INTRA && 0)
-#define SIGNAL_MRC_MASK_INTER (USE_MRC_INTER && 1)
-#define SIGNAL_ANY_MRC_MASK (SIGNAL_MRC_MASK_INTRA || SIGNAL_MRC_MASK_INTER)
-#endif // CONFIG_MRC_TX
-
-#if CONFIG_EXT_TX
-#define ALLOW_INTRA_EXT_TX 1
-
// Number of transform types in each set type
static const int av1_num_ext_tx_set[EXT_TX_SET_TYPES] = {
- 1, 2,
-#if CONFIG_MRC_TX
- 2, 3,
-#endif // CONFIG_MRC_TX
- 5, 7, 12, 16,
-};
-
-static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
- EXT_TX_SETS_INTER)] = {
- {
- // Intra
- EXT_TX_SET_DCTONLY, EXT_TX_SET_DTT4_IDTX_1DDCT, EXT_TX_SET_DTT4_IDTX,
-#if CONFIG_MRC_TX
- EXT_TX_SET_MRC_DCT,
-#endif // CONFIG_MRC_TX
- },
- {
- // Inter
- EXT_TX_SET_DCTONLY, EXT_TX_SET_ALL16, EXT_TX_SET_DTT9_IDTX_1DDCT,
- EXT_TX_SET_DCT_IDTX,
-#if CONFIG_MRC_TX
- EXT_TX_SET_MRC_DCT_IDTX,
-#endif // CONFIG_MRC_TX
- }
+ 1, 2, 5, 7, 12, 16,
};
-#if CONFIG_MRC_TX
static const int av1_ext_tx_used[EXT_TX_SET_TYPES][TX_TYPES] = {
- {
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
- {
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
- },
- {
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
- },
- {
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
- },
- {
- 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
- },
- {
- 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
- },
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- },
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
- },
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
+ { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
+ { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0 },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
};
-#else // CONFIG_MRC_TX
-static const int av1_ext_tx_used[EXT_TX_SET_TYPES][TX_TYPES] = {
- {
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
- {
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
- },
- {
- 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
- },
- {
- 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,
- },
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
- },
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- },
-};
-#endif // CONFIG_MRC_TX
-static INLINE TxSetType get_ext_tx_set_type(TX_SIZE tx_size, BLOCK_SIZE bs,
- int is_inter, int use_reduced_set) {
+static INLINE TxSetType av1_get_ext_tx_set_type(TX_SIZE tx_size, int is_inter,
+ int use_reduced_set) {
const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size];
- const TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size];
-#if CONFIG_CB4X4 && USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
- (void)bs;
if (tx_size_sqr_up > TX_32X32) return EXT_TX_SET_DCTONLY;
-#else
- if (tx_size_sqr_up > TX_32X32 || bs < BLOCK_8X8) return EXT_TX_SET_DCTONLY;
-#endif
- if (use_reduced_set)
- return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DTT4_IDTX;
-#if CONFIG_MRC_TX
- if (tx_size == TX_32X32) {
- if (is_inter && USE_MRC_INTER)
- return EXT_TX_SET_MRC_DCT_IDTX;
- else if (!is_inter && USE_MRC_INTRA)
- return EXT_TX_SET_MRC_DCT;
- }
-#endif // CONFIG_MRC_TX
if (tx_size_sqr_up == TX_32X32)
return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DCTONLY;
- if (is_inter)
+ if (use_reduced_set)
+ return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DTT4_IDTX;
+ const TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size];
+ if (is_inter) {
return (tx_size_sqr == TX_16X16 ? EXT_TX_SET_DTT9_IDTX_1DDCT
: EXT_TX_SET_ALL16);
- else
+ } else {
return (tx_size_sqr == TX_16X16 ? EXT_TX_SET_DTT4_IDTX
: EXT_TX_SET_DTT4_IDTX_1DDCT);
+ }
}
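Tracing the branches above with use_reduced_set == 0 gives, for example:

/* TX_64X64 (or any size whose square-up exceeds 32x32) -> EXT_TX_SET_DCTONLY
 * TX_32X32, inter -> EXT_TX_SET_DCT_IDTX
 * TX_32X32, intra -> EXT_TX_SET_DCTONLY
 * TX_16X16, inter -> EXT_TX_SET_DTT9_IDTX_1DDCT
 * TX_16X16, intra -> EXT_TX_SET_DTT4_IDTX
 * TX_8X8,   inter -> EXT_TX_SET_ALL16
 * TX_8X8,   intra -> EXT_TX_SET_DTT4_IDTX_1DDCT */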
// Maps tx set types to the indices.
static const int ext_tx_set_index[2][EXT_TX_SET_TYPES] = {
- {
- // Intra
- 0, -1,
-#if CONFIG_MRC_TX
- 3, -1,
-#endif // CONFIG_MRC_TX
- 2, 1, -1, -1,
- },
- {
- // Inter
- 0, 3,
-#if CONFIG_MRC_TX
- -1, 4,
-#endif // CONFIG_MRC_TX
- -1, -1, 2, 1,
- },
+ { // Intra
+ 0, -1, 2, 1, -1, -1 },
+ { // Inter
+ 0, 3, -1, -1, 2, 1 },
};
-static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs, int is_inter,
+static INLINE int get_ext_tx_set(TX_SIZE tx_size, int is_inter,
int use_reduced_set) {
const TxSetType set_type =
- get_ext_tx_set_type(tx_size, bs, is_inter, use_reduced_set);
+ av1_get_ext_tx_set_type(tx_size, is_inter, use_reduced_set);
return ext_tx_set_index[is_inter][set_type];
}
-static INLINE int get_ext_tx_types(TX_SIZE tx_size, BLOCK_SIZE bs, int is_inter,
+static INLINE int get_ext_tx_types(TX_SIZE tx_size, int is_inter,
int use_reduced_set) {
const int set_type =
- get_ext_tx_set_type(tx_size, bs, is_inter, use_reduced_set);
+ av1_get_ext_tx_set_type(tx_size, is_inter, use_reduced_set);
return av1_num_ext_tx_set[set_type];
}
-#if CONFIG_LGT_FROM_PRED
-static INLINE int is_lgt_allowed(PREDICTION_MODE mode, TX_SIZE tx_size) {
- if (!LGT_FROM_PRED_INTRA && !is_inter_mode(mode)) return 0;
- if (!LGT_FROM_PRED_INTER && is_inter_mode(mode)) return 0;
-
- switch (mode) {
- case D45_PRED:
- case D63_PRED:
- case D117_PRED:
- case V_PRED:
-#if CONFIG_SMOOTH_HV
- case SMOOTH_V_PRED:
-#endif
- return tx_size_wide[tx_size] <= 8;
- case D135_PRED:
- case D153_PRED:
- case D207_PRED:
- case H_PRED:
-#if CONFIG_SMOOTH_HV
- case SMOOTH_H_PRED:
-#endif
- return tx_size_high[tx_size] <= 8;
- case DC_PRED:
- case SMOOTH_PRED: return 0;
- case TM_PRED:
- default: return tx_size_wide[tx_size] <= 8 || tx_size_high[tx_size] <= 8;
- }
-}
-#endif // CONFIG_LGT_FROM_PRED
-
-#if CONFIG_RECT_TX
-static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) {
- static const char LUT[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0, // BLOCK_2X2
- 0, // BLOCK_2X4
- 0, // BLOCK_4X2
-#endif
- 0, // BLOCK_4X4
- 1, // BLOCK_4X8
- 1, // BLOCK_8X4
- 0, // BLOCK_8X8
- 1, // BLOCK_8X16
- 1, // BLOCK_16X8
- 0, // BLOCK_16X16
- 1, // BLOCK_16X32
- 1, // BLOCK_32X16
- 0, // BLOCK_32X32
- 1, // BLOCK_32X64
- 1, // BLOCK_64X32
- 0, // BLOCK_64X64
-#if CONFIG_EXT_PARTITION
- 0, // BLOCK_64X128
- 0, // BLOCK_128X64
- 0, // BLOCK_128X128
-#endif // CONFIG_EXT_PARTITION
- 0, // BLOCK_4X16
- 0, // BLOCK_16X4
- 0, // BLOCK_8X32
- 0, // BLOCK_32X8
- 0, // BLOCK_16X64
- 0, // BLOCK_64X16
-#if CONFIG_EXT_PARTITION
- 0, // BLOCK_32X128
- 0, // BLOCK_128X32
-#endif // CONFIG_EXT_PARTITION
- };
-
- return LUT[bsize];
-}
+#define TXSIZEMAX(t1, t2) (tx_size_2d[(t1)] >= tx_size_2d[(t2)] ? (t1) : (t2))
+#define TXSIZEMIN(t1, t2) (tx_size_2d[(t1)] <= tx_size_2d[(t2)] ? (t1) : (t2))
-static INLINE int is_rect_tx_allowed(const MACROBLOCKD *xd,
- const MB_MODE_INFO *mbmi) {
- return is_rect_tx_allowed_bsize(mbmi->sb_type) &&
- !xd->lossless[mbmi->segment_id];
-}
-#endif // CONFIG_RECT_TX
-#endif // CONFIG_EXT_TX
-
-#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
-static INLINE int is_quarter_tx_allowed_bsize(BLOCK_SIZE bsize) {
- static const char LUT_QTTX[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0, // BLOCK_2X2
- 0, // BLOCK_2X4
- 0, // BLOCK_4X2
-#endif
- 0, // BLOCK_4X4
- 0, // BLOCK_4X8
- 0, // BLOCK_8X4
- 0, // BLOCK_8X8
- 1, // BLOCK_8X16
- 1, // BLOCK_16X8
- 0, // BLOCK_16X16
- 0, // BLOCK_16X32
- 0, // BLOCK_32X16
- 0, // BLOCK_32X32
- 0, // BLOCK_32X64
- 0, // BLOCK_64X32
- 0, // BLOCK_64X64
-#if CONFIG_EXT_PARTITION
- 0, // BLOCK_64X128
- 0, // BLOCK_128X64
- 0, // BLOCK_128X128
-#endif // CONFIG_EXT_PARTITION
- 0, // BLOCK_4X16
- 0, // BLOCK_16X4
- 0, // BLOCK_8X32
- 0, // BLOCK_32X8
- 0, // BLOCK_16X64
- 0, // BLOCK_64X16
-#if CONFIG_EXT_PARTITION
- 0, // BLOCK_32X128
- 0, // BLOCK_128X32
-#endif // CONFIG_EXT_PARTITION
- };
-
- return LUT_QTTX[bsize];
-}
-
-static INLINE int is_quarter_tx_allowed(const MACROBLOCKD *xd,
- const MB_MODE_INFO *mbmi,
- int is_inter) {
- return is_quarter_tx_allowed_bsize(mbmi->sb_type) && is_inter &&
- !xd->lossless[mbmi->segment_id];
-}
-#endif
-
-static INLINE TX_SIZE tx_size_from_tx_mode(BLOCK_SIZE bsize, TX_MODE tx_mode,
- int is_inter) {
+static INLINE TX_SIZE tx_size_from_tx_mode(BLOCK_SIZE bsize, TX_MODE tx_mode) {
const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
-#if (CONFIG_VAR_TX || CONFIG_EXT_TX) && CONFIG_RECT_TX
const TX_SIZE max_rect_tx_size = max_txsize_rect_lookup[bsize];
-#else
- const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
-#endif // (CONFIG_VAR_TX || CONFIG_EXT_TX) && CONFIG_RECT_TX
- (void)is_inter;
-#if CONFIG_VAR_TX && CONFIG_RECT_TX
-#if CONFIG_CB4X4
if (bsize == BLOCK_4X4)
return AOMMIN(max_txsize_lookup[bsize], largest_tx_size);
-#else
- if (bsize < BLOCK_8X8)
- return AOMMIN(max_txsize_lookup[bsize], largest_tx_size);
-#endif
if (txsize_sqr_map[max_rect_tx_size] <= largest_tx_size)
return max_rect_tx_size;
else
return largest_tx_size;
-#elif CONFIG_EXT_TX && CONFIG_RECT_TX
- if (txsize_sqr_up_map[max_rect_tx_size] <= largest_tx_size) {
- return max_rect_tx_size;
- } else {
- return largest_tx_size;
- }
-#else
- return AOMMIN(max_tx_size, largest_tx_size);
-#endif // CONFIG_VAR_TX && CONFIG_RECT_TX
}
-#if CONFIG_EXT_INTRA
-#define MAX_ANGLE_DELTA 3
-#define ANGLE_STEP 3
extern const int16_t dr_intra_derivative[90];
static const uint8_t mode_to_angle_map[] = {
- 0, 90, 180, 45, 135, 111, 157, 203, 67, 0, 0,
-#if CONFIG_SMOOTH_HV
- 0, 0,
-#endif // CONFIG_SMOOTH_HV
+ 0, 90, 180, 45, 135, 113, 157, 203, 67, 0, 0, 0, 0,
};
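For the directional modes this table gives the base angle; as the MB_MODE_INFO comment earlier notes, the actual prediction angle is base + angle_delta * step, with the 3-degree ANGLE_STEP seen in the defines removed above. Two worked examples:

/* V_PRED,   angle_delta = -2: 90 + (-2 * 3) = 84 degrees
 * D67_PRED, angle_delta = +3: 67 + ( 3 * 3) = 76 degrees */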
-#if CONFIG_INTRA_INTERP
-// Returns whether filter selection is needed for a given
-// intra prediction angle.
-int av1_is_intra_filter_switchable(int angle);
-#endif // CONFIG_INTRA_INTERP
-#endif // CONFIG_EXT_INTRA
-
-#if CONFIG_DCT_ONLY
-#define FIXED_TX_TYPE 1
-#else
-#define FIXED_TX_TYPE 0
-#endif
// Converts block_index for given transform size to index of the block in raster
// order.
@@ -1261,168 +755,182 @@ static INLINE int av1_raster_order_to_block_index(TX_SIZE tx_size,
}
static INLINE TX_TYPE get_default_tx_type(PLANE_TYPE plane_type,
- const MACROBLOCKD *xd, int block_idx,
+ const MACROBLOCKD *xd,
TX_SIZE tx_size) {
- const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const MB_MODE_INFO *const mbmi = xd->mi[0];
- if (CONFIG_DCT_ONLY || is_inter_block(mbmi) || plane_type != PLANE_TYPE_Y ||
+ if (is_inter_block(mbmi) || plane_type != PLANE_TYPE_Y ||
xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32)
return DCT_DCT;
- return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y
- ? get_y_mode(xd->mi[0], block_idx)
- : get_uv_mode(mbmi->uv_mode)];
+ return intra_mode_to_tx_type(mbmi, plane_type);
+}
+
+static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize,
+ int subsampling_x,
+ int subsampling_y) {
+ if (bsize == BLOCK_INVALID) return BLOCK_INVALID;
+ return ss_size_lookup[bsize][subsampling_x][subsampling_y];
+}
+
+static INLINE int av1_get_txb_size_index(BLOCK_SIZE bsize, int blk_row,
+ int blk_col) {
+ TX_SIZE txs = max_txsize_rect_lookup[bsize];
+ for (int level = 0; level < MAX_VARTX_DEPTH - 1; ++level)
+ txs = sub_tx_size_map[txs];
+ const int tx_w_log2 = tx_size_wide_log2[txs] - MI_SIZE_LOG2;
+ const int tx_h_log2 = tx_size_high_log2[txs] - MI_SIZE_LOG2;
+ const int bw_log2 = mi_size_wide_log2[bsize];
+ const int stride_log2 = bw_log2 - tx_w_log2;
+ const int index =
+ ((blk_row >> tx_h_log2) << stride_log2) + (blk_col >> tx_w_log2);
+ assert(index < INTER_TX_SIZE_BUF_LEN);
+ return index;
+}
+
+static INLINE int av1_get_txk_type_index(BLOCK_SIZE bsize, int blk_row,
+ int blk_col) {
+ TX_SIZE txs = max_txsize_rect_lookup[bsize];
+ for (int level = 0; level < MAX_VARTX_DEPTH; ++level)
+ txs = sub_tx_size_map[txs];
+ const int tx_w_log2 = tx_size_wide_log2[txs] - MI_SIZE_LOG2;
+ const int tx_h_log2 = tx_size_high_log2[txs] - MI_SIZE_LOG2;
+ const int bw_uint_log2 = mi_size_wide_log2[bsize];
+ const int stride_log2 = bw_uint_log2 - tx_w_log2;
+ const int index =
+ ((blk_row >> tx_h_log2) << stride_log2) + (blk_col >> tx_w_log2);
+ assert(index < TXK_TYPE_BUF_LEN);
+ return index;
+}
+
+static INLINE void update_txk_array(TX_TYPE *txk_type, BLOCK_SIZE bsize,
+ int blk_row, int blk_col, TX_SIZE tx_size,
+ TX_TYPE tx_type) {
+ const int txk_type_idx = av1_get_txk_type_index(bsize, blk_row, blk_col);
+ txk_type[txk_type_idx] = tx_type;
+
+ const int txw = tx_size_wide_unit[tx_size];
+ const int txh = tx_size_high_unit[tx_size];
+  // The 16x16 unit comes from the tx_64x64 constraint, which caps the
+  // maximum chroma tx size at 32x32. Coupled with the 4:1 transform block
+  // sizes, the constraint takes effect for the 32x16 / 16x32 sizes too. To
+  // handle this, write the tx_type into every 16x16 unit covered by a
+  // 64-level transform.
+ if (txw == tx_size_wide_unit[TX_64X64] ||
+ txh == tx_size_high_unit[TX_64X64]) {
+ const int tx_unit = tx_size_wide_unit[TX_16X16];
+ for (int idy = 0; idy < txh; idy += tx_unit) {
+ for (int idx = 0; idx < txw; idx += tx_unit) {
+ const int this_index =
+ av1_get_txk_type_index(bsize, blk_row + idy, blk_col + idx);
+ txk_type[this_index] = tx_type;
+ }
+ }
+ }
}
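
As a rough worked illustration of the txk-type indexing above, assuming MAX_VARTX_DEPTH == 2, MI_SIZE_LOG2 == 2 and TXK_TYPE_BUF_LEN == 64: for BLOCK_64X64 the maximum rect tx size is TX_64X64, two sub_tx_size_map steps reduce it to TX_16X16, and the index computation collapses to plain shifts.

#include <assert.h>

/* Hypothetical stand-alone sketch of av1_get_txk_type_index() for a 64x64
 * luma block: tx_w_log2 = tx_h_log2 = 4 - MI_SIZE_LOG2 = 2 and
 * stride_log2 = mi_size_wide_log2 - tx_w_log2 = 4 - 2 = 2. */
static int txk_index_64x64_sketch(int blk_row, int blk_col) {
  const int tx_h_log2 = 2, tx_w_log2 = 2, stride_log2 = 2;
  const int index =
      ((blk_row >> tx_h_log2) << stride_log2) + (blk_col >> tx_w_log2);
  assert(index < 64); /* TXK_TYPE_BUF_LEN assumed to be 64 */
  return index;
}

/* e.g. the 16x16 unit at mi position (8, 12) maps to
 * (8 >> 2) * 4 + (12 >> 2) == 11. */
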
static INLINE TX_TYPE av1_get_tx_type(PLANE_TYPE plane_type,
const MACROBLOCKD *xd, int blk_row,
- int blk_col, int block, TX_SIZE tx_size) {
- const MODE_INFO *const mi = xd->mi[0];
- const MB_MODE_INFO *const mbmi = &mi->mbmi;
- (void)blk_row;
- (void)blk_col;
-#if CONFIG_INTRABC && (!CONFIG_EXT_TX || CONFIG_TXK_SEL)
- // TODO(aconverse@google.com): Handle INTRABC + EXT_TX + TXK_SEL
- if (is_intrabc_block(mbmi)) return DCT_DCT;
-#endif // CONFIG_INTRABC && (!CONFIG_EXT_TX || CONFIG_TXK_SEL)
-
-#if CONFIG_TXK_SEL
+ int blk_col, TX_SIZE tx_size,
+ int reduced_tx_set) {
+ const MB_MODE_INFO *const mbmi = xd->mi[0];
+ const struct macroblockd_plane *const pd = &xd->plane[plane_type];
+ const TxSetType tx_set_type =
+ av1_get_ext_tx_set_type(tx_size, is_inter_block(mbmi), reduced_tx_set);
+
TX_TYPE tx_type;
- if (xd->lossless[mbmi->segment_id] || txsize_sqr_map[tx_size] >= TX_32X32) {
+ if (xd->lossless[mbmi->segment_id] || txsize_sqr_up_map[tx_size] > TX_32X32) {
tx_type = DCT_DCT;
} else {
- if (plane_type == PLANE_TYPE_Y)
- tx_type = mbmi->txk_type[(blk_row << 4) + blk_col];
- else if (is_inter_block(mbmi))
- tx_type = mbmi->txk_type[(blk_row << 5) + (blk_col << 1)];
- else
- tx_type = intra_mode_to_tx_type_context[mbmi->uv_mode];
- }
- assert(tx_type >= DCT_DCT && tx_type < TX_TYPES);
- return tx_type;
-#endif // CONFIG_TXK_SEL
-
-#if FIXED_TX_TYPE
- const int block_raster_idx = av1_block_index_to_raster_order(tx_size, block);
- return get_default_tx_type(plane_type, xd, block_raster_idx, tx_size);
-#endif // FIXED_TX_TYPE
-
-#if CONFIG_EXT_TX
-#if CONFIG_MRC_TX
- if (mbmi->tx_type == MRC_DCT) {
- assert(((is_inter_block(mbmi) && USE_MRC_INTER) ||
- (!is_inter_block(mbmi) && USE_MRC_INTRA)) &&
- "INVALID BLOCK TYPE FOR MRC_DCT");
if (plane_type == PLANE_TYPE_Y) {
- assert(tx_size == TX_32X32);
- return mbmi->tx_type;
+ const int txk_type_idx =
+ av1_get_txk_type_index(mbmi->sb_type, blk_row, blk_col);
+ tx_type = mbmi->txk_type[txk_type_idx];
+ } else if (is_inter_block(mbmi)) {
+      // Scale back to the y plane's coordinates.
+ blk_row <<= pd->subsampling_y;
+ blk_col <<= pd->subsampling_x;
+ const int txk_type_idx =
+ av1_get_txk_type_index(mbmi->sb_type, blk_row, blk_col);
+ tx_type = mbmi->txk_type[txk_type_idx];
+ } else {
+      // In intra mode, the uv planes don't share the same prediction mode as
+      // the y plane, so the tx_type is not shared either.
+ tx_type = intra_mode_to_tx_type(mbmi, PLANE_TYPE_UV);
}
- return DCT_DCT;
}
-#endif // CONFIG_MRC_TX
- if (xd->lossless[mbmi->segment_id] || txsize_sqr_map[tx_size] > TX_32X32 ||
- (txsize_sqr_map[tx_size] >= TX_32X32 && !is_inter_block(mbmi)))
- return DCT_DCT;
- if (mbmi->sb_type >= BLOCK_8X8 || CONFIG_CB4X4) {
- if (plane_type == PLANE_TYPE_Y) {
-#if !ALLOW_INTRA_EXT_TX
- if (is_inter_block(mbmi))
-#endif // ALLOW_INTRA_EXT_TX
- return mbmi->tx_type;
- }
+ assert(tx_type < TX_TYPES);
+ if (!av1_ext_tx_used[tx_set_type][tx_type]) return DCT_DCT;
+ return tx_type;
+}
- if (is_inter_block(mbmi)) {
-// UV Inter only
-#if CONFIG_CHROMA_2X2
- if (tx_size < TX_4X4) return DCT_DCT;
-#endif
- return (mbmi->tx_type == IDTX && txsize_sqr_map[tx_size] >= TX_32X32)
- ? DCT_DCT
- : mbmi->tx_type;
- }
- }
+void av1_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y,
+ const int num_planes);
-#if CONFIG_CB4X4
- (void)block;
-#if CONFIG_CHROMA_2X2
- if (tx_size < TX_4X4)
- return DCT_DCT;
- else
-#endif // CONFIG_CHROMA_2X2
- return intra_mode_to_tx_type_context[get_uv_mode(mbmi->uv_mode)];
-#else // CONFIG_CB4X4
- // Sub8x8-Inter/Intra OR UV-Intra
- if (is_inter_block(mbmi)) { // Sub8x8-Inter
- return DCT_DCT;
- } else { // Sub8x8 Intra OR UV-Intra
- const int block_raster_idx =
- av1_block_index_to_raster_order(tx_size, block);
- return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y
- ? get_y_mode(mi, block_raster_idx)
- : get_uv_mode(mbmi->uv_mode)];
- }
-#endif // CONFIG_CB4X4
-#else // CONFIG_EXT_TX
- (void)block;
-#if CONFIG_MRC_TX
- if (mbmi->tx_type == MRC_DCT) {
- if (plane_type == PLANE_TYPE_Y && !xd->lossless[mbmi->segment_id]) {
- assert(tx_size == TX_32X32);
- return mbmi->tx_type;
- }
- return DCT_DCT;
+static INLINE int bsize_to_max_depth(BLOCK_SIZE bsize) {
+ TX_SIZE tx_size = max_txsize_rect_lookup[bsize];
+ int depth = 0;
+ while (depth < MAX_TX_DEPTH && tx_size != TX_4X4) {
+ depth++;
+ tx_size = sub_tx_size_map[tx_size];
}
-#endif // CONFIG_MRC_TX
- if (plane_type != PLANE_TYPE_Y || xd->lossless[mbmi->segment_id] ||
- txsize_sqr_map[tx_size] >= TX_32X32)
- return DCT_DCT;
- return mbmi->tx_type;
-#endif // CONFIG_EXT_TX
+ return depth;
}
-void av1_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y);
-
-static INLINE int tx_size_to_depth(TX_SIZE tx_size) {
- return (int)(tx_size - TX_SIZE_LUMA_MIN);
+static INLINE int bsize_to_tx_size_cat(BLOCK_SIZE bsize) {
+ TX_SIZE tx_size = max_txsize_rect_lookup[bsize];
+ assert(tx_size != TX_4X4);
+ int depth = 0;
+ while (tx_size != TX_4X4) {
+ depth++;
+ tx_size = sub_tx_size_map[tx_size];
+ assert(depth < 10);
+ }
+ assert(depth <= MAX_TX_CATS);
+ return depth - 1;
}
-static INLINE TX_SIZE depth_to_tx_size(int depth) {
- return (TX_SIZE)(depth + TX_SIZE_LUMA_MIN);
+static INLINE TX_SIZE depth_to_tx_size(int depth, BLOCK_SIZE bsize) {
+ TX_SIZE max_tx_size = max_txsize_rect_lookup[bsize];
+ TX_SIZE tx_size = max_tx_size;
+ for (int d = 0; d < depth; ++d) tx_size = sub_tx_size_map[tx_size];
+ return tx_size;
}
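
A small stand-alone sketch of the same descent through sub_tx_size_map, assuming the usual square chain TX_32X32 -> TX_16X16 -> TX_8X8 -> TX_4X4 and MAX_TX_DEPTH == 2; for BLOCK_16X16 this gives a maximum depth of 2 and a tx size category of 1.

/* Hypothetical square-only chain standing in for sub_tx_size_map. */
enum SketchTx { SK_TX_4X4, SK_TX_8X8, SK_TX_16X16, SK_TX_32X32 };
static const enum SketchTx sketch_sub_map[4] = { SK_TX_4X4, SK_TX_4X4,
                                                 SK_TX_8X8, SK_TX_16X16 };

static int sketch_max_depth(enum SketchTx tx) {
  int depth = 0;
  while (depth < 2 /* MAX_TX_DEPTH assumed to be 2 */ && tx != SK_TX_4X4) {
    ++depth;
    tx = sketch_sub_map[tx];
  }
  return depth;
}

/* sketch_max_depth(SK_TX_16X16) == 2, sketch_max_depth(SK_TX_8X8) == 1. */
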
-static INLINE TX_SIZE av1_get_uv_tx_size(const MB_MODE_INFO *mbmi,
- const struct macroblockd_plane *pd) {
-#if CONFIG_CHROMA_2X2
- assert(mbmi->tx_size > TX_2X2);
-#endif // CONFIG_CHROMA_2X2
-
-#if CONFIG_SUPERTX
- if (supertx_enabled(mbmi))
- return uvsupertx_size_lookup[txsize_sqr_map[mbmi->tx_size]]
- [pd->subsampling_x][pd->subsampling_y];
-#endif // CONFIG_SUPERTX
+static INLINE TX_SIZE av1_get_adjusted_tx_size(TX_SIZE tx_size) {
+ switch (tx_size) {
+ case TX_64X64:
+ case TX_64X32:
+ case TX_32X64: return TX_32X32;
+ case TX_64X16: return TX_32X16;
+ case TX_16X64: return TX_16X32;
+ default: return tx_size;
+ }
+}
- const TX_SIZE uv_txsize =
- uv_txsize_lookup[mbmi->sb_type][mbmi->tx_size][pd->subsampling_x]
- [pd->subsampling_y];
- assert(uv_txsize != TX_INVALID);
- return uv_txsize;
+static INLINE TX_SIZE av1_get_max_uv_txsize(BLOCK_SIZE bsize, int subsampling_x,
+ int subsampling_y) {
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(bsize, subsampling_x, subsampling_y);
+ assert(plane_bsize < BLOCK_SIZES_ALL);
+ const TX_SIZE uv_tx = max_txsize_rect_lookup[plane_bsize];
+ return av1_get_adjusted_tx_size(uv_tx);
}
static INLINE TX_SIZE av1_get_tx_size(int plane, const MACROBLOCKD *xd) {
- const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ const MB_MODE_INFO *mbmi = xd->mi[0];
+ if (xd->lossless[mbmi->segment_id]) return TX_4X4;
if (plane == 0) return mbmi->tx_size;
const MACROBLOCKD_PLANE *pd = &xd->plane[plane];
- return av1_get_uv_tx_size(mbmi, pd);
-}
-
-static INLINE BLOCK_SIZE
-get_plane_block_size(BLOCK_SIZE bsize, const struct macroblockd_plane *pd) {
- return ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y];
+ return av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x,
+ pd->subsampling_y);
}
void av1_reset_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col,
- BLOCK_SIZE bsize);
+ BLOCK_SIZE bsize, const int num_planes);
+
+void av1_reset_loop_filter_delta(MACROBLOCKD *xd, int num_planes);
+
+void av1_reset_loop_restoration(MACROBLOCKD *xd, const int num_planes);
typedef void (*foreach_transformed_block_visitor)(int plane, int block,
int blk_row, int blk_col,
@@ -1433,54 +941,31 @@ void av1_foreach_transformed_block_in_plane(
const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
foreach_transformed_block_visitor visit, void *arg);
-#if CONFIG_LV_MAP
void av1_foreach_transformed_block(const MACROBLOCKD *const xd,
BLOCK_SIZE bsize, int mi_row, int mi_col,
foreach_transformed_block_visitor visit,
- void *arg);
-#endif
-
-#if CONFIG_COEF_INTERLEAVE
-static INLINE int get_max_4x4_size(int num_4x4, int mb_to_edge,
- int subsampling) {
- return num_4x4 + (mb_to_edge >= 0 ? 0 : mb_to_edge >> (5 + subsampling));
-}
-
-void av1_foreach_transformed_block_interleave(
- const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
- foreach_transformed_block_visitor visit, void *arg);
-#endif
+ void *arg, const int num_planes);
void av1_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
- int plane, TX_SIZE tx_size, int has_eob, int aoff,
- int loff);
+ int plane, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ int has_eob, int aoff, int loff);
+
+#define MAX_INTERINTRA_SB_SQUARE 32 * 32
+static INLINE int is_interintra_mode(const MB_MODE_INFO *mbmi) {
+ return (mbmi->ref_frame[0] > INTRA_FRAME &&
+ mbmi->ref_frame[1] == INTRA_FRAME);
+}
static INLINE int is_interintra_allowed_bsize(const BLOCK_SIZE bsize) {
-#if CONFIG_INTERINTRA
- // TODO(debargha): Should this be bsize < BLOCK_LARGEST?
- return (bsize >= BLOCK_8X8) && (bsize < BLOCK_64X64);
-#else
- (void)bsize;
- return 0;
-#endif // CONFIG_INTERINTRA
+ return (bsize >= BLOCK_8X8) && (bsize <= BLOCK_32X32);
}
static INLINE int is_interintra_allowed_mode(const PREDICTION_MODE mode) {
-#if CONFIG_INTERINTRA
return (mode >= NEARESTMV) && (mode <= NEWMV);
-#else
- (void)mode;
- return 0;
-#endif // CONFIG_INTERINTRA
}
static INLINE int is_interintra_allowed_ref(const MV_REFERENCE_FRAME rf[2]) {
-#if CONFIG_INTERINTRA
return (rf[0] > INTRA_FRAME) && (rf[1] <= INTRA_FRAME);
-#else
- (void)rf;
- return 0;
-#endif // CONFIG_INTERINTRA
}
static INLINE int is_interintra_allowed(const MB_MODE_INFO *mbmi) {
@@ -1501,54 +986,30 @@ static INLINE int is_interintra_allowed_bsize_group(int group) {
}
static INLINE int is_interintra_pred(const MB_MODE_INFO *mbmi) {
- return (mbmi->ref_frame[1] == INTRA_FRAME) && is_interintra_allowed(mbmi);
-}
-
-#if CONFIG_VAR_TX
-static INLINE int get_vartx_max_txsize(const MB_MODE_INFO *const mbmi,
- BLOCK_SIZE bsize, int subsampled) {
-#if CONFIG_CB4X4
- (void)mbmi;
- TX_SIZE max_txsize = max_txsize_rect_lookup[bsize];
-#else
- TX_SIZE max_txsize = mbmi->sb_type < BLOCK_8X8
- ? max_txsize_rect_lookup[mbmi->sb_type]
- : max_txsize_rect_lookup[bsize];
-#endif // CONFIG_C4X4
-
-#if CONFIG_EXT_PARTITION && CONFIG_TX64X64
- // The decoder is designed so that it can process 64x64 luma pixels at a
- // time. If this is a chroma plane with subsampling and bsize corresponds to
- // a subsampled BLOCK_128X128 then the lookup above will give TX_64X64. That
- // mustn't be used for the subsampled plane (because it would be bigger than
- // a 64x64 luma block) so we round down to TX_32X32.
- if (subsampled && max_txsize == TX_64X64) max_txsize = TX_32X32;
-#else
- (void)subsampled;
-#endif
+ return mbmi->ref_frame[0] > INTRA_FRAME &&
+ mbmi->ref_frame[1] == INTRA_FRAME && is_interintra_allowed(mbmi);
+}
- return max_txsize;
+static INLINE int get_vartx_max_txsize(const MACROBLOCKD *xd, BLOCK_SIZE bsize,
+ int plane) {
+ if (xd->lossless[xd->mi[0]->segment_id]) return TX_4X4;
+ const TX_SIZE max_txsize = max_txsize_rect_lookup[bsize];
+ if (plane == 0) return max_txsize; // luma
+ return av1_get_adjusted_tx_size(max_txsize); // chroma
}
-#endif // CONFIG_VAR_TX
-#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
static INLINE int is_motion_variation_allowed_bsize(BLOCK_SIZE bsize) {
return AOMMIN(block_size_wide[bsize], block_size_high[bsize]) >= 8;
}
static INLINE int is_motion_variation_allowed_compound(
const MB_MODE_INFO *mbmi) {
-#if CONFIG_COMPOUND_SINGLEREF
- if (!has_second_ref(mbmi) && !is_inter_singleref_comp_mode(mbmi->mode))
-#else
if (!has_second_ref(mbmi))
-#endif // CONFIG_COMPOUND_SINGLEREF
return 1;
else
return 0;
}
-#if CONFIG_MOTION_VAR
// input: log2 of length, 0(4), 1(8), ...
static const int max_neighbor_obmc[6] = { 0, 1, 2, 3, 4, 4 };
@@ -1556,102 +1017,53 @@ static INLINE int check_num_overlappable_neighbors(const MB_MODE_INFO *mbmi) {
return !(mbmi->overlappable_neighbors[0] == 0 &&
mbmi->overlappable_neighbors[1] == 0);
}
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
-static INLINE NCOBMC_MODE ncobmc_mode_allowed_bsize(BLOCK_SIZE bsize) {
- if (bsize < BLOCK_8X8 || bsize >= BLOCK_64X64)
- return NO_OVERLAP;
- else
- return MAX_NCOBMC_MODES;
-}
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
-#endif // CONFIG_MOTION_VAR
-static INLINE MOTION_MODE motion_mode_allowed(
-#if CONFIG_GLOBAL_MOTION
- int block, const WarpedMotionParams *gm_params,
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_WARPED_MOTION
- const MACROBLOCKD *xd,
-#endif
- const MODE_INFO *mi) {
- const MB_MODE_INFO *mbmi = &mi->mbmi;
-#if CONFIG_AMVR
- if (xd->cur_frame_mv_precision_level == 0) {
-#endif
-#if CONFIG_GLOBAL_MOTION
+static INLINE MOTION_MODE
+motion_mode_allowed(const WarpedMotionParams *gm_params, const MACROBLOCKD *xd,
+ const MB_MODE_INFO *mbmi, int allow_warped_motion) {
+ if (xd->cur_frame_force_integer_mv == 0) {
const TransformationType gm_type = gm_params[mbmi->ref_frame[0]].wmtype;
- if (is_global_mv_block(mi, block, gm_type)) return SIMPLE_TRANSLATION;
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_AMVR
+ if (is_global_mv_block(mbmi, gm_type)) return SIMPLE_TRANSLATION;
}
-#endif
if (is_motion_variation_allowed_bsize(mbmi->sb_type) &&
is_inter_mode(mbmi->mode) && mbmi->ref_frame[1] != INTRA_FRAME &&
is_motion_variation_allowed_compound(mbmi)) {
-#if CONFIG_MOTION_VAR
if (!check_num_overlappable_neighbors(mbmi)) return SIMPLE_TRANSLATION;
-#endif
-#if CONFIG_WARPED_MOTION
- if (!has_second_ref(mbmi) && mbmi->num_proj_ref[0] >= 1 &&
- !av1_is_scaled(&(xd->block_refs[0]->sf))) {
-#if CONFIG_AMVR
- if (xd->cur_frame_mv_precision_level) {
+ assert(!has_second_ref(mbmi));
+ if (mbmi->num_proj_ref[0] >= 1 &&
+ (allow_warped_motion && !av1_is_scaled(&(xd->block_refs[0]->sf)))) {
+ if (xd->cur_frame_force_integer_mv) {
return OBMC_CAUSAL;
}
-#endif
return WARPED_CAUSAL;
}
-
-#endif // CONFIG_WARPED_MOTION
-#if CONFIG_MOTION_VAR
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- if (ncobmc_mode_allowed_bsize(mbmi->sb_type) < NO_OVERLAP)
- return NCOBMC_ADAPT_WEIGHT;
- else
-#endif
- return OBMC_CAUSAL;
-#else
- return SIMPLE_TRANSLATION;
-#endif // CONFIG_MOTION_VAR
+ return OBMC_CAUSAL;
} else {
return SIMPLE_TRANSLATION;
}
}
static INLINE void assert_motion_mode_valid(MOTION_MODE mode,
-#if CONFIG_GLOBAL_MOTION
- int block,
const WarpedMotionParams *gm_params,
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_WARPED_MOTION
const MACROBLOCKD *xd,
-#endif
- const MODE_INFO *mi) {
- const MOTION_MODE last_motion_mode_allowed = motion_mode_allowed(
-#if CONFIG_GLOBAL_MOTION
- block, gm_params,
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_WARPED_MOTION
- xd,
-#endif
- mi);
+ const MB_MODE_INFO *mbmi,
+ int allow_warped_motion) {
+ const MOTION_MODE last_motion_mode_allowed =
+ motion_mode_allowed(gm_params, xd, mbmi, allow_warped_motion);
// Check that the input mode is not illegal
if (last_motion_mode_allowed < mode)
assert(0 && "Illegal motion mode selected");
}
-#if CONFIG_MOTION_VAR
static INLINE int is_neighbor_overlappable(const MB_MODE_INFO *mbmi) {
return (is_inter_block(mbmi));
}
-#endif // CONFIG_MOTION_VAR
-#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
static INLINE int av1_allow_palette(int allow_screen_content_tools,
BLOCK_SIZE sb_type) {
- return allow_screen_content_tools && sb_type >= BLOCK_8X8 &&
- sb_type <= BLOCK_LARGEST;
+ return allow_screen_content_tools && block_size_wide[sb_type] <= 64 &&
+ block_size_high[sb_type] <= 64 && sb_type >= BLOCK_8X8;
}
// Returns sub-sampled dimensions of the given block.
@@ -1677,10 +1089,21 @@ static INLINE void av1_get_block_dimensions(BLOCK_SIZE bsize, int plane,
assert(IMPLIES(plane == PLANE_TYPE_Y, pd->subsampling_y == 0));
assert(block_width >= block_cols);
assert(block_height >= block_rows);
- if (width) *width = block_width >> pd->subsampling_x;
- if (height) *height = block_height >> pd->subsampling_y;
- if (rows_within_bounds) *rows_within_bounds = block_rows >> pd->subsampling_y;
- if (cols_within_bounds) *cols_within_bounds = block_cols >> pd->subsampling_x;
+ const int plane_block_width = block_width >> pd->subsampling_x;
+ const int plane_block_height = block_height >> pd->subsampling_y;
+ // Special handling for chroma sub8x8.
+ const int is_chroma_sub8_x = plane > 0 && plane_block_width < 4;
+ const int is_chroma_sub8_y = plane > 0 && plane_block_height < 4;
+ if (width) *width = plane_block_width + 2 * is_chroma_sub8_x;
+ if (height) *height = plane_block_height + 2 * is_chroma_sub8_y;
+ if (rows_within_bounds) {
+ *rows_within_bounds =
+ (block_rows >> pd->subsampling_y) + 2 * is_chroma_sub8_y;
+ }
+ if (cols_within_bounds) {
+ *cols_within_bounds =
+ (block_cols >> pd->subsampling_x) + 2 * is_chroma_sub8_x;
+ }
}
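
A minimal numeric sketch of the chroma sub8x8 adjustment above, assuming 4:2:0 subsampling and a chroma plane: a 4x4 luma block would otherwise report a 2x2 chroma block, so the +2 bump makes it report the 4x4 chroma area shared by the whole 8x8 luma region.

/* Hypothetical stand-alone helper; subsampling_x == 1 (4:2:0) and plane > 0
 * are assumed. */
static int chroma_width_420_sketch(int luma_block_width) {
  const int plane_w = luma_block_width >> 1;
  const int is_chroma_sub8 = plane_w < 4;
  return plane_w + 2 * is_chroma_sub8;
}

/* chroma_width_420_sketch(4) == 4 and chroma_width_420_sketch(8) == 4. */
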
/* clang-format off */
@@ -1701,39 +1124,22 @@ typedef struct {
ColorCost color_cost;
} Av1ColorMapParam;
-#if CONFIG_GLOBAL_MOTION
-static INLINE int is_nontrans_global_motion(const MACROBLOCKD *xd) {
- const MODE_INFO *mi = xd->mi[0];
- const MB_MODE_INFO *const mbmi = &mi->mbmi;
+static INLINE int is_nontrans_global_motion(const MACROBLOCKD *xd,
+ const MB_MODE_INFO *mbmi) {
int ref;
-#if CONFIG_CB4X4
- const int unify_bsize = 1;
-#else
- const int unify_bsize = 0;
-#endif
- // First check if all modes are ZEROMV
- if (mbmi->sb_type >= BLOCK_8X8 || unify_bsize) {
- if (mbmi->mode != ZEROMV && mbmi->mode != ZERO_ZEROMV) return 0;
- } else {
- if ((mi->bmi[0].as_mode != ZEROMV && mi->bmi[0].as_mode != ZERO_ZEROMV) ||
- (mi->bmi[1].as_mode != ZEROMV && mi->bmi[1].as_mode != ZERO_ZEROMV) ||
- (mi->bmi[2].as_mode != ZEROMV && mi->bmi[2].as_mode != ZERO_ZEROMV) ||
- (mi->bmi[3].as_mode != ZEROMV && mi->bmi[3].as_mode != ZERO_ZEROMV))
- return 0;
- }
+ // First check if all modes are GLOBALMV
+ if (mbmi->mode != GLOBALMV && mbmi->mode != GLOBAL_GLOBALMV) return 0;
-#if !GLOBAL_SUB8X8_USED
- if (mbmi->sb_type < BLOCK_8X8) return 0;
-#endif
+ if (AOMMIN(mi_size_wide[mbmi->sb_type], mi_size_high[mbmi->sb_type]) < 2)
+ return 0;
// Now check if all global motion is non translational
for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
- if (xd->global_motion[mbmi->ref_frame[ref]].wmtype <= TRANSLATION) return 0;
+ if (xd->global_motion[mbmi->ref_frame[ref]].wmtype == TRANSLATION) return 0;
}
return 1;
}
-#endif // CONFIG_GLOBAL_MOTION
static INLINE PLANE_TYPE get_plane_type(int plane) {
return (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
@@ -1771,6 +1177,16 @@ static INLINE void transpose_int32(int32_t *dst, int dst_stride,
for (c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];
}
+static INLINE int av1_get_max_eob(TX_SIZE tx_size) {
+ if (tx_size == TX_64X64 || tx_size == TX_64X32 || tx_size == TX_32X64) {
+ return 1024;
+ }
+ if (tx_size == TX_16X64 || tx_size == TX_64X16) {
+ return 512;
+ }
+ return tx_size_2d[tx_size];
+}
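
The caps above reflect that only the 32 low-frequency coefficients are kept in each 64-sized transform dimension; a stand-alone sketch of the same rule computed directly from the transform width and height:

/* Hypothetical helper mirroring av1_get_max_eob() from plain dimensions. */
static int max_eob_sketch(int tx_w, int tx_h) {
  const int eff_w = tx_w > 32 ? 32 : tx_w; /* 64-point dims keep 32 coeffs */
  const int eff_h = tx_h > 32 ? 32 : tx_h;
  return eff_w * eff_h;
}

/* max_eob_sketch(64, 64) == 1024, max_eob_sketch(64, 16) == 512,
 * max_eob_sketch(16, 16) == 256 (== tx_size_2d[TX_16X16]). */
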
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/cdef.c b/third_party/aom/av1/common/cdef.c
index 397a14845..c9b974900 100644
--- a/third_party/aom/av1/common/cdef.c
+++ b/third_party/aom/av1/common/cdef.c
@@ -13,7 +13,8 @@
#include <math.h>
#include <string.h>
-#include "./aom_scale_rtcd.h"
+#include "config/aom_scale_rtcd.h"
+
#include "aom/aom_integer.h"
#include "av1/common/cdef.h"
#include "av1/common/cdef_block.h"
@@ -21,7 +22,6 @@
#include "av1/common/reconinter.h"
int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col) {
- int r, c;
int maxc, maxr;
int skip = 1;
maxc = cm->mi_cols - mi_col;
@@ -30,38 +30,40 @@ int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col) {
maxr = AOMMIN(maxr, MI_SIZE_64X64);
maxc = AOMMIN(maxc, MI_SIZE_64X64);
- for (r = 0; r < maxr; r++) {
- for (c = 0; c < maxc; c++) {
- skip = skip &&
- cm->mi_grid_visible[(mi_row + r) * cm->mi_stride + mi_col + c]
- ->mbmi.skip;
+ for (int r = 0; r < maxr; r++) {
+ for (int c = 0; c < maxc; c++) {
+ skip =
+ skip &&
+ cm->mi_grid_visible[(mi_row + r) * cm->mi_stride + mi_col + c]->skip;
}
}
return skip;
}
-static int is_8x8_block_skip(MODE_INFO **grid, int mi_row, int mi_col,
+static int is_8x8_block_skip(MB_MODE_INFO **grid, int mi_row, int mi_col,
int mi_stride) {
int is_skip = 1;
for (int r = 0; r < mi_size_high[BLOCK_8X8]; ++r)
for (int c = 0; c < mi_size_wide[BLOCK_8X8]; ++c)
- is_skip &= grid[(mi_row + r) * mi_stride + (mi_col + c)]->mbmi.skip;
+ is_skip &= grid[(mi_row + r) * mi_stride + (mi_col + c)]->skip;
return is_skip;
}
int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
- cdef_list *dlist, int filter_skip) {
- int r, c;
- int maxc, maxr;
- MODE_INFO **grid;
- int count = 0;
- grid = cm->mi_grid_visible;
- maxc = cm->mi_cols - mi_col;
- maxr = cm->mi_rows - mi_row;
+ cdef_list *dlist, BLOCK_SIZE bs) {
+ MB_MODE_INFO **grid = cm->mi_grid_visible;
+ int maxc = cm->mi_cols - mi_col;
+ int maxr = cm->mi_rows - mi_row;
- maxr = AOMMIN(maxr, MI_SIZE_64X64);
- maxc = AOMMIN(maxc, MI_SIZE_64X64);
+ if (bs == BLOCK_128X128 || bs == BLOCK_128X64)
+ maxc = AOMMIN(maxc, MI_SIZE_128X128);
+ else
+ maxc = AOMMIN(maxc, MI_SIZE_64X64);
+ if (bs == BLOCK_128X128 || bs == BLOCK_64X128)
+ maxr = AOMMIN(maxr, MI_SIZE_128X128);
+ else
+ maxr = AOMMIN(maxr, MI_SIZE_64X64);
const int r_step = mi_size_high[BLOCK_8X8];
const int c_step = mi_size_wide[BLOCK_8X8];
@@ -71,36 +73,25 @@ int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
assert(r_step == 1 || r_step == 2);
assert(c_step == 1 || c_step == 2);
- if (filter_skip) {
- for (r = 0; r < maxr; r += r_step) {
- for (c = 0; c < maxc; c += c_step) {
+ int count = 0;
+
+ for (int r = 0; r < maxr; r += r_step) {
+ for (int c = 0; c < maxc; c += c_step) {
+ if (!is_8x8_block_skip(grid, mi_row + r, mi_col + c, cm->mi_stride)) {
dlist[count].by = r >> r_shift;
dlist[count].bx = c >> c_shift;
- dlist[count].skip =
- is_8x8_block_skip(grid, mi_row + r, mi_col + c, cm->mi_stride);
+ dlist[count].skip = 0;
count++;
}
}
- } else {
- for (r = 0; r < maxr; r += r_step) {
- for (c = 0; c < maxc; c += c_step) {
- if (!is_8x8_block_skip(grid, mi_row + r, mi_col + c, cm->mi_stride)) {
- dlist[count].by = r >> r_shift;
- dlist[count].bx = c >> c_shift;
- dlist[count].skip = 0;
- count++;
- }
- }
- }
}
return count;
}
void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src,
int sstride, int v, int h) {
- int i, j;
- for (i = 0; i < v; i++) {
- for (j = 0; j < h; j++) {
+ for (int i = 0; i < v; i++) {
+ for (int j = 0; j < h; j++) {
dst[i * dstride + j] = src[i * sstride + j];
}
}
@@ -109,36 +100,30 @@ void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src,
void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride,
const uint16_t *src, int sstride, int v,
int h) {
- int i, j;
- for (i = 0; i < v; i++) {
- for (j = 0; j < h; j++) {
+ for (int i = 0; i < v; i++) {
+ for (int j = 0; j < h; j++) {
dst[i * dstride + j] = src[i * sstride + j];
}
}
}
-static void copy_sb8_16(UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride,
+static void copy_sb8_16(AOM_UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride,
const uint8_t *src, int src_voffset, int src_hoffset,
int sstride, int vsize, int hsize) {
-#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth) {
const uint16_t *base =
&CONVERT_TO_SHORTPTR(src)[src_voffset * sstride + src_hoffset];
copy_rect8_16bit_to_16bit(dst, dstride, base, sstride, vsize, hsize);
} else {
-#endif
const uint8_t *base = &src[src_voffset * sstride + src_hoffset];
copy_rect8_8bit_to_16bit(dst, dstride, base, sstride, vsize, hsize);
-#if CONFIG_HIGHBITDEPTH
}
-#endif
}
static INLINE void fill_rect(uint16_t *dst, int dstride, int v, int h,
uint16_t x) {
- int i, j;
- for (i = 0; i < v; i++) {
- for (j = 0; j < h; j++) {
+ for (int i = 0; i < v; i++) {
+ for (int j = 0; j < h; j++) {
dst[i * dstride + j] = x;
}
}
@@ -146,9 +131,8 @@ static INLINE void fill_rect(uint16_t *dst, int dstride, int v, int h,
static INLINE void copy_rect(uint16_t *dst, int dstride, const uint16_t *src,
int sstride, int v, int h) {
- int i, j;
- for (i = 0; i < v; i++) {
- for (j = 0; j < h; j++) {
+ for (int i = 0; i < v; i++) {
+ for (int j = 0; j < h; j++) {
dst[i * dstride + j] = src[i * sstride + j];
}
}
@@ -156,9 +140,8 @@ static INLINE void copy_rect(uint16_t *dst, int dstride, const uint16_t *src,
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
MACROBLOCKD *xd) {
- int fbr, fbc;
- int nhfb, nvfb;
- uint16_t src[CDEF_INBUF_SIZE];
+ const int num_planes = av1_num_planes(cm);
+ DECLARE_ALIGNED(16, uint16_t, src[CDEF_INBUF_SIZE]);
uint16_t *linebuf[3];
uint16_t *colbuf[3];
cdef_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
@@ -166,48 +149,42 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
int cdef_count;
int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
- int stride;
int mi_wide_l2[3];
int mi_high_l2[3];
int xdec[3];
int ydec[3];
- int pli;
- int cdef_left;
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
- int nplanes = MAX_MB_PLANE;
- int chroma_cdef = xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
- xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
- nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- av1_setup_dst_planes(xd->plane, cm->sb_size, frame, 0, 0);
+ const int nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ const int nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,
+ num_planes);
row_cdef = aom_malloc(sizeof(*row_cdef) * (nhfb + 2) * 2);
memset(row_cdef, 1, sizeof(*row_cdef) * (nhfb + 2) * 2);
prev_row_cdef = row_cdef + 1;
curr_row_cdef = prev_row_cdef + nhfb + 2;
- for (pli = 0; pli < nplanes; pli++) {
+ for (int pli = 0; pli < num_planes; pli++) {
xdec[pli] = xd->plane[pli].subsampling_x;
ydec[pli] = xd->plane[pli].subsampling_y;
mi_wide_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
mi_high_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
- if (xdec[pli] != ydec[pli]) nplanes = 1;
}
- stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
- for (pli = 0; pli < nplanes; pli++) {
+ const int stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
+ for (int pli = 0; pli < num_planes; pli++) {
linebuf[pli] = aom_malloc(sizeof(*linebuf) * CDEF_VBORDER * stride);
colbuf[pli] =
aom_malloc(sizeof(*colbuf) *
((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) *
CDEF_HBORDER);
}
- for (fbr = 0; fbr < nvfb; fbr++) {
- for (pli = 0; pli < nplanes; pli++) {
+ for (int fbr = 0; fbr < nvfb; fbr++) {
+ for (int pli = 0; pli < num_planes; pli++) {
const int block_height =
(MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * CDEF_VBORDER;
fill_rect(colbuf[pli], CDEF_HBORDER, block_height, CDEF_HBORDER,
CDEF_VERY_LARGE);
}
- cdef_left = 1;
- for (fbc = 0; fbc < nhfb; fbc++) {
+ int cdef_left = 1;
+ for (int fbc = 0; fbc < nhfb; fbc++) {
int level, sec_strength;
int uv_level, uv_sec_strength;
int nhb, nvb;
@@ -217,38 +194,43 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
MI_SIZE_64X64 * fbc] == NULL ||
cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
MI_SIZE_64X64 * fbc]
- ->mbmi.cdef_strength == -1) {
+ ->cdef_strength == -1) {
cdef_left = 0;
continue;
}
if (!cdef_left) cstart = -CDEF_HBORDER;
nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
- int tile_top, tile_left, tile_bottom, tile_right;
- int mi_idx = MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc;
- MODE_INFO *const mi_tl = cm->mi + mi_idx;
- BOUNDARY_TYPE boundary_tl = mi_tl->mbmi.boundary_info;
- tile_top = boundary_tl & TILE_ABOVE_BOUNDARY;
- tile_left = boundary_tl & TILE_LEFT_BOUNDARY;
+ int frame_top, frame_left, frame_bottom, frame_right;
+
+ int mi_row = MI_SIZE_64X64 * fbr;
+ int mi_col = MI_SIZE_64X64 * fbc;
+      // For the current filter block, its top-left corner mi structure (mi_tl)
+      // is first accessed to check whether the top and left boundaries are
+      // frame boundaries. Then bottom-left and top-right mi structures are
+      // accessed to check whether the bottom and right boundaries
+      // (respectively) are frame boundaries.
+      //
+      // Note that we can't just check the bottom-right mi structure - e.g. if
+      // we're at the right-hand edge of the frame but not the bottom, then
+      // the bottom-right mi is NULL but the bottom-left is not.
+ frame_top = (mi_row == 0) ? 1 : 0;
+ frame_left = (mi_col == 0) ? 1 : 0;
- if (fbr != nvfb - 1 &&
- (&cm->mi[mi_idx + (MI_SIZE_64X64 - 1) * cm->mi_stride]))
- tile_bottom = cm->mi[mi_idx + (MI_SIZE_64X64 - 1) * cm->mi_stride]
- .mbmi.boundary_info &
- TILE_BOTTOM_BOUNDARY;
+ if (fbr != nvfb - 1)
+ frame_bottom = (mi_row + MI_SIZE_64X64 == cm->mi_rows) ? 1 : 0;
else
- tile_bottom = 1;
+ frame_bottom = 1;
- if (fbc != nhfb - 1 && (&cm->mi[mi_idx + MI_SIZE_64X64 - 1]))
- tile_right = cm->mi[mi_idx + MI_SIZE_64X64 - 1].mbmi.boundary_info &
- TILE_RIGHT_BOUNDARY;
+ if (fbc != nhfb - 1)
+ frame_right = (mi_col + MI_SIZE_64X64 == cm->mi_cols) ? 1 : 0;
else
- tile_right = 1;
+ frame_right = 1;
const int mbmi_cdef_strength =
cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
MI_SIZE_64X64 * fbc]
- ->mbmi.cdef_strength;
+ ->cdef_strength;
level = cm->cdef_strengths[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
sec_strength =
cm->cdef_strengths[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
@@ -259,23 +241,15 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
uv_sec_strength += uv_sec_strength == 3;
if ((level == 0 && sec_strength == 0 && uv_level == 0 &&
uv_sec_strength == 0) ||
- (cdef_count = sb_compute_cdef_list(
- cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist,
-#if CONFIG_CDEF_SINGLEPASS
- (level & 1) || (uv_level & 1))) == 0)
-#else
- get_filter_skip(level) || get_filter_skip(uv_level))) == 0)
-#endif
- {
+ (cdef_count = sb_compute_cdef_list(cm, fbr * MI_SIZE_64X64,
+ fbc * MI_SIZE_64X64, dlist,
+ BLOCK_64X64)) == 0) {
cdef_left = 0;
continue;
}
curr_row_cdef[fbc] = 1;
- for (pli = 0; pli < nplanes; pli++) {
-#if !CONFIG_CDEF_SINGLEPASS
- uint16_t dst[CDEF_BLOCKSIZE * CDEF_BLOCKSIZE];
-#endif
+ for (int pli = 0; pli < num_planes; pli++) {
int coffset;
int rend, cend;
int pri_damping = cm->cdef_pri_damping;
@@ -284,10 +258,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
int vsize = nvb << mi_high_l2[pli];
if (pli) {
- if (chroma_cdef)
- level = uv_level;
- else
- level = 0;
+ level = uv_level;
sec_strength = uv_sec_strength;
}
@@ -375,81 +346,57 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
(MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER,
coffset, xd->plane[pli].dst.stride, CDEF_VBORDER, hsize);
- if (tile_top) {
+ if (frame_top) {
fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER,
CDEF_VERY_LARGE);
}
- if (tile_left) {
+ if (frame_left) {
fill_rect(src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER,
CDEF_VERY_LARGE);
}
- if (tile_bottom) {
+ if (frame_bottom) {
fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE,
CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
}
- if (tile_right) {
+ if (frame_right) {
fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE,
vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
}
-#if CONFIG_HIGHBITDEPTH
+
if (cm->use_highbitdepth) {
cdef_filter_fb(
-#if CONFIG_CDEF_SINGLEPASS
NULL,
- &CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
-#else
- (uint8_t *)&CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
-#endif
- [xd->plane[pli].dst.stride *
- (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
- (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
-#if CONFIG_CDEF_SINGLEPASS
+ &CONVERT_TO_SHORTPTR(
+ xd->plane[pli]
+ .dst.buf)[xd->plane[pli].dst.stride *
+ (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
+ (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
xd->plane[pli].dst.stride,
-#else
- xd->plane[pli].dst.stride, dst,
-#endif
&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
-#if CONFIG_CDEF_SINGLEPASS
sec_strength, pri_damping, sec_damping, coeff_shift);
-#else
- sec_strength, sec_damping, pri_damping, coeff_shift, 0, 1);
-#endif
} else {
-#endif
cdef_filter_fb(
&xd->plane[pli]
.dst.buf[xd->plane[pli].dst.stride *
(MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
(fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
-#if CONFIG_CDEF_SINGLEPASS
NULL, xd->plane[pli].dst.stride,
-#else
- xd->plane[pli].dst.stride, dst,
-#endif
&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
-#if CONFIG_CDEF_SINGLEPASS
sec_strength, pri_damping, sec_damping, coeff_shift);
-#else
- sec_strength, sec_damping, pri_damping, coeff_shift, 0, 0);
-#endif
-
-#if CONFIG_HIGHBITDEPTH
}
-#endif
}
cdef_left = 1;
}
{
- unsigned char *tmp;
- tmp = prev_row_cdef;
+ unsigned char *tmp = prev_row_cdef;
prev_row_cdef = curr_row_cdef;
curr_row_cdef = tmp;
}
}
aom_free(row_cdef);
- for (pli = 0; pli < nplanes; pli++) {
+ for (int pli = 0; pli < num_planes; pli++) {
aom_free(linebuf[pli]);
aom_free(colbuf[pli]);
}
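
Each cm->cdef_strengths entry packs a primary and a secondary strength, which the division and modulo by CDEF_SEC_STRENGTHS above undo; a hypothetical stand-alone sketch of that unpacking, assuming CDEF_SEC_STRENGTHS == 4:

static void unpack_cdef_strength_sketch(int packed, int *pri, int *sec) {
  *pri = packed / 4; /* CDEF_SEC_STRENGTHS assumed to be 4 */
  *sec = packed % 4;
  if (*sec == 3) *sec += 1; /* same bump as the uv_sec_strength adjustment */
}

/* e.g. packed == 27 gives pri == 6 and sec == 4; 16 primary levels times 4
 * secondary levels fit in CDEF_STRENGTH_BITS == 6 bits. */
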
diff --git a/third_party/aom/av1/common/cdef.h b/third_party/aom/av1/common/cdef.h
index 9de24bf92..092230de9 100644
--- a/third_party/aom/av1/common/cdef.h
+++ b/third_party/aom/av1/common/cdef.h
@@ -11,12 +11,13 @@
#ifndef AV1_COMMON_CDEF_H_
#define AV1_COMMON_CDEF_H_
-#define CDEF_STRENGTH_BITS 7
+#define CDEF_STRENGTH_BITS 6
-#define CDEF_PRI_STRENGTHS 32
+#define CDEF_PRI_STRENGTHS 16
#define CDEF_SEC_STRENGTHS 4
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
#include "aom/aom_integer.h"
#include "aom_ports/mem.h"
#include "av1/common/cdef_block.h"
@@ -38,7 +39,7 @@ extern "C" {
int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col);
int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
- cdef_list *dlist, int filter_skip);
+ cdef_list *dlist, BLOCK_SIZE bsize);
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd);
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
diff --git a/third_party/aom/av1/common/cdef_block.c b/third_party/aom/av1/common/cdef_block.c
index aaa32c950..df1de89be 100644
--- a/third_party/aom/av1/common/cdef_block.c
+++ b/third_party/aom/av1/common/cdef_block.c
@@ -12,28 +12,13 @@
#include <math.h>
#include <stdlib.h>
-#ifdef HAVE_CONFIG_H
-#include "./config.h"
-#endif
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
-#include "./av1_rtcd.h"
-#include "./cdef.h"
+#include "av1/common/cdef.h"
/* Generated from gen_filter_tables.c. */
-#if !CONFIG_CDEF_SINGLEPASS || CDEF_FULL
-const int cdef_directions[8][3] = {
- { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2, -3 * CDEF_BSTRIDE + 3 },
- { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2, -1 * CDEF_BSTRIDE + 3 },
- { 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2, 0 * CDEF_BSTRIDE + 3 },
- { 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2, 1 * CDEF_BSTRIDE + 3 },
- { 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2, 3 * CDEF_BSTRIDE + 3 },
- { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1, 3 * CDEF_BSTRIDE + 1 },
- { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0, 3 * CDEF_BSTRIDE + 0 },
- { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1, 3 * CDEF_BSTRIDE - 1 }
-};
-#else
-const int cdef_directions[8][2] = {
+DECLARE_ALIGNED(16, const int, cdef_directions[8][2]) = {
{ -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2 },
{ 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2 },
{ 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2 },
@@ -43,7 +28,6 @@ const int cdef_directions[8][2] = {
{ 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0 },
{ 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1 }
};
-#endif
/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
The search minimizes the weighted variance along all the lines in a
@@ -123,65 +107,38 @@ int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var,
return best_dir;
}
-#if CONFIG_CDEF_SINGLEPASS
-#if CDEF_FULL
-const int cdef_pri_taps[2][3] = { { 3, 2, 1 }, { 2, 2, 2 } };
-const int cdef_sec_taps[2][2] = { { 3, 1 }, { 3, 1 } };
-#else
const int cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
const int cdef_sec_taps[2][2] = { { 2, 1 }, { 2, 1 } };
-#endif
/* Smooth in the direction detected. */
-#if CDEF_CAP
-void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
- const uint16_t *in, int pri_strength, int sec_strength,
- int dir, int pri_damping, int sec_damping, int bsize,
- UNUSED int max_unused)
-#else
void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
const uint16_t *in, int pri_strength, int sec_strength,
int dir, int pri_damping, int sec_damping, int bsize,
- int max)
-#endif
-{
+ AOM_UNUSED int max_unused, int coeff_shift) {
int i, j, k;
const int s = CDEF_BSTRIDE;
- const int *pri_taps = cdef_pri_taps[pri_strength & 1];
- const int *sec_taps = cdef_sec_taps[pri_strength & 1];
- for (i = 0; i < 4 << (bsize == BLOCK_8X8); i++) {
- for (j = 0; j < 4 << (bsize == BLOCK_8X8); j++) {
+ const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
+ const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
+ for (i = 0; i < 4 << (bsize == BLOCK_8X8 || bsize == BLOCK_4X8); i++) {
+ for (j = 0; j < 4 << (bsize == BLOCK_8X8 || bsize == BLOCK_8X4); j++) {
int16_t sum = 0;
int16_t y;
int16_t x = in[i * s + j];
-#if CDEF_CAP
int max = x;
int min = x;
-#endif
-#if CDEF_FULL
- for (k = 0; k < 3; k++)
-#else
- for (k = 0; k < 2; k++)
-#endif
- {
+ for (k = 0; k < 2; k++) {
int16_t p0 = in[i * s + j + cdef_directions[dir][k]];
int16_t p1 = in[i * s + j - cdef_directions[dir][k]];
sum += pri_taps[k] * constrain(p0 - x, pri_strength, pri_damping);
sum += pri_taps[k] * constrain(p1 - x, pri_strength, pri_damping);
-#if CDEF_CAP
if (p0 != CDEF_VERY_LARGE) max = AOMMAX(p0, max);
if (p1 != CDEF_VERY_LARGE) max = AOMMAX(p1, max);
min = AOMMIN(p0, min);
min = AOMMIN(p1, min);
-#endif
-#if CDEF_FULL
- if (k == 2) continue;
-#endif
int16_t s0 = in[i * s + j + cdef_directions[(dir + 2) & 7][k]];
int16_t s1 = in[i * s + j - cdef_directions[(dir + 2) & 7][k]];
int16_t s2 = in[i * s + j + cdef_directions[(dir + 6) & 7][k]];
int16_t s3 = in[i * s + j - cdef_directions[(dir + 6) & 7][k]];
-#if CDEF_CAP
if (s0 != CDEF_VERY_LARGE) max = AOMMAX(s0, max);
if (s1 != CDEF_VERY_LARGE) max = AOMMAX(s1, max);
if (s2 != CDEF_VERY_LARGE) max = AOMMAX(s2, max);
@@ -190,17 +147,12 @@ void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
min = AOMMIN(s1, min);
min = AOMMIN(s2, min);
min = AOMMIN(s3, min);
-#endif
sum += sec_taps[k] * constrain(s0 - x, sec_strength, sec_damping);
sum += sec_taps[k] * constrain(s1 - x, sec_strength, sec_damping);
sum += sec_taps[k] * constrain(s2 - x, sec_strength, sec_damping);
sum += sec_taps[k] * constrain(s3 - x, sec_strength, sec_damping);
}
-#if CDEF_CAP
y = clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), min, max);
-#else
- y = clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), 0, max);
-#endif
if (dst8)
dst8[i * dstride + j] = (uint8_t)y;
else
@@ -209,67 +161,6 @@ void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
}
}
-#else
-
-/* Smooth in the direction detected. */
-void cdef_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in,
- int threshold, int dir, int damping) {
- int i;
- int j;
- int k;
- static const int taps[3] = { 3, 2, 1 };
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- int16_t sum;
- int16_t xx;
- int16_t yy;
- xx = in[i * CDEF_BSTRIDE + j];
- sum = 0;
- for (k = 0; k < 3; k++) {
- int16_t p0;
- int16_t p1;
- p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
- p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
- sum += taps[k] * constrain(p0, threshold, damping);
- sum += taps[k] * constrain(p1, threshold, damping);
- }
- sum = (sum + 8) >> 4;
- yy = xx + sum;
- y[i * ystride + j] = yy;
- }
- }
-}
-
-/* Smooth in the direction detected. */
-void cdef_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in,
- int threshold, int dir, int damping) {
- int i;
- int j;
- int k;
- static const int taps[2] = { 4, 1 };
- for (i = 0; i < 4; i++) {
- for (j = 0; j < 4; j++) {
- int16_t sum;
- int16_t xx;
- int16_t yy;
- xx = in[i * CDEF_BSTRIDE + j];
- sum = 0;
- for (k = 0; k < 2; k++) {
- int16_t p0;
- int16_t p1;
- p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
- p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
- sum += taps[k] * constrain(p0, threshold, damping);
- sum += taps[k] * constrain(p1, threshold, damping);
- }
- sum = (sum + 8) >> 4;
- yy = xx + sum;
- y[i * ystride + j] = yy;
- }
- }
-}
-#endif
-
/* Compute the primary filter strength for an 8x8 block based on the
directional variance difference. A high variance difference means
that we have a highly directional pattern (e.g. a high contrast
@@ -282,172 +173,26 @@ static INLINE int adjust_strength(int strength, int32_t var) {
return var ? (strength * (4 + i) + 8) >> 4 : 0;
}
-#if !CONFIG_CDEF_SINGLEPASS
-void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
- int sstride) {
- int i, j;
- for (i = 0; i < 8; i++)
- for (j = 0; j < 8; j++) dst[i * dstride + j] = src[i * sstride + j];
-}
-
-void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
- int sstride) {
- int i, j;
- for (i = 0; i < 4; i++)
- for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
-}
-
-static void copy_block_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
- cdef_list *dlist, int cdef_count,
- int bsize) {
- int bi, bx, by;
-
- if (bsize == BLOCK_8X8) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
- &src[bi << (3 + 3)], 8);
- }
- } else if (bsize == BLOCK_4X8) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
- &src[bi << (3 + 2)], 4);
- copy_4x4_16bit_to_16bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
- dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
- }
- } else if (bsize == BLOCK_8X4) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
- &src[bi << (2 + 3)], 8);
- copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3) + 4],
- dstride, &src[(bi << (2 + 3)) + 4], 8);
- }
- } else {
- assert(bsize == BLOCK_4X4);
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
- &src[bi << (2 + 2)], 4);
- }
- }
-}
-
-void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
- int sstride) {
- int i, j;
- for (i = 0; i < 8; i++)
- for (j = 0; j < 8; j++)
- dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
-}
-
-void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
- int sstride) {
- int i, j;
- for (i = 0; i < 4; i++)
- for (j = 0; j < 4; j++)
- dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
-}
-
-static void copy_block_16bit_to_8bit(uint8_t *dst, int dstride,
- const uint16_t *src, cdef_list *dlist,
- int cdef_count, int bsize) {
- int bi, bx, by;
- if (bsize == BLOCK_8X8) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
- &src[bi << (3 + 3)], 8);
- }
- } else if (bsize == BLOCK_4X8) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
- &src[bi << (3 + 2)], 4);
- copy_4x4_16bit_to_8bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
- dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
- }
- } else if (bsize == BLOCK_8X4) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
- &src[bi << (2 + 3)], 8);
- copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3) + 4], dstride,
- &src[(bi << (2 + 3)) + 4], 8);
- }
- } else {
- assert(bsize == BLOCK_4X4);
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
- &src[bi << (2 * 2)], 4);
- }
- }
-}
-
-int get_filter_skip(int level) {
- int filter_skip = level & 1;
- if (level == 1) filter_skip = 0;
- return filter_skip;
-}
-
-void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
- int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
- int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
- cdef_list *dlist, int cdef_count, int level,
- int sec_strength, int sec_damping, int pri_damping,
- int coeff_shift, int skip_dering, int hbd) {
-#else
-
void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
cdef_list *dlist, int cdef_count, int level,
int sec_strength, int pri_damping, int sec_damping,
int coeff_shift) {
-#endif
int bi;
int bx;
int by;
int bsize, bsizex, bsizey;
-#if CONFIG_CDEF_SINGLEPASS
- int pri_strength = (level >> 1) << coeff_shift;
- int filter_skip = level & 1;
- if (!pri_strength && !sec_strength && filter_skip) {
- pri_strength = 19 << coeff_shift;
- sec_strength = 7 << coeff_shift;
- }
-#else
- int threshold = (level >> 1) << coeff_shift;
- int filter_skip = get_filter_skip(level);
- if (level == 1) threshold = 31 << coeff_shift;
-
- cdef_direction_func cdef_direction[] = { cdef_direction_4x4,
- cdef_direction_8x8 };
-#endif
+ int pri_strength = level << coeff_shift;
+ sec_strength <<= coeff_shift;
sec_damping += coeff_shift - (pli != AOM_PLANE_Y);
pri_damping += coeff_shift - (pli != AOM_PLANE_Y);
bsize =
ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
bsizex = 3 - xdec;
bsizey = 3 - ydec;
-#if CONFIG_CDEF_SINGLEPASS
- if (dirinit && pri_strength == 0 && sec_strength == 0)
-#else
- if (!skip_dering)
-#endif
- {
-#if CONFIG_CDEF_SINGLEPASS
+ if (dirinit && pri_strength == 0 && sec_strength == 0) {
// If we're here, both primary and secondary strengths are 0, and
// we still haven't written anything to y[] yet, so we just copy
// the input to y[]. This is necessary only for av1_cdef_search()
@@ -455,97 +200,16 @@ void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
-#else
- if (pli == 0) {
- if (!dirinit || !*dirinit) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
- CDEF_BSTRIDE, &var[by][bx], coeff_shift);
- }
- if (dirinit) *dirinit = 1;
- }
- }
- // Only run dering for non-zero threshold (which is always the case for
- // 4:2:2 or 4:4:0). If we don't dering, we still need to eventually write
- // something out in y[] later.
- if (threshold != 0) {
- assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
- for (bi = 0; bi < cdef_count; bi++) {
- int t = !filter_skip && dlist[bi].skip ? 0 : threshold;
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- (cdef_direction[bsize == BLOCK_8X8])(
- &y[bi << (bsizex + bsizey)], 1 << bsizex,
- &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
- pli ? t : adjust_strength(t, var[by][bx]), dir[by][bx],
- pri_damping);
- }
- }
- }
-
- if (sec_strength) {
- if (threshold && !skip_dering)
- copy_block_16bit_to_16bit(in, CDEF_BSTRIDE, y, dlist, cdef_count, bsize);
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- int py = by << bsizey;
- int px = bx << bsizex;
-
- if (!filter_skip && dlist[bi].skip) continue;
- if (!dst || hbd) {
- // 16 bit destination if high bitdepth or 8 bit destination not given
- (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block_hbd
- : aom_clpf_hblock_hbd)(
- dst ? (uint16_t *)dst + py * dstride + px
- : &y[bi << (bsizex + bsizey)],
- in + py * CDEF_BSTRIDE + px, dst && hbd ? dstride : 1 << bsizex,
- CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
- sec_damping);
- } else {
- // Do clpf and write the result to an 8 bit destination
- (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block
- : aom_clpf_hblock)(
- dst + py * dstride + px, in + py * CDEF_BSTRIDE + px, dstride,
- CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
- sec_damping);
- }
- }
- } else if (threshold != 0) {
- // No clpf, so copy instead
- if (hbd) {
- copy_block_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist, cdef_count,
- bsize);
- } else {
- copy_block_16bit_to_8bit(dst, dstride, y, dlist, cdef_count, bsize);
- }
- } else if (dirinit) {
- // If we're here, both dering and clpf are off, and we still haven't written
- // anything to y[] yet, so we just copy the input to y[]. This is necessary
- // only for av1_cdef_search() and only av1_cdef_search() sets dirinit.
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
-#endif
int iy, ix;
// TODO(stemidts/jmvalin): SIMD optimisations
for (iy = 0; iy < 1 << bsizey; iy++)
for (ix = 0; ix < 1 << bsizex; ix++)
-#if CONFIG_CDEF_SINGLEPASS
dst16[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
-#else
- y[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
-#endif
in[((by << bsizey) + iy) * CDEF_BSTRIDE + (bx << bsizex) + ix];
}
-#if CONFIG_CDEF_SINGLEPASS
return;
-#endif
}
-#if CONFIG_CDEF_SINGLEPASS
if (pli == 0) {
if (!dirinit || !*dirinit) {
for (bi = 0; bi < cdef_count; bi++) {
@@ -557,19 +221,28 @@ void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
if (dirinit) *dirinit = 1;
}
}
+ if (pli == 1 && xdec != ydec) {
+ for (bi = 0; bi < cdef_count; bi++) {
+ static const int conv422[8] = { 7, 0, 2, 4, 5, 6, 6, 6 };
+ static const int conv440[8] = { 1, 2, 2, 2, 3, 4, 6, 0 };
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ dir[by][bx] = (xdec ? conv422 : conv440)[dir[by][bx]];
+ }
+ }
- assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
for (bi = 0; bi < cdef_count; bi++) {
- int t = !filter_skip && dlist[bi].skip ? 0 : pri_strength;
- int s = !filter_skip && dlist[bi].skip ? 0 : sec_strength;
+ int t = dlist[bi].skip ? 0 : pri_strength;
+ int s = dlist[bi].skip ? 0 : sec_strength;
by = dlist[bi].by;
bx = dlist[bi].bx;
if (dst8)
- cdef_filter_block(
- &dst8[(by << bsizey) * dstride + (bx << bsizex)], NULL, dstride,
- &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
- (pli ? t : adjust_strength(t, var[by][bx])), s, t ? dir[by][bx] : 0,
- pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1);
+ cdef_filter_block(&dst8[(by << bsizey) * dstride + (bx << bsizex)], NULL,
+ dstride,
+ &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
+ (pli ? t : adjust_strength(t, var[by][bx])), s,
+ t ? dir[by][bx] : 0, pri_damping, sec_damping, bsize,
+ (256 << coeff_shift) - 1, coeff_shift);
else
cdef_filter_block(
NULL,
@@ -578,7 +251,7 @@ void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
dirinit ? 1 << bsizex : dstride,
&in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
(pli ? t : adjust_strength(t, var[by][bx])), s, t ? dir[by][bx] : 0,
- pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1);
+ pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1,
+ coeff_shift);
}
-#endif
}
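
The constrain() calls in cdef_filter_block_c above limit each tap's contribution using the nonlinearity noted later in cdef_block_simd.h, sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp))); a hypothetical scalar sketch, treating adjdamp as a precomputed shift:

static int constrain_sketch(int diff, int strength, int adjdamp) {
  const int sign = diff < 0 ? -1 : 1;
  const int mag = diff < 0 ? -diff : diff;
  int limit = strength - (mag >> adjdamp);
  if (limit < 0) limit = 0;
  return sign * (mag < limit ? mag : limit);
}

/* Small differences pass through nearly unchanged while large outliers are
 * clipped toward zero, which is what keeps the filter from smearing true
 * edges. */
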
diff --git a/third_party/aom/av1/common/cdef_block.h b/third_party/aom/av1/common/cdef_block.h
index bf277faad..81c6da077 100644
--- a/third_party/aom/av1/common/cdef_block.h
+++ b/third_party/aom/av1/common/cdef_block.h
@@ -12,43 +12,28 @@
#if !defined(_CDEF_BLOCK_H)
#define _CDEF_BLOCK_H (1)
-#include "./odintrin.h"
+#include "av1/common/odintrin.h"
#define CDEF_BLOCKSIZE 64
#define CDEF_BLOCKSIZE_LOG2 6
-#define CDEF_NBLOCKS (CDEF_BLOCKSIZE / 8)
-#if CONFIG_CDEF_SINGLEPASS
+#define CDEF_NBLOCKS ((1 << MAX_SB_SIZE_LOG2) / 8)
#define CDEF_SB_SHIFT (MAX_SB_SIZE_LOG2 - CDEF_BLOCKSIZE_LOG2)
-#endif
/* We need to buffer three vertical lines. */
#define CDEF_VBORDER (3)
/* We only need to buffer three horizontal pixels too, but let's align to
16 bytes (8 x 16 bits) to make vectorization easier. */
#define CDEF_HBORDER (8)
-#define CDEF_BSTRIDE ALIGN_POWER_OF_TWO(CDEF_BLOCKSIZE + 2 * CDEF_HBORDER, 3)
+#define CDEF_BSTRIDE \
+ ALIGN_POWER_OF_TWO((1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_HBORDER, 3)
#define CDEF_VERY_LARGE (30000)
-#define CDEF_INBUF_SIZE (CDEF_BSTRIDE * (CDEF_BLOCKSIZE + 2 * CDEF_VBORDER))
-
-#if CONFIG_CDEF_SINGLEPASS
-// Filter configuration
-#define CDEF_CAP 1 // 1 = Cap change to largest diff
-#define CDEF_FULL 0 // 1 = 7x7 filter, 0 = 5x5 filter
+#define CDEF_INBUF_SIZE \
+ (CDEF_BSTRIDE * ((1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_VBORDER))
-#if CDEF_FULL
-extern const int cdef_pri_taps[2][3];
-extern const int cdef_sec_taps[2][2];
-extern const int cdef_directions[8][3];
-#else
extern const int cdef_pri_taps[2][2];
extern const int cdef_sec_taps[2][2];
-extern const int cdef_directions[8][2];
-#endif
-
-#else // CONFIG_CDEF_SINGLEPASS
-extern const int cdef_directions[8][3];
-#endif
+DECLARE_ALIGNED(16, extern const int, cdef_directions[8][2]);
typedef struct {
uint8_t by;
@@ -56,35 +41,19 @@ typedef struct {
uint8_t skip;
} cdef_list;
-#if CONFIG_CDEF_SINGLEPASS
typedef void (*cdef_filter_block_func)(uint8_t *dst8, uint16_t *dst16,
int dstride, const uint16_t *in,
int pri_strength, int sec_strength,
int dir, int pri_damping,
- int sec_damping, int bsize, int max);
+ int sec_damping, int bsize, int max,
+ int coeff_shift);
void copy_cdef_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
cdef_list *dlist, int cdef_count, int bsize);
-#else
-typedef void (*cdef_direction_func)(uint16_t *y, int ystride,
- const uint16_t *in, int threshold, int dir,
- int damping);
-int get_filter_skip(int level);
-#endif
-
-#if CONFIG_CDEF_SINGLEPASS
void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
cdef_list *dlist, int cdef_count, int level,
int sec_strength, int pri_damping, int sec_damping,
int coeff_shift);
-#else
-void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
- int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
- int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
- cdef_list *dlist, int cdef_count, int level,
- int sec_strength, int sec_damping, int pri_damping,
- int coeff_shift, int skip_dering, int hbd);
-#endif
#endif
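As a quick sanity check of the buffer geometry the reworked macros above imply, the following standalone sketch reproduces them with stand-in values: MAX_SB_SIZE_LOG2 is assumed to be 7 (128x128 superblocks) and ALIGN_POWER_OF_TWO is given its usual round-up definition; neither of those two lines is taken from this patch.

#include <stdio.h>

#define MAX_SB_SIZE_LOG2 7 /* assumption: 128x128 superblocks */
#define ALIGN_POWER_OF_TWO(value, n) \
  (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) /* stand-in definition */

#define CDEF_VBORDER (3) /* three buffered lines above/below */
#define CDEF_HBORDER (8) /* three pixels needed, padded to 8 for SIMD */
#define CDEF_BSTRIDE \
  ALIGN_POWER_OF_TWO((1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_HBORDER, 3)
#define CDEF_INBUF_SIZE \
  (CDEF_BSTRIDE * ((1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_VBORDER))

int main(void) {
  /* With the assumed values: stride 144, 134 rows, 19296 uint16_t entries. */
  printf("CDEF_BSTRIDE    = %d\n", CDEF_BSTRIDE);
  printf("CDEF_INBUF_SIZE = %d (%d bytes at 16 bits/pixel)\n", CDEF_INBUF_SIZE,
         CDEF_INBUF_SIZE * 2);
  return 0;
}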
diff --git a/third_party/aom/av1/common/cdef_block_avx2.c b/third_party/aom/av1/common/cdef_block_avx2.c
index 5e48045c0..e2b85b3e2 100644
--- a/third_party/aom/av1/common/cdef_block_avx2.c
+++ b/third_party/aom/av1/common/cdef_block_avx2.c
@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_avx2
-#include "./cdef_block_simd.h"
+#include "av1/common/cdef_block_simd.h"
diff --git a/third_party/aom/av1/common/cdef_block_neon.c b/third_party/aom/av1/common/cdef_block_neon.c
index 030b32531..2d6bc65e3 100644
--- a/third_party/aom/av1/common/cdef_block_neon.c
+++ b/third_party/aom/av1/common/cdef_block_neon.c
@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_neon
-#include "./cdef_block_simd.h"
+#include "av1/common/cdef_block_simd.h"
diff --git a/third_party/aom/av1/common/cdef_block_simd.h b/third_party/aom/av1/common/cdef_block_simd.h
index aa7d3c3ca..d24a7c0fa 100644
--- a/third_party/aom/av1/common/cdef_block_simd.h
+++ b/third_party/aom/av1/common/cdef_block_simd.h
@@ -9,8 +9,9 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#include "./av1_rtcd.h"
-#include "./cdef_block.h"
+#include "config/av1_rtcd.h"
+
+#include "av1/common/cdef_block.h"
/* partial A is a 16-bit vector of the form:
[x8 x7 x6 x5 x4 x3 x2 x1] and partial B has the form:
@@ -167,39 +168,22 @@ int SIMD_FUNC(cdef_find_dir)(const uint16_t *img, int stride, int32_t *var,
v128_sub_16(v128_shr_s16(lines[i], coeff_shift), v128_dup_16(128));
}
-#if defined(__SSE4_1__)
/* Compute "mostly vertical" directions. */
- __m128i dir47 = compute_directions(lines, cost + 4);
+ v128 dir47 = compute_directions(lines, cost + 4);
array_reverse_transpose_8x8(lines, lines);
/* Compute "mostly horizontal" directions. */
- __m128i dir03 = compute_directions(lines, cost);
-
- __m128i max = _mm_max_epi32(dir03, dir47);
- max = _mm_max_epi32(max, _mm_shuffle_epi32(max, _MM_SHUFFLE(1, 0, 3, 2)));
- max = _mm_max_epi32(max, _mm_shuffle_epi32(max, _MM_SHUFFLE(2, 3, 0, 1)));
- best_cost = _mm_cvtsi128_si32(max);
- __m128i t =
- _mm_packs_epi32(_mm_cmpeq_epi32(max, dir03), _mm_cmpeq_epi32(max, dir47));
- best_dir = _mm_movemask_epi8(_mm_packs_epi16(t, t));
+ v128 dir03 = compute_directions(lines, cost);
+
+ v128 max = v128_max_s32(dir03, dir47);
+ max = v128_max_s32(max, v128_align(max, max, 8));
+ max = v128_max_s32(max, v128_align(max, max, 4));
+ best_cost = v128_low_u32(max);
+ v128 t =
+ v128_pack_s32_s16(v128_cmpeq_32(max, dir47), v128_cmpeq_32(max, dir03));
+ best_dir = v128_movemask_8(v128_pack_s16_s8(t, t));
best_dir = get_msb(best_dir ^ (best_dir - 1)); // Count trailing zeros
-#else
- /* Compute "mostly vertical" directions. */
- compute_directions(lines, cost + 4);
-
- array_reverse_transpose_8x8(lines, lines);
-
- /* Compute "mostly horizontal" directions. */
- compute_directions(lines, cost);
-
- for (i = 0; i < 8; i++) {
- if (cost[i] > best_cost) {
- best_cost = cost[i];
- best_dir = i;
- }
- }
-#endif
/* Difference between the optimal variance and the variance along the
orthogonal direction. Again, the sum(x^2) terms cancel out. */
@@ -211,17 +195,16 @@ int SIMD_FUNC(cdef_find_dir)(const uint16_t *img, int stride, int32_t *var,
}
// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
-SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold,
+SIMD_INLINE v256 constrain16(v256 a, v256 b, unsigned int threshold,
unsigned int adjdamp) {
- v128 diff = v128_sub_16(a, b);
- const v128 sign = v128_shr_n_s16(diff, 15);
- diff = v128_abs_s16(diff);
- const v128 s =
- v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(diff, adjdamp));
- return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign);
+ v256 diff = v256_sub_16(a, b);
+ const v256 sign = v256_shr_n_s16(diff, 15);
+ diff = v256_abs_s16(diff);
+ const v256 s =
+ v256_ssub_u16(v256_dup_16(threshold), v256_shr_u16(diff, adjdamp));
+ return v256_xor(v256_add_16(sign, v256_min_s16(diff, s)), sign);
}
-#if CONFIG_CDEF_SINGLEPASS
// sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp)))
SIMD_INLINE v128 constrain(v256 a, v256 b, unsigned int strength,
unsigned int adjdamp) {
@@ -236,37 +219,24 @@ SIMD_INLINE v128 constrain(v256 a, v256 b, unsigned int strength,
sign);
}
-#if CDEF_CAP
-void SIMD_FUNC(cdef_filter_block_4x4_8)(uint8_t *dst, int dstride,
- const uint16_t *in, int pri_strength,
- int sec_strength, int dir,
- int pri_damping, int sec_damping,
- UNUSED int max_unused)
-#else
void SIMD_FUNC(cdef_filter_block_4x4_8)(uint8_t *dst, int dstride,
const uint16_t *in, int pri_strength,
int sec_strength, int dir,
int pri_damping, int sec_damping,
- int max)
-#endif
-{
+ AOM_UNUSED int max_unused,
+ int coeff_shift) {
v128 p0, p1, p2, p3;
v256 sum, row, tap, res;
-#if CDEF_CAP
v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
-#endif
int po1 = cdef_directions[dir][0];
int po2 = cdef_directions[dir][1];
-#if CDEF_FULL
- int po3 = cdef_directions[dir][2];
-#endif
int s1o1 = cdef_directions[(dir + 2) & 7][0];
int s1o2 = cdef_directions[(dir + 2) & 7][1];
int s2o1 = cdef_directions[(dir + 6) & 7][0];
int s2o2 = cdef_directions[(dir + 6) & 7][1];
- const int *pri_taps = cdef_pri_taps[pri_strength & 1];
- const int *sec_taps = cdef_sec_taps[pri_strength & 1];
+ const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
+ const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
if (pri_strength)
pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
@@ -278,9 +248,7 @@ void SIMD_FUNC(cdef_filter_block_4x4_8)(uint8_t *dst, int dstride,
v64_load_aligned(&in[1 * CDEF_BSTRIDE]),
v64_load_aligned(&in[2 * CDEF_BSTRIDE]),
v64_load_aligned(&in[3 * CDEF_BSTRIDE]));
-#if CDEF_CAP
max = min = row;
-#endif
if (pri_strength) {
// Primary near taps
@@ -288,19 +256,15 @@ void SIMD_FUNC(cdef_filter_block_4x4_8)(uint8_t *dst, int dstride,
v64_load_unaligned(&in[1 * CDEF_BSTRIDE + po1]),
v64_load_unaligned(&in[2 * CDEF_BSTRIDE + po1]),
v64_load_unaligned(&in[3 * CDEF_BSTRIDE + po1]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p0 = constrain(tap, row, pri_strength, pri_damping);
tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - po1]),
v64_load_unaligned(&in[1 * CDEF_BSTRIDE - po1]),
v64_load_unaligned(&in[2 * CDEF_BSTRIDE - po1]),
v64_load_unaligned(&in[3 * CDEF_BSTRIDE - po1]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p1 = constrain(tap, row, pri_strength, pri_damping);
// sum += pri_taps[0] * (p0 + p1)
@@ -313,52 +277,21 @@ void SIMD_FUNC(cdef_filter_block_4x4_8)(uint8_t *dst, int dstride,
v64_load_unaligned(&in[1 * CDEF_BSTRIDE + po2]),
v64_load_unaligned(&in[2 * CDEF_BSTRIDE + po2]),
v64_load_unaligned(&in[3 * CDEF_BSTRIDE + po2]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p0 = constrain(tap, row, pri_strength, pri_damping);
tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - po2]),
v64_load_unaligned(&in[1 * CDEF_BSTRIDE - po2]),
v64_load_unaligned(&in[2 * CDEF_BSTRIDE - po2]),
v64_load_unaligned(&in[3 * CDEF_BSTRIDE - po2]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p1 = constrain(tap, row, pri_strength, pri_damping);
// sum += pri_taps[1] * (p0 + p1)
sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[1]),
v256_from_v128(v128_ziphi_8(p0, p1),
v128_ziplo_8(p0, p1))));
-
-#if CDEF_FULL
- // Primary extra taps
- tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + po3]),
- v64_load_unaligned(&in[1 * CDEF_BSTRIDE + po3]),
- v64_load_unaligned(&in[2 * CDEF_BSTRIDE + po3]),
- v64_load_unaligned(&in[3 * CDEF_BSTRIDE + po3]));
-#if CDEF_CAP
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
-#endif
- p0 = constrain(tap, row, pri_strength, pri_damping);
- tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - po3]),
- v64_load_unaligned(&in[1 * CDEF_BSTRIDE - po3]),
- v64_load_unaligned(&in[2 * CDEF_BSTRIDE - po3]),
- v64_load_unaligned(&in[3 * CDEF_BSTRIDE - po3]));
-#if CDEF_CAP
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
-#endif
- p1 = constrain(tap, row, pri_strength, pri_damping);
-
- // sum += pri_taps[2] * (p0 + p1)
- sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[2]),
- v256_from_v128(v128_ziphi_8(p0, p1),
- v128_ziplo_8(p0, p1))));
-#endif
}
if (sec_strength) {
@@ -367,37 +300,29 @@ void SIMD_FUNC(cdef_filter_block_4x4_8)(uint8_t *dst, int dstride,
v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s1o1]),
v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s1o1]),
v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s1o1]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p0 = constrain(tap, row, sec_strength, sec_damping);
tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s1o1]),
v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s1o1]),
v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s1o1]),
v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s1o1]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p1 = constrain(tap, row, sec_strength, sec_damping);
tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + s2o1]),
v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s2o1]),
v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s2o1]),
v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s2o1]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p2 = constrain(tap, row, sec_strength, sec_damping);
tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s2o1]),
v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s2o1]),
v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s2o1]),
v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s2o1]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p3 = constrain(tap, row, sec_strength, sec_damping);
// sum += sec_taps[0] * (p0 + p1 + p2 + p3)
@@ -412,37 +337,29 @@ void SIMD_FUNC(cdef_filter_block_4x4_8)(uint8_t *dst, int dstride,
v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s1o2]),
v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s1o2]),
v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s1o2]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p0 = constrain(tap, row, sec_strength, sec_damping);
tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s1o2]),
v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s1o2]),
v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s1o2]),
v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s1o2]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p1 = constrain(tap, row, sec_strength, sec_damping);
tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + s2o2]),
v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s2o2]),
v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s2o2]),
v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s2o2]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p2 = constrain(tap, row, sec_strength, sec_damping);
tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s2o2]),
v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s2o2]),
v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s2o2]),
v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s2o2]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p3 = constrain(tap, row, sec_strength, sec_damping);
// sum += sec_taps[1] * (p0 + p1 + p2 + p3)
@@ -459,11 +376,7 @@ void SIMD_FUNC(cdef_filter_block_4x4_8)(uint8_t *dst, int dstride,
res = v256_add_16(sum, v256_dup_16(8));
res = v256_shr_n_s16(res, 4);
res = v256_add_16(row, res);
-#if CDEF_CAP
res = v256_min_s16(v256_max_s16(res, min), max);
-#else
- res = v256_min_s16(v256_max_s16(res, v256_zero()), v256_dup_16(max));
-#endif
res = v256_pack_s16_u8(res, res);
p0 = v256_low_v128(res);
@@ -473,38 +386,25 @@ void SIMD_FUNC(cdef_filter_block_4x4_8)(uint8_t *dst, int dstride,
u32_store_aligned(&dst[3 * dstride], v64_low_u32(v128_low_v64(p0)));
}
-#if CDEF_CAP
void SIMD_FUNC(cdef_filter_block_8x8_8)(uint8_t *dst, int dstride,
const uint16_t *in, int pri_strength,
int sec_strength, int dir,
int pri_damping, int sec_damping,
- UNUSED int max_unused)
-#else
-void SIMD_FUNC(cdef_filter_block_8x8_8)(uint8_t *dst, int dstride,
- const uint16_t *in, int pri_strength,
- int sec_strength, int dir,
- int pri_damping, int sec_damping,
- int max)
-#endif
-{
+ AOM_UNUSED int max_unused,
+ int coeff_shift) {
int i;
v128 p0, p1, p2, p3;
v256 sum, row, res, tap;
-#if CDEF_CAP
v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
-#endif
int po1 = cdef_directions[dir][0];
int po2 = cdef_directions[dir][1];
-#if CDEF_FULL
- int po3 = cdef_directions[dir][2];
-#endif
int s1o1 = cdef_directions[(dir + 2) & 7][0];
int s1o2 = cdef_directions[(dir + 2) & 7][1];
int s2o1 = cdef_directions[(dir + 6) & 7][0];
int s2o2 = cdef_directions[(dir + 6) & 7][1];
- const int *pri_taps = cdef_pri_taps[pri_strength & 1];
- const int *sec_taps = cdef_sec_taps[pri_strength & 1];
+ const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
+ const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
if (pri_strength)
pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
@@ -515,25 +415,19 @@ void SIMD_FUNC(cdef_filter_block_8x8_8)(uint8_t *dst, int dstride,
row = v256_from_v128(v128_load_aligned(&in[i * CDEF_BSTRIDE]),
v128_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));
-#if CDEF_CAP
max = min = row;
-#endif
// Primary near taps
tap =
v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po1]),
v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po1]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p0 = constrain(tap, row, pri_strength, pri_damping);
tap =
v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po1]),
v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po1]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p1 = constrain(tap, row, pri_strength, pri_damping);
// sum += pri_taps[0] * (p0 + p1)
@@ -545,18 +439,14 @@ void SIMD_FUNC(cdef_filter_block_8x8_8)(uint8_t *dst, int dstride,
tap =
v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po2]),
v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po2]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p0 = constrain(tap, row, pri_strength, pri_damping);
tap =
v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po2]),
v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po2]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p1 = constrain(tap, row, pri_strength, pri_damping);
// sum += pri_taps[1] * (p0 + p1)
@@ -564,63 +454,30 @@ void SIMD_FUNC(cdef_filter_block_8x8_8)(uint8_t *dst, int dstride,
v256_from_v128(v128_ziphi_8(p0, p1),
v128_ziplo_8(p0, p1))));
-#if CDEF_FULL
- // Primary extra taps
- tap =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po3]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po3]));
-#if CDEF_CAP
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
-#endif
- p0 = constrain(tap, row, pri_strength, pri_damping);
- tap =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po3]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po3]));
-#if CDEF_CAP
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
-#endif
- p1 = constrain(tap, row, pri_strength, pri_damping);
-
- // sum += pri_taps[2] * (p0 + p1)
- sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[2]),
- v256_from_v128(v128_ziphi_8(p0, p1),
- v128_ziplo_8(p0, p1))));
-#endif
-
// Secondary near taps
tap =
v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o1]),
v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o1]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p0 = constrain(tap, row, sec_strength, sec_damping);
tap =
v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o1]),
v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o1]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p1 = constrain(tap, row, sec_strength, sec_damping);
tap =
v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o1]),
v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o1]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p2 = constrain(tap, row, sec_strength, sec_damping);
tap =
v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o1]),
v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o1]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p3 = constrain(tap, row, sec_strength, sec_damping);
// sum += sec_taps[0] * (p0 + p1 + p2 + p3)
@@ -634,34 +491,26 @@ void SIMD_FUNC(cdef_filter_block_8x8_8)(uint8_t *dst, int dstride,
tap =
v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o2]),
v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o2]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p0 = constrain(tap, row, sec_strength, sec_damping);
tap =
v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o2]),
v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o2]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p1 = constrain(tap, row, sec_strength, sec_damping);
tap =
v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o2]),
v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o2]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p2 = constrain(tap, row, sec_strength, sec_damping);
tap =
v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o2]),
v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o2]));
-#if CDEF_CAP
max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
min = v256_min_s16(min, tap);
-#endif
p3 = constrain(tap, row, sec_strength, sec_damping);
// sum += sec_taps[1] * (p0 + p1 + p2 + p3)
@@ -676,11 +525,7 @@ void SIMD_FUNC(cdef_filter_block_8x8_8)(uint8_t *dst, int dstride,
res = v256_add_16(sum, v256_dup_16(8));
res = v256_shr_n_s16(res, 4);
res = v256_add_16(row, res);
-#if CDEF_CAP
res = v256_min_s16(v256_max_s16(res, min), max);
-#else
- res = v256_min_s16(v256_max_s16(res, v256_zero()), v256_dup_16(max));
-#endif
res = v256_pack_s16_u8(res, res);
p0 = v256_low_v128(res);
@@ -689,499 +534,355 @@ void SIMD_FUNC(cdef_filter_block_8x8_8)(uint8_t *dst, int dstride,
}
}
-#if CDEF_CAP
void SIMD_FUNC(cdef_filter_block_4x4_16)(uint16_t *dst, int dstride,
const uint16_t *in, int pri_strength,
int sec_strength, int dir,
int pri_damping, int sec_damping,
- UNUSED int max_unused)
-#else
-void SIMD_FUNC(cdef_filter_block_4x4_16)(uint16_t *dst, int dstride,
- const uint16_t *in, int pri_strength,
- int sec_strength, int dir,
- int pri_damping, int sec_damping,
- int max)
-#endif
-{
+ AOM_UNUSED int max_unused,
+ int coeff_shift) {
int i;
- v128 p0, p1, p2, p3, sum, row, res;
-#if CDEF_CAP
- v128 max, min, large = v128_dup_16(CDEF_VERY_LARGE);
-#endif
+ v256 p0, p1, p2, p3, sum, row, res;
+ v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
int po1 = cdef_directions[dir][0];
int po2 = cdef_directions[dir][1];
-#if CDEF_FULL
- int po3 = cdef_directions[dir][2];
-#endif
int s1o1 = cdef_directions[(dir + 2) & 7][0];
int s1o2 = cdef_directions[(dir + 2) & 7][1];
int s2o1 = cdef_directions[(dir + 6) & 7][0];
int s2o2 = cdef_directions[(dir + 6) & 7][1];
- const int *pri_taps = cdef_pri_taps[pri_strength & 1];
- const int *sec_taps = cdef_sec_taps[pri_strength & 1];
+ const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
+ const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
if (pri_strength)
pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
if (sec_strength)
sec_damping = AOMMAX(0, sec_damping - get_msb(sec_strength));
- for (i = 0; i < 4; i += 2) {
- sum = v128_zero();
- row = v128_from_v64(v64_load_aligned(&in[i * CDEF_BSTRIDE]),
- v64_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));
-#if CDEF_CAP
+ for (i = 0; i < 4; i += 4) {
+ sum = v256_zero();
+ row = v256_from_v64(v64_load_aligned(&in[i * CDEF_BSTRIDE]),
+ v64_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]),
+ v64_load_aligned(&in[(i + 2) * CDEF_BSTRIDE]),
+ v64_load_aligned(&in[(i + 3) * CDEF_BSTRIDE]));
min = max = row;
-#endif
// Primary near taps
- p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + po1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po1]));
- p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - po1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po1]));
-#if CDEF_CAP
+ p0 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + po1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po1]),
+ v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + po1]),
+ v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + po1]));
+ p1 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - po1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po1]),
+ v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - po1]),
+ v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - po1]));
max =
- v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
- v128_andn(p1, v128_cmpeq_16(p1, large)));
- min = v128_min_s16(v128_min_s16(min, p0), p1);
-#endif
+ v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
+ v256_andn(p1, v256_cmpeq_16(p1, large)));
+ min = v256_min_s16(v256_min_s16(min, p0), p1);
p0 = constrain16(p0, row, pri_strength, pri_damping);
p1 = constrain16(p1, row, pri_strength, pri_damping);
// sum += pri_taps[0] * (p0 + p1)
- sum = v128_add_16(
- sum, v128_mullo_s16(v128_dup_16(pri_taps[0]), v128_add_16(p0, p1)));
+ sum = v256_add_16(
+ sum, v256_mullo_s16(v256_dup_16(pri_taps[0]), v256_add_16(p0, p1)));
// Primary far taps
- p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + po2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po2]));
- p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - po2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po2]));
-#if CDEF_CAP
+ p0 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + po2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po2]),
+ v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + po2]),
+ v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + po2]));
+ p1 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - po2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po2]),
+ v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - po2]),
+ v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - po2]));
max =
- v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
- v128_andn(p1, v128_cmpeq_16(p1, large)));
- min = v128_min_s16(v128_min_s16(min, p0), p1);
-#endif
+ v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
+ v256_andn(p1, v256_cmpeq_16(p1, large)));
+ min = v256_min_s16(v256_min_s16(min, p0), p1);
p0 = constrain16(p0, row, pri_strength, pri_damping);
p1 = constrain16(p1, row, pri_strength, pri_damping);
// sum += pri_taps[1] * (p0 + p1)
- sum = v128_add_16(
- sum, v128_mullo_s16(v128_dup_16(pri_taps[1]), v128_add_16(p0, p1)));
-
-#if CDEF_FULL
- // Primary extra taps
- p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + po3]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po3]));
- p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - po3]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po3]));
-#if CDEF_CAP
- max =
- v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
- v128_andn(p1, v128_cmpeq_16(p1, large)));
- min = v128_min_s16(v128_min_s16(min, p0), p1);
-#endif
- p0 = constrain16(p0, row, pri_strength, pri_damping);
- p1 = constrain16(p1, row, pri_strength, pri_damping);
-
- // sum += pri_taps[2] * (p0 + p1)
- sum = v128_add_16(
- sum, v128_mullo_s16(v128_dup_16(pri_taps[2]), v128_add_16(p0, p1)));
-#endif
+ sum = v256_add_16(
+ sum, v256_mullo_s16(v256_dup_16(pri_taps[1]), v256_add_16(p0, p1)));
// Secondary near taps
- p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s1o1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o1]));
- p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s1o1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o1]));
- p2 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s2o1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o1]));
- p3 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s2o1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o1]));
-#if CDEF_CAP
+ p0 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s1o1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o1]),
+ v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + s1o1]),
+ v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + s1o1]));
+ p1 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s1o1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o1]),
+ v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - s1o1]),
+ v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - s1o1]));
+ p2 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s2o1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o1]),
+ v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + s2o1]),
+ v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + s2o1]));
+ p3 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s2o1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o1]),
+ v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - s2o1]),
+ v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - s2o1]));
max =
- v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
- v128_andn(p1, v128_cmpeq_16(p1, large)));
+ v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
+ v256_andn(p1, v256_cmpeq_16(p1, large)));
max =
- v128_max_s16(v128_max_s16(max, v128_andn(p2, v128_cmpeq_16(p2, large))),
- v128_andn(p3, v128_cmpeq_16(p3, large)));
- min = v128_min_s16(
- v128_min_s16(v128_min_s16(v128_min_s16(min, p0), p1), p2), p3);
-#endif
+ v256_max_s16(v256_max_s16(max, v256_andn(p2, v256_cmpeq_16(p2, large))),
+ v256_andn(p3, v256_cmpeq_16(p3, large)));
+ min = v256_min_s16(
+ v256_min_s16(v256_min_s16(v256_min_s16(min, p0), p1), p2), p3);
p0 = constrain16(p0, row, sec_strength, sec_damping);
p1 = constrain16(p1, row, sec_strength, sec_damping);
p2 = constrain16(p2, row, sec_strength, sec_damping);
p3 = constrain16(p3, row, sec_strength, sec_damping);
// sum += sec_taps[0] * (p0 + p1 + p2 + p3)
- sum = v128_add_16(sum, v128_mullo_s16(v128_dup_16(sec_taps[0]),
- v128_add_16(v128_add_16(p0, p1),
- v128_add_16(p2, p3))));
+ sum = v256_add_16(sum, v256_mullo_s16(v256_dup_16(sec_taps[0]),
+ v256_add_16(v256_add_16(p0, p1),
+ v256_add_16(p2, p3))));
// Secondary far taps
- p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s1o2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o2]));
- p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s1o2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o2]));
- p2 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s2o2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o2]));
- p3 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s2o2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o2]));
-#if CDEF_CAP
+ p0 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s1o2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o2]),
+ v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + s1o2]),
+ v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + s1o2]));
+ p1 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s1o2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o2]),
+ v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - s1o2]),
+ v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - s1o2]));
+ p2 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s2o2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o2]),
+ v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + s2o2]),
+ v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + s2o2]));
+ p3 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s2o2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o2]),
+ v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - s2o2]),
+ v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - s2o2]));
max =
- v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
- v128_andn(p1, v128_cmpeq_16(p1, large)));
+ v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
+ v256_andn(p1, v256_cmpeq_16(p1, large)));
max =
- v128_max_s16(v128_max_s16(max, v128_andn(p2, v128_cmpeq_16(p2, large))),
- v128_andn(p3, v128_cmpeq_16(p3, large)));
- min = v128_min_s16(
- v128_min_s16(v128_min_s16(v128_min_s16(min, p0), p1), p2), p3);
-#endif
+ v256_max_s16(v256_max_s16(max, v256_andn(p2, v256_cmpeq_16(p2, large))),
+ v256_andn(p3, v256_cmpeq_16(p3, large)));
+ min = v256_min_s16(
+ v256_min_s16(v256_min_s16(v256_min_s16(min, p0), p1), p2), p3);
p0 = constrain16(p0, row, sec_strength, sec_damping);
p1 = constrain16(p1, row, sec_strength, sec_damping);
p2 = constrain16(p2, row, sec_strength, sec_damping);
p3 = constrain16(p3, row, sec_strength, sec_damping);
// sum += sec_taps[1] * (p0 + p1 + p2 + p3)
- sum = v128_add_16(sum, v128_mullo_s16(v128_dup_16(sec_taps[1]),
- v128_add_16(v128_add_16(p0, p1),
- v128_add_16(p2, p3))));
+ sum = v256_add_16(sum, v256_mullo_s16(v256_dup_16(sec_taps[1]),
+ v256_add_16(v256_add_16(p0, p1),
+ v256_add_16(p2, p3))));
// res = row + ((sum - (sum < 0) + 8) >> 4)
- sum = v128_add_16(sum, v128_cmplt_s16(sum, v128_zero()));
- res = v128_add_16(sum, v128_dup_16(8));
- res = v128_shr_n_s16(res, 4);
- res = v128_add_16(row, res);
-#if CDEF_CAP
- res = v128_min_s16(v128_max_s16(res, min), max);
-#else
- res = v128_min_s16(v128_max_s16(res, v128_zero()), v128_dup_16(max));
-#endif
- v64_store_aligned(&dst[i * dstride], v128_high_v64(res));
- v64_store_aligned(&dst[(i + 1) * dstride], v128_low_v64(res));
+ sum = v256_add_16(sum, v256_cmplt_s16(sum, v256_zero()));
+ res = v256_add_16(sum, v256_dup_16(8));
+ res = v256_shr_n_s16(res, 4);
+ res = v256_add_16(row, res);
+ res = v256_min_s16(v256_max_s16(res, min), max);
+
+ v64_store_aligned(&dst[i * dstride], v128_high_v64(v256_high_v128(res)));
+ v64_store_aligned(&dst[(i + 1) * dstride],
+ v128_low_v64(v256_high_v128(res)));
+ v64_store_aligned(&dst[(i + 2) * dstride],
+ v128_high_v64(v256_low_v128(res)));
+ v64_store_aligned(&dst[(i + 3) * dstride],
+ v128_low_v64(v256_low_v128(res)));
}
}
-#if CDEF_CAP
void SIMD_FUNC(cdef_filter_block_8x8_16)(uint16_t *dst, int dstride,
const uint16_t *in, int pri_strength,
int sec_strength, int dir,
int pri_damping, int sec_damping,
- UNUSED int max_unused)
-#else
-void SIMD_FUNC(cdef_filter_block_8x8_16)(uint16_t *dst, int dstride,
- const uint16_t *in, int pri_strength,
- int sec_strength, int dir,
- int pri_damping, int sec_damping,
- int max)
-#endif
-{
+ AOM_UNUSED int max_unused,
+ int coeff_shift) {
int i;
- v128 sum, p0, p1, p2, p3, row, res;
-#if CDEF_CAP
- v128 max, min, large = v128_dup_16(CDEF_VERY_LARGE);
-#endif
+ v256 sum, p0, p1, p2, p3, row, res;
+ v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
int po1 = cdef_directions[dir][0];
int po2 = cdef_directions[dir][1];
-#if CDEF_FULL
- int po3 = cdef_directions[dir][2];
-#endif
int s1o1 = cdef_directions[(dir + 2) & 7][0];
int s1o2 = cdef_directions[(dir + 2) & 7][1];
int s2o1 = cdef_directions[(dir + 6) & 7][0];
int s2o2 = cdef_directions[(dir + 6) & 7][1];
- const int *pri_taps = cdef_pri_taps[pri_strength & 1];
- const int *sec_taps = cdef_sec_taps[pri_strength & 1];
+ const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
+ const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
if (pri_strength)
pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
if (sec_strength)
sec_damping = AOMMAX(0, sec_damping - get_msb(sec_strength));
- for (i = 0; i < 8; i++) {
- sum = v128_zero();
- row = v128_load_aligned(&in[i * CDEF_BSTRIDE]);
+ for (i = 0; i < 8; i += 2) {
+ sum = v256_zero();
+ row = v256_from_v128(v128_load_aligned(&in[i * CDEF_BSTRIDE]),
+ v128_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));
-#if CDEF_CAP
min = max = row;
-#endif
// Primary near taps
- p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + po1]);
- p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - po1]);
-#if CDEF_CAP
+ p0 = v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po1]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po1]));
+ p1 = v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po1]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po1]));
max =
- v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
- v128_andn(p1, v128_cmpeq_16(p1, large)));
- min = v128_min_s16(v128_min_s16(min, p0), p1);
-#endif
+ v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
+ v256_andn(p1, v256_cmpeq_16(p1, large)));
+ min = v256_min_s16(v256_min_s16(min, p0), p1);
p0 = constrain16(p0, row, pri_strength, pri_damping);
p1 = constrain16(p1, row, pri_strength, pri_damping);
// sum += pri_taps[0] * (p0 + p1)
- sum = v128_add_16(
- sum, v128_mullo_s16(v128_dup_16(pri_taps[0]), v128_add_16(p0, p1)));
+ sum = v256_add_16(
+ sum, v256_mullo_s16(v256_dup_16(pri_taps[0]), v256_add_16(p0, p1)));
// Primary far taps
- p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + po2]);
- p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - po2]);
-#if CDEF_CAP
+ p0 = v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po2]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po2]));
+ p1 = v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po2]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po2]));
max =
- v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
- v128_andn(p1, v128_cmpeq_16(p1, large)));
- min = v128_min_s16(v128_min_s16(min, p0), p1);
-#endif
+ v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
+ v256_andn(p1, v256_cmpeq_16(p1, large)));
+ min = v256_min_s16(v256_min_s16(min, p0), p1);
p0 = constrain16(p0, row, pri_strength, pri_damping);
p1 = constrain16(p1, row, pri_strength, pri_damping);
// sum += pri_taps[1] * (p0 + p1)
- sum = v128_add_16(
- sum, v128_mullo_s16(v128_dup_16(pri_taps[1]), v128_add_16(p0, p1)));
-
-#if CDEF_FULL
- // Primary extra taps
- p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + po3]);
- p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - po3]);
-#if CDEF_CAP
- max =
- v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
- v128_andn(p1, v128_cmpeq_16(p1, large)));
- min = v128_min_s16(v128_min_s16(min, p0), p1);
-#endif
- p0 = constrain16(p0, row, pri_strength, pri_damping);
- p1 = constrain16(p1, row, pri_strength, pri_damping);
-
- // sum += pri_taps[2] * (p0 + p1)
- sum = v128_add_16(
- sum, v128_mullo_s16(v128_dup_16(pri_taps[2]), v128_add_16(p0, p1)));
-#endif
+ sum = v256_add_16(
+ sum, v256_mullo_s16(v256_dup_16(pri_taps[1]), v256_add_16(p0, p1)));
// Secondary near taps
- p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o1]);
- p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o1]);
- p2 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o1]);
- p3 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o1]);
-#if CDEF_CAP
+ p0 =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o1]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o1]));
+ p1 =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o1]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o1]));
+ p2 =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o1]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o1]));
+ p3 =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o1]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o1]));
max =
- v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
- v128_andn(p1, v128_cmpeq_16(p1, large)));
+ v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
+ v256_andn(p1, v256_cmpeq_16(p1, large)));
max =
- v128_max_s16(v128_max_s16(max, v128_andn(p2, v128_cmpeq_16(p2, large))),
- v128_andn(p3, v128_cmpeq_16(p3, large)));
- min = v128_min_s16(
- v128_min_s16(v128_min_s16(v128_min_s16(min, p0), p1), p2), p3);
-#endif
+ v256_max_s16(v256_max_s16(max, v256_andn(p2, v256_cmpeq_16(p2, large))),
+ v256_andn(p3, v256_cmpeq_16(p3, large)));
+ min = v256_min_s16(
+ v256_min_s16(v256_min_s16(v256_min_s16(min, p0), p1), p2), p3);
p0 = constrain16(p0, row, sec_strength, sec_damping);
p1 = constrain16(p1, row, sec_strength, sec_damping);
p2 = constrain16(p2, row, sec_strength, sec_damping);
p3 = constrain16(p3, row, sec_strength, sec_damping);
// sum += sec_taps[0] * (p0 + p1 + p2 + p3)
- sum = v128_add_16(sum, v128_mullo_s16(v128_dup_16(sec_taps[0]),
- v128_add_16(v128_add_16(p0, p1),
- v128_add_16(p2, p3))));
+ sum = v256_add_16(sum, v256_mullo_s16(v256_dup_16(sec_taps[0]),
+ v256_add_16(v256_add_16(p0, p1),
+ v256_add_16(p2, p3))));
// Secondary far taps
- p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o2]);
- p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o2]);
- p2 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o2]);
- p3 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o2]);
-#if CDEF_CAP
+ p0 =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o2]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o2]));
+ p1 =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o2]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o2]));
+ p2 =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o2]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o2]));
+ p3 =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o2]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o2]));
max =
- v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
- v128_andn(p1, v128_cmpeq_16(p1, large)));
+ v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
+ v256_andn(p1, v256_cmpeq_16(p1, large)));
max =
- v128_max_s16(v128_max_s16(max, v128_andn(p2, v128_cmpeq_16(p2, large))),
- v128_andn(p3, v128_cmpeq_16(p3, large)));
- min = v128_min_s16(
- v128_min_s16(v128_min_s16(v128_min_s16(min, p0), p1), p2), p3);
-#endif
+ v256_max_s16(v256_max_s16(max, v256_andn(p2, v256_cmpeq_16(p2, large))),
+ v256_andn(p3, v256_cmpeq_16(p3, large)));
+ min = v256_min_s16(
+ v256_min_s16(v256_min_s16(v256_min_s16(min, p0), p1), p2), p3);
p0 = constrain16(p0, row, sec_strength, sec_damping);
p1 = constrain16(p1, row, sec_strength, sec_damping);
p2 = constrain16(p2, row, sec_strength, sec_damping);
p3 = constrain16(p3, row, sec_strength, sec_damping);
// sum += sec_taps[1] * (p0 + p1 + p2 + p3)
- sum = v128_add_16(sum, v128_mullo_s16(v128_dup_16(sec_taps[1]),
- v128_add_16(v128_add_16(p0, p1),
- v128_add_16(p2, p3))));
+ sum = v256_add_16(sum, v256_mullo_s16(v256_dup_16(sec_taps[1]),
+ v256_add_16(v256_add_16(p0, p1),
+ v256_add_16(p2, p3))));
// res = row + ((sum - (sum < 0) + 8) >> 4)
- sum = v128_add_16(sum, v128_cmplt_s16(sum, v128_zero()));
- res = v128_add_16(sum, v128_dup_16(8));
- res = v128_shr_n_s16(res, 4);
- res = v128_add_16(row, res);
-#if CDEF_CAP
- res = v128_min_s16(v128_max_s16(res, min), max);
-#else
- res = v128_min_s16(v128_max_s16(res, v128_zero()), v128_dup_16(max));
-#endif
- v128_store_unaligned(&dst[i * dstride], res);
+ sum = v256_add_16(sum, v256_cmplt_s16(sum, v256_zero()));
+ res = v256_add_16(sum, v256_dup_16(8));
+ res = v256_shr_n_s16(res, 4);
+ res = v256_add_16(row, res);
+ res = v256_min_s16(v256_max_s16(res, min), max);
+ v128_store_unaligned(&dst[i * dstride], v256_high_v128(res));
+ v128_store_unaligned(&dst[(i + 1) * dstride], v256_low_v128(res));
}
}
void SIMD_FUNC(cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride,
const uint16_t *in, int pri_strength,
int sec_strength, int dir, int pri_damping,
- int sec_damping, int bsize, int max) {
- if (dst8)
- (bsize == BLOCK_8X8 ? SIMD_FUNC(cdef_filter_block_8x8_8)
- : SIMD_FUNC(cdef_filter_block_4x4_8))(
- dst8, dstride, in, pri_strength, sec_strength, dir, pri_damping,
- sec_damping, max);
- else
- (bsize == BLOCK_8X8 ? SIMD_FUNC(cdef_filter_block_8x8_16)
- : SIMD_FUNC(cdef_filter_block_4x4_16))(
- dst16, dstride, in, pri_strength, sec_strength, dir, pri_damping,
- sec_damping, max);
-}
-
-#else
-
-void SIMD_FUNC(cdef_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in,
- int threshold, int dir, int damping) {
- int i;
- v128 p0, p1, sum, row, res;
- int o1 = cdef_directions[dir][0];
- int o2 = cdef_directions[dir][1];
-
- if (threshold) damping -= get_msb(threshold);
- for (i = 0; i < 4; i += 2) {
- sum = v128_zero();
- row = v128_from_v64(v64_load_aligned(&in[i * CDEF_BSTRIDE]),
- v64_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));
-
- // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o1]));
- p0 = constrain16(p0, row, threshold, damping);
-
- // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o1]));
- p1 = constrain16(p1, row, threshold, damping);
-
- // sum += 4 * (p0 + p1)
- sum = v128_add_16(sum, v128_shl_n_16(v128_add_16(p0, p1), 2));
-
- // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o2]));
- p0 = constrain16(p0, row, threshold, damping);
-
- // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o2]));
- p1 = constrain16(p1, row, threshold, damping);
-
- // sum += 1 * (p0 + p1)
- sum = v128_add_16(sum, v128_add_16(p0, p1));
-
- // res = row + ((sum + 8) >> 4)
- res = v128_add_16(sum, v128_dup_16(8));
- res = v128_shr_n_s16(res, 4);
- res = v128_add_16(row, res);
- v64_store_aligned(&y[i * ystride], v128_high_v64(res));
- v64_store_aligned(&y[(i + 1) * ystride], v128_low_v64(res));
- }
-}
-
-void SIMD_FUNC(cdef_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in,
- int threshold, int dir, int damping) {
- int i;
- v128 sum, p0, p1, row, res;
- int o1 = cdef_directions[dir][0];
- int o2 = cdef_directions[dir][1];
- int o3 = cdef_directions[dir][2];
-
- if (threshold) damping -= get_msb(threshold);
- for (i = 0; i < 8; i++) {
- sum = v128_zero();
- row = v128_load_aligned(&in[i * CDEF_BSTRIDE]);
-
- // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o1]);
- p0 = constrain16(p0, row, threshold, damping);
-
- // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o1]);
- p1 = constrain16(p1, row, threshold, damping);
-
- // sum += 3 * (p0 + p1)
- p0 = v128_add_16(p0, p1);
- p0 = v128_add_16(p0, v128_shl_n_16(p0, 1));
- sum = v128_add_16(sum, p0);
-
- // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o2]);
- p0 = constrain16(p0, row, threshold, damping);
-
- // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o2]);
- p1 = constrain16(p1, row, threshold, damping);
-
- // sum += 2 * (p0 + p1)
- p0 = v128_shl_n_16(v128_add_16(p0, p1), 1);
- sum = v128_add_16(sum, p0);
-
- // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o3]);
- p0 = constrain16(p0, row, threshold, damping);
-
- // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o3]);
- p1 = constrain16(p1, row, threshold, damping);
-
- // sum += (p0 + p1)
- p0 = v128_add_16(p0, p1);
- sum = v128_add_16(sum, p0);
-
- // res = row + ((sum + 8) >> 4)
- res = v128_add_16(sum, v128_dup_16(8));
- res = v128_shr_n_s16(res, 4);
- res = v128_add_16(row, res);
- v128_store_unaligned(&y[i * ystride], res);
- }
-}
-
-void SIMD_FUNC(copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride,
- const uint16_t *src, int sstride) {
- int i;
- for (i = 0; i < 8; i++) {
- v128 row = v128_load_unaligned(&src[i * sstride]);
- row = v128_pack_s16_u8(row, row);
- v64_store_unaligned(&dst[i * dstride], v128_low_v64(row));
- }
-}
-
-void SIMD_FUNC(copy_4x4_16bit_to_8bit)(uint8_t *dst, int dstride,
- const uint16_t *src, int sstride) {
- int i;
- for (i = 0; i < 4; i++) {
- v128 row = v128_load_unaligned(&src[i * sstride]);
- row = v128_pack_s16_u8(row, row);
- u32_store_unaligned(&dst[i * dstride], v128_low_u32(row));
- }
-}
-
-void SIMD_FUNC(copy_8x8_16bit_to_16bit)(uint16_t *dst, int dstride,
- const uint16_t *src, int sstride) {
- int i;
- for (i = 0; i < 8; i++) {
- v128 row = v128_load_unaligned(&src[i * sstride]);
- v128_store_unaligned(&dst[i * dstride], row);
- }
-}
-
-void SIMD_FUNC(copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride,
- const uint16_t *src, int sstride) {
- int i;
- for (i = 0; i < 4; i++) {
- v64 row = v64_load_unaligned(&src[i * sstride]);
- v64_store_unaligned(&dst[i * dstride], row);
+ int sec_damping, int bsize, int max,
+ int coeff_shift) {
+ if (dst8) {
+ if (bsize == BLOCK_8X8) {
+ SIMD_FUNC(cdef_filter_block_8x8_8)
+ (dst8, dstride, in, pri_strength, sec_strength, dir, pri_damping,
+ sec_damping, max, coeff_shift);
+ } else if (bsize == BLOCK_4X8) {
+ SIMD_FUNC(cdef_filter_block_4x4_8)
+ (dst8, dstride, in, pri_strength, sec_strength, dir, pri_damping,
+ sec_damping, max, coeff_shift);
+ SIMD_FUNC(cdef_filter_block_4x4_8)
+ (dst8 + 4 * dstride, dstride, in + 4 * CDEF_BSTRIDE, pri_strength,
+ sec_strength, dir, pri_damping, sec_damping, max, coeff_shift);
+ } else if (bsize == BLOCK_8X4) {
+ SIMD_FUNC(cdef_filter_block_4x4_8)
+ (dst8, dstride, in, pri_strength, sec_strength, dir, pri_damping,
+ sec_damping, max, coeff_shift);
+ SIMD_FUNC(cdef_filter_block_4x4_8)
+ (dst8 + 4, dstride, in + 4, pri_strength, sec_strength, dir, pri_damping,
+ sec_damping, max, coeff_shift);
+ } else {
+ SIMD_FUNC(cdef_filter_block_4x4_8)
+ (dst8, dstride, in, pri_strength, sec_strength, dir, pri_damping,
+ sec_damping, max, coeff_shift);
+ }
+ } else {
+ if (bsize == BLOCK_8X8) {
+ SIMD_FUNC(cdef_filter_block_8x8_16)
+ (dst16, dstride, in, pri_strength, sec_strength, dir, pri_damping,
+ sec_damping, max, coeff_shift);
+ } else if (bsize == BLOCK_4X8) {
+ SIMD_FUNC(cdef_filter_block_4x4_16)
+ (dst16, dstride, in, pri_strength, sec_strength, dir, pri_damping,
+ sec_damping, max, coeff_shift);
+ SIMD_FUNC(cdef_filter_block_4x4_16)
+ (dst16 + 4 * dstride, dstride, in + 4 * CDEF_BSTRIDE, pri_strength,
+ sec_strength, dir, pri_damping, sec_damping, max, coeff_shift);
+ } else if (bsize == BLOCK_8X4) {
+ SIMD_FUNC(cdef_filter_block_4x4_16)
+ (dst16, dstride, in, pri_strength, sec_strength, dir, pri_damping,
+ sec_damping, max, coeff_shift);
+ SIMD_FUNC(cdef_filter_block_4x4_16)
+ (dst16 + 4, dstride, in + 4, pri_strength, sec_strength, dir, pri_damping,
+ sec_damping, max, coeff_shift);
+ } else {
+ assert(bsize == BLOCK_4X4);
+ SIMD_FUNC(cdef_filter_block_4x4_16)
+ (dst16, dstride, in, pri_strength, sec_strength, dir, pri_damping,
+ sec_damping, max, coeff_shift);
+ }
}
}
-#endif
void SIMD_FUNC(copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride,
const uint8_t *src, int sstride, int v,
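All of the vector kernels in the cdef_block_simd.h hunks above reduce to the scalar constraint spelled out in their comments, sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp))), applied after the damping has been reduced by log2 of the strength. The plain-C sketch below restates that arithmetic for reference; it is an illustration of the formula, not the routine libaom ships, and msb() is a stand-in for get_msb().

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for libaom's get_msb(): index of the most significant set bit. */
static int msb(unsigned int n) {
  int pos = 0;
  while (n >>= 1) pos++;
  return pos;
}

/* Scalar form of constrain()/constrain16() from the template above:
   sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp))) */
static int constrain_scalar(int a, int b, int strength, int adjdamp) {
  const int diff = a - b;
  const int sign = diff < 0 ? -1 : 1;
  const int absdiff = abs(diff);
  int limit = strength - (absdiff >> adjdamp);
  if (limit < 0) limit = 0;
  return sign * (absdiff < limit ? absdiff : limit);
}

int main(void) {
  const int strength = 4, damping = 5;
  /* Mirrors pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength)). */
  int adjdamp = damping - (strength ? msb(strength) : 0);
  if (adjdamp < 0) adjdamp = 0;

  printf("%d\n", constrain_scalar(103, 100, strength, adjdamp)); /* 3: small diff passes */
  printf("%d\n", constrain_scalar(110, 100, strength, adjdamp)); /* 3: capped near strength */
  printf("%d\n", constrain_scalar(140, 100, strength, adjdamp)); /* 0: outlier ignored */
  return 0;
}

Taps whose damped difference exceeds the strength contribute nothing, which is also why the kernels above track a running min/max of the taps and clamp the result to that range rather than to [0, max].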
diff --git a/third_party/aom/av1/common/cdef_block_sse2.c b/third_party/aom/av1/common/cdef_block_sse2.c
index f3de763fa..73f115d17 100644
--- a/third_party/aom/av1/common/cdef_block_sse2.c
+++ b/third_party/aom/av1/common/cdef_block_sse2.c
@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_sse2
-#include "./cdef_block_simd.h"
+#include "av1/common/cdef_block_simd.h"
diff --git a/third_party/aom/av1/common/cdef_block_sse4.c b/third_party/aom/av1/common/cdef_block_sse4.c
index 27e9ff32e..349329af6 100644
--- a/third_party/aom/av1/common/cdef_block_sse4.c
+++ b/third_party/aom/av1/common/cdef_block_sse4.c
@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_sse4_1
-#include "./cdef_block_simd.h"
+#include "av1/common/cdef_block_simd.h"
diff --git a/third_party/aom/av1/common/cdef_block_ssse3.c b/third_party/aom/av1/common/cdef_block_ssse3.c
index 863522199..3a93b150f 100644
--- a/third_party/aom/av1/common/cdef_block_ssse3.c
+++ b/third_party/aom/av1/common/cdef_block_ssse3.c
@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_ssse3
-#include "./cdef_block_simd.h"
+#include "av1/common/cdef_block_simd.h"
diff --git a/third_party/aom/av1/common/cfl.c b/third_party/aom/av1/common/cfl.c
index f9acfcbc9..ee19f0bcf 100644
--- a/third_party/aom/av1/common/cfl.c
+++ b/third_party/aom/av1/common/cfl.c
@@ -13,20 +13,77 @@
#include "av1/common/common_data.h"
#include "av1/common/onyxc_int.h"
+#include "config/av1_rtcd.h"
+
void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm) {
- if (!((cm->subsampling_x == 0 && cm->subsampling_y == 0) ||
- (cm->subsampling_x == 1 && cm->subsampling_y == 1))) {
+ assert(block_size_wide[CFL_MAX_BLOCK_SIZE] == CFL_BUF_LINE);
+ assert(block_size_high[CFL_MAX_BLOCK_SIZE] == CFL_BUF_LINE);
+ if (!(cm->subsampling_x == 0 && cm->subsampling_y == 0) &&
+ !(cm->subsampling_x == 1 && cm->subsampling_y == 1) &&
+ !(cm->subsampling_x == 1 && cm->subsampling_y == 0)) {
aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "Only 4:4:4 and 4:2:0 are currently supported by CfL");
+ "Only 4:4:4, 4:2:2 and 4:2:0 are currently supported by "
+ "CfL, %d %d subsampling is not supported.\n",
+ cm->subsampling_x, cm->subsampling_y);
}
- memset(&cfl->pred_buf_q3, 0, sizeof(cfl->pred_buf_q3));
+ memset(&cfl->recon_buf_q3, 0, sizeof(cfl->recon_buf_q3));
+ memset(&cfl->ac_buf_q3, 0, sizeof(cfl->ac_buf_q3));
cfl->subsampling_x = cm->subsampling_x;
cfl->subsampling_y = cm->subsampling_y;
cfl->are_parameters_computed = 0;
cfl->store_y = 0;
-#if CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
- cfl_clear_sub8x8_val(cfl);
-#endif // CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+ // The DC_PRED cache is disabled by default and is only enabled in
+ // cfl_rd_pick_alpha
+ cfl->use_dc_pred_cache = 0;
+ cfl->dc_pred_is_cached[CFL_PRED_U] = 0;
+ cfl->dc_pred_is_cached[CFL_PRED_V] = 0;
+}
+
+void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input,
+ CFL_PRED_TYPE pred_plane, int width) {
+ assert(pred_plane < CFL_PRED_PLANES);
+ assert(width <= CFL_BUF_LINE);
+
+ if (get_bitdepth_data_path_index(xd)) {
+ uint16_t *const input_16 = CONVERT_TO_SHORTPTR(input);
+ memcpy(xd->cfl.dc_pred_cache[pred_plane], input_16, width << 1);
+ return;
+ }
+
+ memcpy(xd->cfl.dc_pred_cache[pred_plane], input, width);
+}
+
+static void cfl_load_dc_pred_lbd(const int16_t *dc_pred_cache, uint8_t *dst,
+ int dst_stride, int width, int height) {
+ for (int j = 0; j < height; j++) {
+ memcpy(dst, dc_pred_cache, width);
+ dst += dst_stride;
+ }
+}
+
+static void cfl_load_dc_pred_hbd(const int16_t *dc_pred_cache, uint16_t *dst,
+ int dst_stride, int width, int height) {
+ const size_t num_bytes = width << 1;
+ for (int j = 0; j < height; j++) {
+ memcpy(dst, dc_pred_cache, num_bytes);
+ dst += dst_stride;
+ }
+}
+void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
+ TX_SIZE tx_size, CFL_PRED_TYPE pred_plane) {
+ const int width = tx_size_wide[tx_size];
+ const int height = tx_size_high[tx_size];
+ assert(pred_plane < CFL_PRED_PLANES);
+ assert(width <= CFL_BUF_LINE);
+ assert(height <= CFL_BUF_LINE);
+ if (get_bitdepth_data_path_index(xd)) {
+ uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst);
+ cfl_load_dc_pred_hbd(xd->cfl.dc_pred_cache[pred_plane], dst_16, dst_stride,
+ width, height);
+ return;
+ }
+ cfl_load_dc_pred_lbd(xd->cfl.dc_pred_cache[pred_plane], dst, dst_stride,
+ width, height);
}
// Due to frame boundary issues, it is possible that the total area covered by
@@ -38,217 +95,54 @@ static INLINE void cfl_pad(CFL_CTX *cfl, int width, int height) {
if (diff_width > 0) {
const int min_height = height - diff_height;
- int16_t *pred_buf_q3 = cfl->pred_buf_q3 + (width - diff_width);
+ uint16_t *recon_buf_q3 = cfl->recon_buf_q3 + (width - diff_width);
for (int j = 0; j < min_height; j++) {
- const int last_pixel = pred_buf_q3[-1];
+ const uint16_t last_pixel = recon_buf_q3[-1];
+ assert(recon_buf_q3 + diff_width <= cfl->recon_buf_q3 + CFL_BUF_SQUARE);
for (int i = 0; i < diff_width; i++) {
- pred_buf_q3[i] = last_pixel;
+ recon_buf_q3[i] = last_pixel;
}
- pred_buf_q3 += MAX_SB_SIZE;
+ recon_buf_q3 += CFL_BUF_LINE;
}
cfl->buf_width = width;
}
if (diff_height > 0) {
- int16_t *pred_buf_q3 =
- cfl->pred_buf_q3 + ((height - diff_height) * MAX_SB_SIZE);
+ uint16_t *recon_buf_q3 =
+ cfl->recon_buf_q3 + ((height - diff_height) * CFL_BUF_LINE);
for (int j = 0; j < diff_height; j++) {
- const int16_t *last_row_q3 = pred_buf_q3 - MAX_SB_SIZE;
+ const uint16_t *last_row_q3 = recon_buf_q3 - CFL_BUF_LINE;
+ assert(recon_buf_q3 + width <= cfl->recon_buf_q3 + CFL_BUF_SQUARE);
for (int i = 0; i < width; i++) {
- pred_buf_q3[i] = last_row_q3[i];
+ recon_buf_q3[i] = last_row_q3[i];
}
- pred_buf_q3 += MAX_SB_SIZE;
+ recon_buf_q3 += CFL_BUF_LINE;
}
cfl->buf_height = height;
}
}
-static void sum_above_row_lbd(const uint8_t *above_u, const uint8_t *above_v,
- int width, int *out_sum_u, int *out_sum_v) {
- int sum_u = 0;
- int sum_v = 0;
- for (int i = 0; i < width; i++) {
- sum_u += above_u[i];
- sum_v += above_v[i];
- }
- *out_sum_u += sum_u;
- *out_sum_v += sum_v;
-}
-#if CONFIG_HIGHBITDEPTH
-static void sum_above_row_hbd(const uint16_t *above_u, const uint16_t *above_v,
- int width, int *out_sum_u, int *out_sum_v) {
- int sum_u = 0;
- int sum_v = 0;
- for (int i = 0; i < width; i++) {
- sum_u += above_u[i];
- sum_v += above_v[i];
- }
- *out_sum_u += sum_u;
- *out_sum_v += sum_v;
-}
-#endif // CONFIG_HIGHBITDEPTH
-
-static void sum_above_row(const MACROBLOCKD *xd, int width, int *out_sum_u,
- int *out_sum_v) {
- const struct macroblockd_plane *const pd_u = &xd->plane[AOM_PLANE_U];
- const struct macroblockd_plane *const pd_v = &xd->plane[AOM_PLANE_V];
-#if CONFIG_HIGHBITDEPTH
- if (get_bitdepth_data_path_index(xd)) {
- const uint16_t *above_u_16 =
- CONVERT_TO_SHORTPTR(pd_u->dst.buf) - pd_u->dst.stride;
- const uint16_t *above_v_16 =
- CONVERT_TO_SHORTPTR(pd_v->dst.buf) - pd_v->dst.stride;
- sum_above_row_hbd(above_u_16, above_v_16, width, out_sum_u, out_sum_v);
- return;
- }
-#endif // CONFIG_HIGHBITDEPTH
- const uint8_t *above_u = pd_u->dst.buf - pd_u->dst.stride;
- const uint8_t *above_v = pd_v->dst.buf - pd_v->dst.stride;
- sum_above_row_lbd(above_u, above_v, width, out_sum_u, out_sum_v);
-}
-
-static void sum_left_col_lbd(const uint8_t *left_u, int u_stride,
- const uint8_t *left_v, int v_stride, int height,
- int *out_sum_u, int *out_sum_v) {
- int sum_u = 0;
- int sum_v = 0;
- for (int i = 0; i < height; i++) {
- sum_u += left_u[i * u_stride];
- sum_v += left_v[i * v_stride];
- }
- *out_sum_u += sum_u;
- *out_sum_v += sum_v;
-}
-#if CONFIG_HIGHBITDEPTH
-static void sum_left_col_hbd(const uint16_t *left_u, int u_stride,
- const uint16_t *left_v, int v_stride, int height,
- int *out_sum_u, int *out_sum_v) {
- int sum_u = 0;
- int sum_v = 0;
- for (int i = 0; i < height; i++) {
- sum_u += left_u[i * u_stride];
- sum_v += left_v[i * v_stride];
- }
- *out_sum_u += sum_u;
- *out_sum_v += sum_v;
-}
-#endif // CONFIG_HIGHBITDEPTH
-static void sum_left_col(const MACROBLOCKD *xd, int height, int *out_sum_u,
- int *out_sum_v) {
- const struct macroblockd_plane *const pd_u = &xd->plane[AOM_PLANE_U];
- const struct macroblockd_plane *const pd_v = &xd->plane[AOM_PLANE_V];
-
-#if CONFIG_HIGHBITDEPTH
- if (get_bitdepth_data_path_index(xd)) {
- const uint16_t *left_u_16 = CONVERT_TO_SHORTPTR(pd_u->dst.buf) - 1;
- const uint16_t *left_v_16 = CONVERT_TO_SHORTPTR(pd_v->dst.buf) - 1;
- sum_left_col_hbd(left_u_16, pd_u->dst.stride, left_v_16, pd_v->dst.stride,
- height, out_sum_u, out_sum_v);
- return;
- }
-#endif // CONFIG_HIGHBITDEPTH
- const uint8_t *left_u = pd_u->dst.buf - 1;
- const uint8_t *left_v = pd_v->dst.buf - 1;
- sum_left_col_lbd(left_u, pd_u->dst.stride, left_v, pd_v->dst.stride, height,
- out_sum_u, out_sum_v);
-}
-
-// CfL computes its own block-level DC_PRED. This is required to compute both
-// alpha_cb and alpha_cr before the prediction are computed.
-static void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
- CFL_CTX *const cfl = xd->cfl;
-
- // Compute DC_PRED until block boundary. We can't assume the neighbor will use
- // the same transform size.
- const int width = max_block_wide(xd, plane_bsize, AOM_PLANE_U)
- << tx_size_wide_log2[0];
- const int height = max_block_high(xd, plane_bsize, AOM_PLANE_U)
- << tx_size_high_log2[0];
- // Number of pixel on the top and left borders.
- const int num_pel = width + height;
-
- int sum_u = 0;
- int sum_v = 0;
-
-// Match behavior of build_intra_predictors_high (reconintra.c) at superblock
-// boundaries:
-// base-1 base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
-// base+1 A B .. Y Z
-// base+1 C D .. W X
-// base+1 E F .. U V
-// base+1 G H .. S T T T T T
-// ..
-
-#if CONFIG_CHROMA_SUB8X8
- if (xd->chroma_up_available && xd->mb_to_right_edge >= 0) {
-#else
- if (xd->up_available && xd->mb_to_right_edge >= 0) {
-#endif
- sum_above_row(xd, width, &sum_u, &sum_v);
- } else {
- const int base = 128 << (xd->bd - 8);
- sum_u = width * (base - 1);
- sum_v = width * (base - 1);
- }
-
-#if CONFIG_CHROMA_SUB8X8
- if (xd->chroma_left_available && xd->mb_to_bottom_edge >= 0) {
-#else
- if (xd->left_available && xd->mb_to_bottom_edge >= 0) {
-#endif
- sum_left_col(xd, height, &sum_u, &sum_v);
- } else {
- const int base = 128 << (xd->bd - 8);
- sum_u += height * (base + 1);
- sum_v += height * (base + 1);
+static void subtract_average_c(const uint16_t *src, int16_t *dst, int width,
+ int height, int round_offset, int num_pel_log2) {
+ int sum = round_offset;
+ const uint16_t *recon = src;
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++) {
+ sum += recon[i];
+ }
+ recon += CFL_BUF_LINE;
}
-
- // TODO(ltrudeau) Because of max_block_wide and max_block_high, num_pel will
- // not be a power of two. So these divisions will have to use a lookup table.
- cfl->dc_pred[CFL_PRED_U] = (sum_u + (num_pel >> 1)) / num_pel;
- cfl->dc_pred[CFL_PRED_V] = (sum_v + (num_pel >> 1)) / num_pel;
-}
-
-static void cfl_subtract_averages(CFL_CTX *cfl, TX_SIZE tx_size) {
- const int width = cfl->uv_width;
- const int height = cfl->uv_height;
- const int tx_height = tx_size_high[tx_size];
- const int tx_width = tx_size_wide[tx_size];
- const int block_row_stride = MAX_SB_SIZE << tx_size_high_log2[tx_size];
- const int num_pel_log2 =
- (tx_size_high_log2[tx_size] + tx_size_wide_log2[tx_size]);
-
- int16_t *pred_buf_q3 = cfl->pred_buf_q3;
-
- cfl_pad(cfl, width, height);
-
- for (int b_j = 0; b_j < height; b_j += tx_height) {
- for (int b_i = 0; b_i < width; b_i += tx_width) {
- int sum_q3 = 0;
- int16_t *tx_pred_buf_q3 = pred_buf_q3;
- for (int t_j = 0; t_j < tx_height; t_j++) {
- for (int t_i = b_i; t_i < b_i + tx_width; t_i++) {
- sum_q3 += tx_pred_buf_q3[t_i];
- }
- tx_pred_buf_q3 += MAX_SB_SIZE;
- }
- int avg_q3 = (sum_q3 + (1 << (num_pel_log2 - 1))) >> num_pel_log2;
- // Loss is never more than 1/2 (in Q3)
- assert(fabs((double)avg_q3 - (sum_q3 / ((double)(1 << num_pel_log2)))) <=
- 0.5);
-
- tx_pred_buf_q3 = pred_buf_q3;
- for (int t_j = 0; t_j < tx_height; t_j++) {
- for (int t_i = b_i; t_i < b_i + tx_width; t_i++) {
- tx_pred_buf_q3[t_i] -= avg_q3;
- }
-
- tx_pred_buf_q3 += MAX_SB_SIZE;
- }
+ const int avg = sum >> num_pel_log2;
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++) {
+ dst[i] = src[i] - avg;
}
- pred_buf_q3 += block_row_stride;
+ src += CFL_BUF_LINE;
+ dst += CFL_BUF_LINE;
}
}
+CFL_SUB_AVG_FN(c)
+
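The rounding in subtract_average_c comes entirely from seeding the sum with round_offset, which the CFL_SUB_AVG_X wrappers in cfl.h always set to half the pixel count (1 << (num_pel_log2 - 1)). A minimal standalone sketch of just that arithmetic, with made-up sample values and no dependence on the library:

#include <stdio.h>

int main(void) {
  /* A 4x4 transform block: 16 pixels, so num_pel_log2 = 4 and
   * round_offset = 1 << (4 - 1) = 8, matching CFL_SUB_AVG_X(c, 4, 4, 8, 4). */
  const int num_pel_log2 = 4;
  int sum = 1 << (num_pel_log2 - 1);
  for (int v = 0; v < 16; v++) sum += v;  /* pretend Q3 luma samples 0..15, sum 120 */
  const int avg = sum >> num_pel_log2;    /* (120 + 8) >> 4 == 8: rounded, not floor(7.5) */
  printf("avg = %d\n", avg);
  return 0;
}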
static INLINE int cfl_idx_to_alpha(int alpha_idx, int joint_sign,
CFL_PRED_TYPE pred_type) {
const int alpha_sign = (pred_type == CFL_PRED_U) ? CFL_SIGN_U(joint_sign)
@@ -259,159 +153,218 @@ static INLINE int cfl_idx_to_alpha(int alpha_idx, int joint_sign,
return (alpha_sign == CFL_SIGN_POS) ? abs_alpha_q3 + 1 : -abs_alpha_q3 - 1;
}
-static void cfl_build_prediction_lbd(const int16_t *pred_buf_q3, uint8_t *dst,
- int dst_stride, int width, int height,
- int alpha_q3, int dc_pred) {
+static INLINE void cfl_predict_lbd_c(const int16_t *ac_buf_q3, uint8_t *dst,
+ int dst_stride, int alpha_q3, int width,
+ int height) {
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
- dst[i] =
- clip_pixel(get_scaled_luma_q0(alpha_q3, pred_buf_q3[i]) + dc_pred);
+ dst[i] = clip_pixel(get_scaled_luma_q0(alpha_q3, ac_buf_q3[i]) + dst[i]);
}
dst += dst_stride;
- pred_buf_q3 += MAX_SB_SIZE;
+ ac_buf_q3 += CFL_BUF_LINE;
}
}
-#if CONFIG_HIGHBITDEPTH
-static void cfl_build_prediction_hbd(const int16_t *pred_buf_q3, uint16_t *dst,
- int dst_stride, int width, int height,
- int alpha_q3, int dc_pred, int bit_depth) {
+// Null function used for invalid tx_sizes
+void cfl_predict_lbd_null(const int16_t *ac_buf_q3, uint8_t *dst,
+ int dst_stride, int alpha_q3) {
+ (void)ac_buf_q3;
+ (void)dst;
+ (void)dst_stride;
+ (void)alpha_q3;
+ assert(0);
+}
+
+CFL_PREDICT_FN(c, lbd)
+
+void cfl_predict_hbd_c(const int16_t *ac_buf_q3, uint16_t *dst, int dst_stride,
+ int alpha_q3, int bit_depth, int width, int height) {
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
dst[i] = clip_pixel_highbd(
- get_scaled_luma_q0(alpha_q3, pred_buf_q3[i]) + dc_pred, bit_depth);
+ get_scaled_luma_q0(alpha_q3, ac_buf_q3[i]) + dst[i], bit_depth);
}
dst += dst_stride;
- pred_buf_q3 += MAX_SB_SIZE;
+ ac_buf_q3 += CFL_BUF_LINE;
}
}
-#endif // CONFIG_HIGHBITDEPTH
-static void cfl_build_prediction(const int16_t *pred_buf_q3, uint8_t *dst,
- int dst_stride, int width, int height,
- int alpha_q3, int dc_pred, int use_hbd,
- int bit_depth) {
-#if CONFIG_HIGHBITDEPTH
- if (use_hbd) {
- uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst);
- cfl_build_prediction_hbd(pred_buf_q3, dst_16, dst_stride, width, height,
- alpha_q3, dc_pred, bit_depth);
- return;
- }
-#endif // CONFIG_HIGHBITDEPTH
- (void)use_hbd;
- (void)bit_depth;
- cfl_build_prediction_lbd(pred_buf_q3, dst, dst_stride, width, height,
- alpha_q3, dc_pred);
+// Null function used for invalid tx_sizes
+void cfl_predict_hbd_null(const int16_t *ac_buf_q3, uint16_t *dst,
+ int dst_stride, int alpha_q3, int bd) {
+ (void)ac_buf_q3;
+ (void)dst;
+ (void)dst_stride;
+ (void)alpha_q3;
+ (void)bd;
+ assert(0);
+}
+
+CFL_PREDICT_FN(c, hbd)
+
+static void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
+ CFL_CTX *const cfl = &xd->cfl;
+ // Do not call cfl_compute_parameters multiple times on the same values.
+ assert(cfl->are_parameters_computed == 0);
+
+ cfl_pad(cfl, tx_size_wide[tx_size], tx_size_high[tx_size]);
+ get_subtract_average_fn(tx_size)(cfl->recon_buf_q3, cfl->ac_buf_q3);
+ cfl->are_parameters_computed = 1;
}
void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
- int row, int col, TX_SIZE tx_size, int plane) {
- CFL_CTX *const cfl = xd->cfl;
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ TX_SIZE tx_size, int plane) {
+ CFL_CTX *const cfl = &xd->cfl;
+ MB_MODE_INFO *mbmi = xd->mi[0];
+ assert(is_cfl_allowed(xd));
- // CfL parameters must be computed before prediction can be done.
- assert(cfl->are_parameters_computed == 1);
+ if (!cfl->are_parameters_computed) cfl_compute_parameters(xd, tx_size);
- const int16_t *pred_buf_q3 =
- cfl->pred_buf_q3 + ((row * MAX_SB_SIZE + col) << tx_size_wide_log2[0]);
const int alpha_q3 =
cfl_idx_to_alpha(mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, plane - 1);
+ assert((tx_size_high[tx_size] - 1) * CFL_BUF_LINE + tx_size_wide[tx_size] <=
+ CFL_BUF_SQUARE);
+ if (get_bitdepth_data_path_index(xd)) {
+ uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst);
+ get_predict_hbd_fn(tx_size)(cfl->ac_buf_q3, dst_16, dst_stride, alpha_q3,
+ xd->bd);
+ return;
+ }
+ get_predict_lbd_fn(tx_size)(cfl->ac_buf_q3, dst, dst_stride, alpha_q3);
+}
- cfl_build_prediction(pred_buf_q3, dst, dst_stride, tx_size_wide[tx_size],
- tx_size_high[tx_size], alpha_q3, cfl->dc_pred[plane - 1],
- get_bitdepth_data_path_index(xd), xd->bd);
+// Null function used for invalid tx_sizes
+void cfl_subsample_lbd_null(const uint8_t *input, int input_stride,
+ uint16_t *output_q3) {
+ (void)input;
+ (void)input_stride;
+ (void)output_q3;
+ assert(0);
}
-static void cfl_luma_subsampling_420_lbd(const uint8_t *input, int input_stride,
- int16_t *output_q3, int width,
- int height) {
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
- int top = i << 1;
- int bot = top + input_stride;
- output_q3[i] = (input[top] + input[top + 1] + input[bot] + input[bot + 1])
- << 1;
+// Null function used for invalid tx_sizes
+void cfl_subsample_hbd_null(const uint16_t *input, int input_stride,
+ uint16_t *output_q3) {
+ (void)input;
+ (void)input_stride;
+ (void)output_q3;
+ assert(0);
+}
+
+static void cfl_luma_subsampling_420_lbd_c(const uint8_t *input,
+ int input_stride,
+ uint16_t *output_q3, int width,
+ int height) {
+ for (int j = 0; j < height; j += 2) {
+ for (int i = 0; i < width; i += 2) {
+ const int bot = i + input_stride;
+ output_q3[i >> 1] =
+ (input[i] + input[i + 1] + input[bot] + input[bot + 1]) << 1;
}
input += input_stride << 1;
- output_q3 += MAX_SB_SIZE;
+ output_q3 += CFL_BUF_LINE;
}
}
-static void cfl_luma_subsampling_444_lbd(const uint8_t *input, int input_stride,
- int16_t *output_q3, int width,
- int height) {
+static void cfl_luma_subsampling_422_lbd_c(const uint8_t *input,
+ int input_stride,
+ uint16_t *output_q3, int width,
+ int height) {
+ assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
- output_q3[i] = input[i] << 3;
+ for (int i = 0; i < width; i += 2) {
+ output_q3[i >> 1] = (input[i] + input[i + 1]) << 2;
}
input += input_stride;
- output_q3 += MAX_SB_SIZE;
+ output_q3 += CFL_BUF_LINE;
}
}
-#if CONFIG_HIGHBITDEPTH
-static void cfl_luma_subsampling_420_hbd(const uint16_t *input,
- int input_stride, int16_t *output_q3,
- int width, int height) {
+static void cfl_luma_subsampling_444_lbd_c(const uint8_t *input,
+ int input_stride,
+ uint16_t *output_q3, int width,
+ int height) {
+ assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
- int top = i << 1;
- int bot = top + input_stride;
- output_q3[i] = (input[top] + input[top + 1] + input[bot] + input[bot + 1])
- << 1;
+ output_q3[i] = input[i] << 3;
+ }
+ input += input_stride;
+ output_q3 += CFL_BUF_LINE;
+ }
+}
+
+static void cfl_luma_subsampling_420_hbd_c(const uint16_t *input,
+ int input_stride,
+ uint16_t *output_q3, int width,
+ int height) {
+ for (int j = 0; j < height; j += 2) {
+ for (int i = 0; i < width; i += 2) {
+ const int bot = i + input_stride;
+ output_q3[i >> 1] =
+ (input[i] + input[i + 1] + input[bot] + input[bot + 1]) << 1;
}
input += input_stride << 1;
- output_q3 += MAX_SB_SIZE;
+ output_q3 += CFL_BUF_LINE;
}
}
-static void cfl_luma_subsampling_444_hbd(const uint16_t *input,
- int input_stride, int16_t *output_q3,
- int width, int height) {
+static void cfl_luma_subsampling_422_hbd_c(const uint16_t *input,
+ int input_stride,
+ uint16_t *output_q3, int width,
+ int height) {
+ assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i += 2) {
+ output_q3[i >> 1] = (input[i] + input[i + 1]) << 2;
+ }
+ input += input_stride;
+ output_q3 += CFL_BUF_LINE;
+ }
+}
+
+static void cfl_luma_subsampling_444_hbd_c(const uint16_t *input,
+ int input_stride,
+ uint16_t *output_q3, int width,
+ int height) {
+ assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
output_q3[i] = input[i] << 3;
}
input += input_stride;
- output_q3 += MAX_SB_SIZE;
+ output_q3 += CFL_BUF_LINE;
}
}
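The shift amounts in these kernels are simply the Q3 fixed-point scaling written out: the buffer holds eight times the average of the co-located luma samples. For 4:2:0 that average is (sum of 4 pixels) / 4, and 8 * sum / 4 = sum << 1; for 4:2:2 it is (sum of 2 pixels) / 2, giving sum << 2; and for 4:4:4 the sample is used directly, giving input[i] << 3.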
-#endif // CONFIG_HIGHBITDEPTH
-static void cfl_luma_subsampling_420(const uint8_t *input, int input_stride,
- int16_t *output_q3, int width, int height,
- int use_hbd) {
-#if CONFIG_HIGHBITDEPTH
- if (use_hbd) {
- const uint16_t *input_16 = CONVERT_TO_SHORTPTR(input);
- cfl_luma_subsampling_420_hbd(input_16, input_stride, output_q3, width,
- height);
- return;
+CFL_GET_SUBSAMPLE_FUNCTION(c)
+
+static INLINE cfl_subsample_hbd_fn cfl_subsampling_hbd(TX_SIZE tx_size,
+ int sub_x, int sub_y) {
+ if (sub_x == 1) {
+ if (sub_y == 1) {
+ return cfl_get_luma_subsampling_420_hbd(tx_size);
+ }
+ return cfl_get_luma_subsampling_422_hbd(tx_size);
}
-#endif // CONFIG_HIGHBITDEPTH
- (void)use_hbd;
- cfl_luma_subsampling_420_lbd(input, input_stride, output_q3, width, height);
+ return cfl_get_luma_subsampling_444_hbd(tx_size);
}
-static void cfl_luma_subsampling_444(const uint8_t *input, int input_stride,
- int16_t *output_q3, int width, int height,
- int use_hbd) {
-#if CONFIG_HIGHBITDEPTH
- if (use_hbd) {
- uint16_t *input_16 = CONVERT_TO_SHORTPTR(input);
- cfl_luma_subsampling_444_hbd(input_16, input_stride, output_q3, width,
- height);
- return;
+static INLINE cfl_subsample_lbd_fn cfl_subsampling_lbd(TX_SIZE tx_size,
+ int sub_x, int sub_y) {
+ if (sub_x == 1) {
+ if (sub_y == 1) {
+ return cfl_get_luma_subsampling_420_lbd(tx_size);
+ }
+ return cfl_get_luma_subsampling_422_lbd(tx_size);
}
-#endif // CONFIG_HIGHBITDEPTH
- (void)use_hbd;
- cfl_luma_subsampling_444_lbd(input, input_stride, output_q3, width, height);
+ return cfl_get_luma_subsampling_444_lbd(tx_size);
}
-static INLINE void cfl_store(CFL_CTX *cfl, const uint8_t *input,
- int input_stride, int row, int col, int width,
- int height, int use_hbd) {
+static void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride,
+ int row, int col, TX_SIZE tx_size, int use_hbd) {
+ const int width = tx_size_wide[tx_size];
+ const int height = tx_size_high[tx_size];
const int tx_off_log2 = tx_size_wide_log2[0];
const int sub_x = cfl->subsampling_x;
const int sub_y = cfl->subsampling_y;
@@ -435,26 +388,22 @@ static INLINE void cfl_store(CFL_CTX *cfl, const uint8_t *input,
}
// Check that we will remain inside the pixel buffer.
- assert(store_row + store_height <= MAX_SB_SIZE);
- assert(store_col + store_width <= MAX_SB_SIZE);
+ assert(store_row + store_height <= CFL_BUF_LINE);
+ assert(store_col + store_width <= CFL_BUF_LINE);
// Store the input into the CfL pixel buffer
- int16_t *pred_buf_q3 =
- cfl->pred_buf_q3 + (store_row * MAX_SB_SIZE + store_col);
-
- if (sub_y == 0 && sub_x == 0) {
- cfl_luma_subsampling_444(input, input_stride, pred_buf_q3, store_width,
- store_height, use_hbd);
- } else if (sub_y == 1 && sub_x == 1) {
- cfl_luma_subsampling_420(input, input_stride, pred_buf_q3, store_width,
- store_height, use_hbd);
+ uint16_t *recon_buf_q3 =
+ cfl->recon_buf_q3 + (store_row * CFL_BUF_LINE + store_col);
+
+ if (use_hbd) {
+ cfl_subsampling_hbd(tx_size, sub_x, sub_y)(CONVERT_TO_SHORTPTR(input),
+ input_stride, recon_buf_q3);
} else {
- // TODO(ltrudeau) add support for 4:2:2
- assert(0); // Unsupported chroma subsampling
+ cfl_subsampling_lbd(tx_size, sub_x, sub_y)(input, input_stride,
+ recon_buf_q3);
}
}
-#if CONFIG_CHROMA_SUB8X8
// Adjust the row and column of blocks smaller than 8X8, as chroma-referenced
// and non-chroma-referenced blocks are stored together in the CfL buffer.
static INLINE void sub8x8_adjust_offset(const CFL_CTX *cfl, int *row_out,
@@ -471,99 +420,36 @@ static INLINE void sub8x8_adjust_offset(const CFL_CTX *cfl, int *row_out,
(*col_out)++;
}
}
-#if CONFIG_DEBUG
-static INLINE void sub8x8_set_val(CFL_CTX *cfl, int row, int col, int val_high,
- int val_wide) {
- for (int val_r = 0; val_r < val_high; val_r++) {
- assert(row + val_r < CFL_SUB8X8_VAL_MI_SIZE);
- int row_off = (row + val_r) * CFL_SUB8X8_VAL_MI_SIZE;
- for (int val_c = 0; val_c < val_wide; val_c++) {
- assert(col + val_c < CFL_SUB8X8_VAL_MI_SIZE);
- assert(cfl->sub8x8_val[row_off + col + val_c] == 0);
- cfl->sub8x8_val[row_off + col + val_c]++;
- }
- }
-}
-#endif // CONFIG_DEBUG
-#endif // CONFIG_CHROMA_SUB8X8
void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
BLOCK_SIZE bsize) {
- CFL_CTX *const cfl = xd->cfl;
+ CFL_CTX *const cfl = &xd->cfl;
struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
uint8_t *dst =
&pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
- (void)bsize;
-#if CONFIG_CHROMA_SUB8X8
if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
// Only dimensions of size 4 can have an odd offset.
assert(!((col & 1) && tx_size_wide[tx_size] != 4));
assert(!((row & 1) && tx_size_high[tx_size] != 4));
sub8x8_adjust_offset(cfl, &row, &col);
-#if CONFIG_DEBUG
- sub8x8_set_val(cfl, row, col, tx_size_high_unit[tx_size],
- tx_size_wide_unit[tx_size]);
-#endif // CONFIG_DEBUG
}
-#endif
- cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size_wide[tx_size],
- tx_size_high[tx_size], get_bitdepth_data_path_index(xd));
+ cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size,
+ get_bitdepth_data_path_index(xd));
}
void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) {
- CFL_CTX *const cfl = xd->cfl;
+ CFL_CTX *const cfl = &xd->cfl;
struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
int row = 0;
int col = 0;
-#if CONFIG_CHROMA_SUB8X8
- bsize = AOMMAX(BLOCK_4X4, bsize);
+
if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
sub8x8_adjust_offset(cfl, &row, &col);
-#if CONFIG_DEBUG
- sub8x8_set_val(cfl, row, col, mi_size_high[bsize], mi_size_wide[bsize]);
-#endif // CONFIG_DEBUG
}
-#endif // CONFIG_CHROMA_SUB8X8
const int width = max_intra_block_width(xd, bsize, AOM_PLANE_Y, tx_size);
const int height = max_intra_block_height(xd, bsize, AOM_PLANE_Y, tx_size);
- cfl_store(cfl, pd->dst.buf, pd->dst.stride, row, col, width, height,
+ tx_size = get_tx_size(width, height);
+ cfl_store(cfl, pd->dst.buf, pd->dst.stride, row, col, tx_size,
get_bitdepth_data_path_index(xd));
}
-
-void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
- CFL_CTX *const cfl = xd->cfl;
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-
- // Do not call cfl_compute_parameters multiple time on the same values.
- assert(cfl->are_parameters_computed == 0);
-
-#if CONFIG_CHROMA_SUB8X8
- const BLOCK_SIZE plane_bsize = AOMMAX(
- BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]));
-#if CONFIG_DEBUG
- if (mbmi->sb_type < BLOCK_8X8) {
- for (int val_r = 0; val_r < mi_size_high[mbmi->sb_type]; val_r++) {
- for (int val_c = 0; val_c < mi_size_wide[mbmi->sb_type]; val_c++) {
- assert(cfl->sub8x8_val[val_r * CFL_SUB8X8_VAL_MI_SIZE + val_c] == 1);
- }
- }
- cfl_clear_sub8x8_val(cfl);
- }
-#endif // CONFIG_DEBUG
-#else
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]);
-#endif
- // AOM_PLANE_U is used, but both planes will have the same sizes.
- cfl->uv_width = max_intra_block_width(xd, plane_bsize, AOM_PLANE_U, tx_size);
- cfl->uv_height =
- max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size);
-
- assert(cfl->buf_width <= cfl->uv_width);
- assert(cfl->buf_height <= cfl->uv_height);
-
- cfl_dc_pred(xd, plane_bsize);
- cfl_subtract_averages(cfl, tx_size);
- cfl->are_parameters_computed = 1;
-}
diff --git a/third_party/aom/av1/common/cfl.h b/third_party/aom/av1/common/cfl.h
index 4ac0b401c..bc9fbce1b 100644
--- a/third_party/aom/av1/common/cfl.h
+++ b/third_party/aom/av1/common/cfl.h
@@ -13,20 +13,290 @@
#define AV1_COMMON_CFL_H_
#include "av1/common/blockd.h"
+#include "av1/common/onyxc_int.h"
+
+// Can we use CfL for the current block?
+static INLINE CFL_ALLOWED_TYPE is_cfl_allowed(const MACROBLOCKD *xd) {
+ const MB_MODE_INFO *mbmi = xd->mi[0];
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ assert(bsize < BLOCK_SIZES_ALL);
+ if (xd->lossless[mbmi->segment_id]) {
+ // In lossless, CfL is available when the partition size is equal to the
+ // transform size.
+ const int ssx = xd->plane[AOM_PLANE_U].subsampling_x;
+ const int ssy = xd->plane[AOM_PLANE_U].subsampling_y;
+ const int plane_bsize = get_plane_block_size(bsize, ssx, ssy);
+ return (CFL_ALLOWED_TYPE)(plane_bsize == BLOCK_4X4);
+ }
+ // Spec: CfL is available to luma partitions less than or equal to 32x32
+ return (CFL_ALLOWED_TYPE)(block_size_wide[bsize] <= 32 &&
+ block_size_high[bsize] <= 32);
+}
+
+// Do we need to save the luma pixels from the current block,
+// for a possible future CfL prediction?
+static INLINE CFL_ALLOWED_TYPE store_cfl_required(const AV1_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ const MB_MODE_INFO *mbmi = xd->mi[0];
+
+ if (cm->seq_params.monochrome) return CFL_DISALLOWED;
+
+ if (!xd->cfl.is_chroma_reference) {
+ // For non-chroma-reference blocks, we should always store the luma pixels,
+ // in case the corresponding chroma-reference block uses CfL.
+ // Note that this can only happen for block sizes which are <8 on
+ // their shortest side, as otherwise they would be chroma reference
+ // blocks.
+ return CFL_ALLOWED;
+ }
+
+ // If this block has chroma information, we know whether we're
+ // actually going to perform a CfL prediction
+ return (CFL_ALLOWED_TYPE)(!is_inter_block(mbmi) &&
+ mbmi->uv_mode == UV_CFL_PRED);
+}
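As a concrete case (under the usual 4:2:0 layout), the first 4x4 luma block of an 8x8 partition is not a chroma reference, so its pixels are stored unconditionally; only the last 4x4 block of that partition carries the chroma information and can check whether UV_CFL_PRED was actually selected.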
static INLINE int get_scaled_luma_q0(int alpha_q3, int16_t pred_buf_q3) {
int scaled_luma_q6 = alpha_q3 * pred_buf_q3;
return ROUND_POWER_OF_TWO_SIGNED(scaled_luma_q6, 6);
}
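Both arguments are Q3 fixed-point values, so the product is Q6 and the signed rounded shift by 6 returns an integer pixel offset. For example (values chosen only for illustration), alpha_q3 = 9 (1.125) and an AC luma value of 16 (2.0 in Q3) give scaled_luma_q6 = 144, and ROUND_POWER_OF_TWO_SIGNED(144, 6) = (144 + 32) >> 6 = 2, matching 1.125 * 2.0 = 2.25 rounded to the nearest integer.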
+static INLINE CFL_PRED_TYPE get_cfl_pred_type(PLANE_TYPE plane) {
+ assert(plane > 0);
+ return (CFL_PRED_TYPE)(plane - 1);
+}
+
void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
- int row, int col, TX_SIZE tx_size, int plane);
+ TX_SIZE tx_size, int plane);
void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size);
void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
BLOCK_SIZE bsize);
-void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size);
+void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input,
+ CFL_PRED_TYPE pred_plane, int width);
+
+void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
+ TX_SIZE tx_size, CFL_PRED_TYPE pred_plane);
+
+// Null function used for invalid tx_sizes
+void cfl_subsample_lbd_null(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// Null function used for invalid tx_sizes
+void cfl_subsample_hbd_null(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// Allows the CFL_SUBSAMPLE function to switch types depending on the bitdepth.
+#define CFL_lbd_TYPE uint8_t *cfl_type
+#define CFL_hbd_TYPE uint16_t *cfl_type
+
+// Declare a size-specific wrapper for the size-generic function. The compiler
+// will inline the size-generic function here; the advantage is that the size
+// is a compile-time constant, allowing for loop unrolling and other
+// constant-propagation benefits.
+#define CFL_SUBSAMPLE(arch, sub, bd, width, height) \
+ void subsample_##bd##_##sub##_##width##x##height##_##arch( \
+ const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \
+ cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \
+ output_q3, width, height); \
+ }
+
+// Declare size-specific wrappers for all valid CfL sizes.
+#define CFL_SUBSAMPLE_FUNCTIONS(arch, sub, bd) \
+ CFL_SUBSAMPLE(arch, sub, bd, 4, 4) \
+ CFL_SUBSAMPLE(arch, sub, bd, 8, 8) \
+ CFL_SUBSAMPLE(arch, sub, bd, 16, 16) \
+ CFL_SUBSAMPLE(arch, sub, bd, 32, 32) \
+ CFL_SUBSAMPLE(arch, sub, bd, 4, 8) \
+ CFL_SUBSAMPLE(arch, sub, bd, 8, 4) \
+ CFL_SUBSAMPLE(arch, sub, bd, 8, 16) \
+ CFL_SUBSAMPLE(arch, sub, bd, 16, 8) \
+ CFL_SUBSAMPLE(arch, sub, bd, 16, 32) \
+ CFL_SUBSAMPLE(arch, sub, bd, 32, 16) \
+ CFL_SUBSAMPLE(arch, sub, bd, 4, 16) \
+ CFL_SUBSAMPLE(arch, sub, bd, 16, 4) \
+ CFL_SUBSAMPLE(arch, sub, bd, 8, 32) \
+ CFL_SUBSAMPLE(arch, sub, bd, 32, 8) \
+ cfl_subsample_##bd##_fn cfl_get_luma_subsampling_##sub##_##bd##_##arch( \
+ TX_SIZE tx_size) { \
+ CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd) \
+ return subfn_##sub[tx_size]; \
+ }
+
+// Declare an architecture-specific array of function pointers for size-specific
+// wrappers.
+#define CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd) \
+ static const cfl_subsample_##bd##_fn subfn_##sub[TX_SIZES_ALL] = { \
+ subsample_##bd##_##sub##_4x4_##arch, /* 4x4 */ \
+ subsample_##bd##_##sub##_8x8_##arch, /* 8x8 */ \
+ subsample_##bd##_##sub##_16x16_##arch, /* 16x16 */ \
+ subsample_##bd##_##sub##_32x32_##arch, /* 32x32 */ \
+ cfl_subsample_##bd##_null, /* 64x64 (invalid CFL size) */ \
+ subsample_##bd##_##sub##_4x8_##arch, /* 4x8 */ \
+ subsample_##bd##_##sub##_8x4_##arch, /* 8x4 */ \
+ subsample_##bd##_##sub##_8x16_##arch, /* 8x16 */ \
+ subsample_##bd##_##sub##_16x8_##arch, /* 16x8 */ \
+ subsample_##bd##_##sub##_16x32_##arch, /* 16x32 */ \
+ subsample_##bd##_##sub##_32x16_##arch, /* 32x16 */ \
+ cfl_subsample_##bd##_null, /* 32x64 (invalid CFL size) */ \
+ cfl_subsample_##bd##_null, /* 64x32 (invalid CFL size) */ \
+ subsample_##bd##_##sub##_4x16_##arch, /* 4x16 */ \
+ subsample_##bd##_##sub##_16x4_##arch, /* 16x4 */ \
+ subsample_##bd##_##sub##_8x32_##arch, /* 8x32 */ \
+ subsample_##bd##_##sub##_32x8_##arch, /* 32x8 */ \
+ cfl_subsample_##bd##_null, /* 16x64 (invalid CFL size) */ \
+ cfl_subsample_##bd##_null, /* 64x16 (invalid CFL size) */ \
+ };
+
+// The RTCD script does not support passing in an array, so we wrap it in this
+// function.
+#define CFL_GET_SUBSAMPLE_FUNCTION(arch) \
+ CFL_SUBSAMPLE_FUNCTIONS(arch, 420, lbd) \
+ CFL_SUBSAMPLE_FUNCTIONS(arch, 422, lbd) \
+ CFL_SUBSAMPLE_FUNCTIONS(arch, 444, lbd) \
+ CFL_SUBSAMPLE_FUNCTIONS(arch, 420, hbd) \
+ CFL_SUBSAMPLE_FUNCTIONS(arch, 422, hbd) \
+ CFL_SUBSAMPLE_FUNCTIONS(arch, 444, hbd)
+
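Expanding one instance makes the machinery concrete: CFL_SUBSAMPLE(c, 420, lbd, 8, 8) emits a function named subsample_lbd_420_8x8_c that simply calls cfl_luma_subsampling_420_lbd_c with the width and height fixed at 8, the CFL_SUBSAMPLE_FUNCTION_ARRAY entry for TX_8X8 points at it, and cfl_get_luma_subsampling_420_lbd_c(TX_8X8) therefore returns that size-specialized kernel.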
+// Null function used for invalid tx_sizes
+static INLINE void cfl_subtract_average_null(const uint16_t *src,
+ int16_t *dst) {
+ (void)dst;
+ (void)src;
+ assert(0);
+}
+
+// Declare a size-specific wrapper for the size-generic function. The compiler
+// will inline the size-generic function here; the advantage is that the size
+// is a compile-time constant, allowing for loop unrolling and other
+// constant-propagation benefits.
+#define CFL_SUB_AVG_X(arch, width, height, round_offset, num_pel_log2) \
+ void subtract_average_##width##x##height##_##arch(const uint16_t *src, \
+ int16_t *dst) { \
+ subtract_average_##arch(src, dst, width, height, round_offset, \
+ num_pel_log2); \
+ }
+
+// Declare size-specific wrappers for all valid CfL sizes.
+#define CFL_SUB_AVG_FN(arch) \
+ CFL_SUB_AVG_X(arch, 4, 4, 8, 4) \
+ CFL_SUB_AVG_X(arch, 4, 8, 16, 5) \
+ CFL_SUB_AVG_X(arch, 4, 16, 32, 6) \
+ CFL_SUB_AVG_X(arch, 8, 4, 16, 5) \
+ CFL_SUB_AVG_X(arch, 8, 8, 32, 6) \
+ CFL_SUB_AVG_X(arch, 8, 16, 64, 7) \
+ CFL_SUB_AVG_X(arch, 8, 32, 128, 8) \
+ CFL_SUB_AVG_X(arch, 16, 4, 32, 6) \
+ CFL_SUB_AVG_X(arch, 16, 8, 64, 7) \
+ CFL_SUB_AVG_X(arch, 16, 16, 128, 8) \
+ CFL_SUB_AVG_X(arch, 16, 32, 256, 9) \
+ CFL_SUB_AVG_X(arch, 32, 8, 128, 8) \
+ CFL_SUB_AVG_X(arch, 32, 16, 256, 9) \
+ CFL_SUB_AVG_X(arch, 32, 32, 512, 10) \
+ cfl_subtract_average_fn get_subtract_average_fn_##arch(TX_SIZE tx_size) { \
+ static const cfl_subtract_average_fn sub_avg[TX_SIZES_ALL] = { \
+ subtract_average_4x4_##arch, /* 4x4 */ \
+ subtract_average_8x8_##arch, /* 8x8 */ \
+ subtract_average_16x16_##arch, /* 16x16 */ \
+ subtract_average_32x32_##arch, /* 32x32 */ \
+ cfl_subtract_average_null, /* 64x64 (invalid CFL size) */ \
+ subtract_average_4x8_##arch, /* 4x8 */ \
+ subtract_average_8x4_##arch, /* 8x4 */ \
+ subtract_average_8x16_##arch, /* 8x16 */ \
+ subtract_average_16x8_##arch, /* 16x8 */ \
+ subtract_average_16x32_##arch, /* 16x32 */ \
+ subtract_average_32x16_##arch, /* 32x16 */ \
+ cfl_subtract_average_null, /* 32x64 (invalid CFL size) */ \
+ cfl_subtract_average_null, /* 64x32 (invalid CFL size) */ \
+ subtract_average_4x16_##arch, /* 4x16 */ \
+ subtract_average_16x4_##arch, /* 16x4 */ \
+ subtract_average_8x32_##arch, /* 8x32 */ \
+ subtract_average_32x8_##arch, /* 32x8 */ \
+ cfl_subtract_average_null, /* 16x64 (invalid CFL size) */ \
+ cfl_subtract_average_null, /* 64x16 (invalid CFL size) */ \
+ }; \
+ /* Modulo TX_SIZES_ALL to ensure that an attacker won't be able to */ \
+ /* index the function pointer array out of bounds. */ \
+ return sub_avg[tx_size % TX_SIZES_ALL]; \
+ }
+
+// For VSX SIMD optimization, the C versions of width == 4 subtract are
+// faster than the VSX. As such, the VSX code calls the C versions.
+void subtract_average_4x4_c(const uint16_t *src, int16_t *dst);
+void subtract_average_4x8_c(const uint16_t *src, int16_t *dst);
+void subtract_average_4x16_c(const uint16_t *src, int16_t *dst);
+
+#define CFL_PREDICT_lbd(arch, width, height) \
+ void predict_lbd_##width##x##height##_##arch(const int16_t *pred_buf_q3, \
+ uint8_t *dst, int dst_stride, \
+ int alpha_q3) { \
+ cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \
+ height); \
+ }
+
+#define CFL_PREDICT_hbd(arch, width, height) \
+ void predict_hbd_##width##x##height##_##arch(const int16_t *pred_buf_q3, \
+ uint16_t *dst, int dst_stride, \
+ int alpha_q3, int bd) { \
+ cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \
+ height); \
+ }
+
+// This wrapper exists because clang-format does not like calling macros with
+// lowercase letters.
+#define CFL_PREDICT_X(arch, width, height, bd) \
+ CFL_PREDICT_##bd(arch, width, height)
+
+// Null function used for invalid tx_sizes
+void cfl_predict_lbd_null(const int16_t *pred_buf_q3, uint8_t *dst,
+ int dst_stride, int alpha_q3);
+
+// Null function used for invalid tx_sizes
+void cfl_predict_hbd_null(const int16_t *pred_buf_q3, uint16_t *dst,
+ int dst_stride, int alpha_q3, int bd);
+
+#define CFL_PREDICT_FN(arch, bd) \
+ CFL_PREDICT_X(arch, 4, 4, bd) \
+ CFL_PREDICT_X(arch, 4, 8, bd) \
+ CFL_PREDICT_X(arch, 4, 16, bd) \
+ CFL_PREDICT_X(arch, 8, 4, bd) \
+ CFL_PREDICT_X(arch, 8, 8, bd) \
+ CFL_PREDICT_X(arch, 8, 16, bd) \
+ CFL_PREDICT_X(arch, 8, 32, bd) \
+ CFL_PREDICT_X(arch, 16, 4, bd) \
+ CFL_PREDICT_X(arch, 16, 8, bd) \
+ CFL_PREDICT_X(arch, 16, 16, bd) \
+ CFL_PREDICT_X(arch, 16, 32, bd) \
+ CFL_PREDICT_X(arch, 32, 8, bd) \
+ CFL_PREDICT_X(arch, 32, 16, bd) \
+ CFL_PREDICT_X(arch, 32, 32, bd) \
+ cfl_predict_##bd##_fn get_predict_##bd##_fn_##arch(TX_SIZE tx_size) { \
+ static const cfl_predict_##bd##_fn pred[TX_SIZES_ALL] = { \
+ predict_##bd##_4x4_##arch, /* 4x4 */ \
+ predict_##bd##_8x8_##arch, /* 8x8 */ \
+ predict_##bd##_16x16_##arch, /* 16x16 */ \
+ predict_##bd##_32x32_##arch, /* 32x32 */ \
+ cfl_predict_##bd##_null, /* 64x64 (invalid CFL size) */ \
+ predict_##bd##_4x8_##arch, /* 4x8 */ \
+ predict_##bd##_8x4_##arch, /* 8x4 */ \
+ predict_##bd##_8x16_##arch, /* 8x16 */ \
+ predict_##bd##_16x8_##arch, /* 16x8 */ \
+ predict_##bd##_16x32_##arch, /* 16x32 */ \
+ predict_##bd##_32x16_##arch, /* 32x16 */ \
+ cfl_predict_##bd##_null, /* 32x64 (invalid CFL size) */ \
+ cfl_predict_##bd##_null, /* 64x32 (invalid CFL size) */ \
+ predict_##bd##_4x16_##arch, /* 4x16 */ \
+ predict_##bd##_16x4_##arch, /* 16x4 */ \
+ predict_##bd##_8x32_##arch, /* 8x32 */ \
+ predict_##bd##_32x8_##arch, /* 32x8 */ \
+ cfl_predict_##bd##_null, /* 16x64 (invalid CFL size) */ \
+ cfl_predict_##bd##_null, /* 64x16 (invalid CFL size) */ \
+ }; \
+ /* Modulo TX_SIZES_ALL to ensure that an attacker won't be able to */ \
+ /* index the function pointer array out of bounds. */ \
+ return pred[tx_size % TX_SIZES_ALL]; \
+ }
#endif // AV1_COMMON_CFL_H_
diff --git a/third_party/aom/av1/common/clpf.c b/third_party/aom/av1/common/clpf.c
deleted file mode 100644
index d643236aa..000000000
--- a/third_party/aom/av1/common/clpf.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "./av1_rtcd.h"
-#include "./cdef.h"
-#include "aom/aom_image.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-static int clpf_sample(int X, int A, int B, int C, int D, int E, int F, int G,
- int H, int s, unsigned int dmp) {
- int delta = 1 * constrain(A - X, s, dmp) + 3 * constrain(B - X, s, dmp) +
- 1 * constrain(C - X, s, dmp) + 3 * constrain(D - X, s, dmp) +
- 3 * constrain(E - X, s, dmp) + 1 * constrain(F - X, s, dmp) +
- 3 * constrain(G - X, s, dmp) + 1 * constrain(H - X, s, dmp);
- return (8 + delta - (delta < 0)) >> 4;
-}
-
-static int clpf_hsample(int X, int A, int B, int C, int D, int s,
- unsigned int dmp) {
- int delta = 1 * constrain(A - X, s, dmp) + 3 * constrain(B - X, s, dmp) +
- 3 * constrain(C - X, s, dmp) + 1 * constrain(D - X, s, dmp);
- return (4 + delta - (delta < 0)) >> 3;
-}
-
-void aom_clpf_block_c(uint8_t *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey, unsigned int strength,
- unsigned int damping) {
- int x, y;
-
- for (y = 0; y < sizey; y++) {
- for (x = 0; x < sizex; x++) {
- const int X = src[y * sstride + x];
- const int A = src[(y - 2) * sstride + x];
- const int B = src[(y - 1) * sstride + x];
- const int C = src[y * sstride + x - 2];
- const int D = src[y * sstride + x - 1];
- const int E = src[y * sstride + x + 1];
- const int F = src[y * sstride + x + 2];
- const int G = src[(y + 1) * sstride + x];
- const int H = src[(y + 2) * sstride + x];
- const int delta =
- clpf_sample(X, A, B, C, D, E, F, G, H, strength, damping);
- dst[y * dstride + x] = X + delta;
- }
- }
-}
-
-// Identical to aom_clpf_block_c() apart from "dst".
-void aom_clpf_block_hbd_c(uint16_t *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey,
- unsigned int strength, unsigned int damping) {
- int x, y;
-
- for (y = 0; y < sizey; y++) {
- for (x = 0; x < sizex; x++) {
- const int X = src[y * sstride + x];
- const int A = src[(y - 2) * sstride + x];
- const int B = src[(y - 1) * sstride + x];
- const int C = src[y * sstride + x - 2];
- const int D = src[y * sstride + x - 1];
- const int E = src[y * sstride + x + 1];
- const int F = src[y * sstride + x + 2];
- const int G = src[(y + 1) * sstride + x];
- const int H = src[(y + 2) * sstride + x];
- const int delta =
- clpf_sample(X, A, B, C, D, E, F, G, H, strength, damping);
- dst[y * dstride + x] = X + delta;
- }
- }
-}
-
-// Vertically restricted filter
-void aom_clpf_hblock_c(uint8_t *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey, unsigned int strength,
- unsigned int damping) {
- int x, y;
-
- for (y = 0; y < sizey; y++) {
- for (x = 0; x < sizex; x++) {
- const int X = src[y * sstride + x];
- const int A = src[y * sstride + x - 2];
- const int B = src[y * sstride + x - 1];
- const int C = src[y * sstride + x + 1];
- const int D = src[y * sstride + x + 2];
- const int delta = clpf_hsample(X, A, B, C, D, strength, damping);
- dst[y * dstride + x] = X + delta;
- }
- }
-}
-
-void aom_clpf_hblock_hbd_c(uint16_t *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey,
- unsigned int strength, unsigned int damping) {
- int x, y;
-
- for (y = 0; y < sizey; y++) {
- for (x = 0; x < sizex; x++) {
- const int X = src[y * sstride + x];
- const int A = src[y * sstride + x - 2];
- const int B = src[y * sstride + x - 1];
- const int C = src[y * sstride + x + 1];
- const int D = src[y * sstride + x + 2];
- const int delta = clpf_hsample(X, A, B, C, D, strength, damping);
- dst[y * dstride + x] = X + delta;
- }
- }
-}
diff --git a/third_party/aom/av1/common/clpf_neon.c b/third_party/aom/av1/common/clpf_neon.c
deleted file mode 100644
index f1a004c2c..000000000
--- a/third_party/aom/av1/common/clpf_neon.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_neon
-#include "./clpf_simd.h"
diff --git a/third_party/aom/av1/common/clpf_simd.h b/third_party/aom/av1/common/clpf_simd.h
deleted file mode 100644
index c7ffc569a..000000000
--- a/third_party/aom/av1/common/clpf_simd.h
+++ /dev/null
@@ -1,456 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "./av1_rtcd.h"
-#include "aom_ports/bitops.h"
-#include "aom_ports/mem.h"
-
-// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
-SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold,
- unsigned int adjdamp) {
- v128 diff = v128_sub_16(a, b);
- const v128 sign = v128_shr_n_s16(diff, 15);
- diff = v128_abs_s16(diff);
- const v128 s =
- v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(diff, adjdamp));
- return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign);
-}
-
-// sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp)))
-SIMD_INLINE v128 constrain(v256 a, v256 b, unsigned int strength,
- unsigned int adjdamp) {
- const v256 diff16 = v256_sub_16(a, b);
- v128 diff = v128_pack_s16_s8(v256_high_v128(diff16), v256_low_v128(diff16));
- const v128 sign = v128_cmplt_s8(diff, v128_zero());
- diff = v128_abs_s8(diff);
- return v128_xor(
- v128_add_8(sign,
- v128_min_u8(diff, v128_ssub_u8(v128_dup_8(strength),
- v128_shr_u8(diff, adjdamp)))),
- sign);
-}
-
-// delta = 1/16 * constrain(a, x, s, d) + 3/16 * constrain(b, x, s, d) +
-// 1/16 * constrain(c, x, s, d) + 3/16 * constrain(d, x, s, d) +
-// 3/16 * constrain(e, x, s, d) + 1/16 * constrain(f, x, s, d) +
-// 3/16 * constrain(g, x, s, d) + 1/16 * constrain(h, x, s, d)
-SIMD_INLINE v128 calc_delta(v256 x, v256 a, v256 b, v256 c, v256 d, v256 e,
- v256 f, v256 g, v256 h, unsigned int s,
- unsigned int dmp) {
- const v128 bdeg =
- v128_add_8(v128_add_8(constrain(b, x, s, dmp), constrain(d, x, s, dmp)),
- v128_add_8(constrain(e, x, s, dmp), constrain(g, x, s, dmp)));
- const v128 delta = v128_add_8(
- v128_add_8(v128_add_8(constrain(a, x, s, dmp), constrain(c, x, s, dmp)),
- v128_add_8(constrain(f, x, s, dmp), constrain(h, x, s, dmp))),
- v128_add_8(v128_add_8(bdeg, bdeg), bdeg));
- return v128_add_8(
- v128_pack_s16_u8(v256_high_v128(x), v256_low_v128(x)),
- v128_shr_s8(
- v128_add_8(v128_dup_8(8),
- v128_add_8(delta, v128_cmplt_s8(delta, v128_zero()))),
- 4));
-}
-
-// delta = 1/8 * constrain(a, x, s, d) + 3/8 * constrain(b, x, s, d) +
-// 3/8 * constrain(c, x, s, d) + 1/8 * constrain(d, x, s, d) +
-SIMD_INLINE v128 calc_hdelta(v256 x, v256 a, v256 b, v256 c, v256 d,
- unsigned int s, unsigned int dmp) {
- const v128 bc = v128_add_8(constrain(b, x, s, dmp), constrain(c, x, s, dmp));
- const v128 delta =
- v128_add_8(v128_add_8(constrain(a, x, s, dmp), constrain(d, x, s, dmp)),
- v128_add_8(v128_add_8(bc, bc), bc));
- return v128_add_8(
- v128_pack_s16_u8(v256_high_v128(x), v256_low_v128(x)),
- v128_shr_s8(
- v128_add_8(v128_dup_8(4),
- v128_add_8(delta, v128_cmplt_s8(delta, v128_zero()))),
- 3));
-}
-
-// Process blocks of width 8, two lines at a time, 8 bit.
-static void SIMD_FUNC(clpf_block8)(uint8_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y += 2) {
- const v128 l1 = v128_load_aligned(src);
- const v128 l2 = v128_load_aligned(src + sstride);
- const v128 l3 = v128_load_aligned(src - sstride);
- const v128 l4 = v128_load_aligned(src + 2 * sstride);
- const v256 a = v256_from_v128(v128_load_aligned(src - 2 * sstride), l3);
- const v256 b = v256_from_v128(l3, l1);
- const v256 g = v256_from_v128(l2, l4);
- const v256 h = v256_from_v128(l4, v128_load_aligned(src + 3 * sstride));
- const v256 c = v256_from_v128(v128_load_unaligned(src - 2),
- v128_load_unaligned(src - 2 + sstride));
- const v256 d = v256_from_v128(v128_load_unaligned(src - 1),
- v128_load_unaligned(src - 1 + sstride));
- const v256 e = v256_from_v128(v128_load_unaligned(src + 1),
- v128_load_unaligned(src + 1 + sstride));
- const v256 f = v256_from_v128(v128_load_unaligned(src + 2),
- v128_load_unaligned(src + 2 + sstride));
- const v128 o = calc_delta(v256_from_v128(l1, l2), a, b, c, d, e, f, g, h,
- strength, adjdamp);
-
- v64_store_aligned(dst, v128_high_v64(o));
- v64_store_aligned(dst + dstride, v128_low_v64(o));
- src += sstride * 2;
- dst += dstride * 2;
- }
-}
-
-// Process blocks of width 4, four lines at a time, 8 bit.
-static void SIMD_FUNC(clpf_block4)(uint8_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y += 4) {
- const v64 l0 = v64_load_aligned(src - 2 * sstride);
- const v64 l1 = v64_load_aligned(src - sstride);
- const v64 l2 = v64_load_aligned(src);
- const v64 l3 = v64_load_aligned(src + sstride);
- const v64 l4 = v64_load_aligned(src + 2 * sstride);
- const v64 l5 = v64_load_aligned(src + 3 * sstride);
- const v64 l6 = v64_load_aligned(src + 4 * sstride);
- const v64 l7 = v64_load_aligned(src + 5 * sstride);
- const v128 o =
- calc_delta(v256_from_v64(l2, l3, l4, l5), v256_from_v64(l0, l1, l2, l3),
- v256_from_v64(l1, l2, l3, l4),
- v256_from_v64(v64_load_unaligned(src - 2),
- v64_load_unaligned(src + sstride - 2),
- v64_load_unaligned(src + 2 * sstride - 2),
- v64_load_unaligned(src + 3 * sstride - 2)),
- v256_from_v64(v64_load_unaligned(src - 1),
- v64_load_unaligned(src + sstride - 1),
- v64_load_unaligned(src + 2 * sstride - 1),
- v64_load_unaligned(src + 3 * sstride - 1)),
- v256_from_v64(v64_load_unaligned(src + 1),
- v64_load_unaligned(src + sstride + 1),
- v64_load_unaligned(src + 2 * sstride + 1),
- v64_load_unaligned(src + 3 * sstride + 1)),
- v256_from_v64(v64_load_unaligned(src + 2),
- v64_load_unaligned(src + sstride + 2),
- v64_load_unaligned(src + 2 * sstride + 2),
- v64_load_unaligned(src + 3 * sstride + 2)),
- v256_from_v64(l3, l4, l5, l6), v256_from_v64(l4, l5, l6, l7),
- strength, adjdamp);
-
- u32_store_aligned(dst, v128_low_u32(v128_shr_n_byte(o, 12)));
- u32_store_aligned(dst + dstride, v128_low_u32(v128_shr_n_byte(o, 8)));
- u32_store_aligned(dst + 2 * dstride, v128_low_u32(v128_shr_n_byte(o, 4)));
- u32_store_aligned(dst + 3 * dstride, v128_low_u32(o));
-
- dst += 4 * dstride;
- src += 4 * sstride;
- }
-}
-
-static void SIMD_FUNC(clpf_hblock8)(uint8_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y += 2) {
- const v256 x = v256_from_v128(v128_load_aligned(src),
- v128_load_aligned(src + sstride));
- const v256 a = v256_from_v128(v128_load_unaligned(src - 2),
- v128_load_unaligned(src - 2 + sstride));
- const v256 b = v256_from_v128(v128_load_unaligned(src - 1),
- v128_load_unaligned(src - 1 + sstride));
- const v256 c = v256_from_v128(v128_load_unaligned(src + 1),
- v128_load_unaligned(src + 1 + sstride));
- const v256 d = v256_from_v128(v128_load_unaligned(src + 2),
- v128_load_unaligned(src + 2 + sstride));
- const v128 o = calc_hdelta(x, a, b, c, d, strength, adjdamp);
-
- v64_store_aligned(dst, v128_high_v64(o));
- v64_store_aligned(dst + dstride, v128_low_v64(o));
- src += sstride * 2;
- dst += dstride * 2;
- }
-}
-
-// Process blocks of width 4, four lines at a time, 8 bit.
-static void SIMD_FUNC(clpf_hblock4)(uint8_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y += 4) {
- const v256 a = v256_from_v64(v64_load_unaligned(src - 2),
- v64_load_unaligned(src + sstride - 2),
- v64_load_unaligned(src + 2 * sstride - 2),
- v64_load_unaligned(src + 3 * sstride - 2));
- const v256 b = v256_from_v64(v64_load_unaligned(src - 1),
- v64_load_unaligned(src + sstride - 1),
- v64_load_unaligned(src + 2 * sstride - 1),
- v64_load_unaligned(src + 3 * sstride - 1));
- const v256 c = v256_from_v64(v64_load_unaligned(src + 1),
- v64_load_unaligned(src + sstride + 1),
- v64_load_unaligned(src + 2 * sstride + 1),
- v64_load_unaligned(src + 3 * sstride + 1));
- const v256 d = v256_from_v64(v64_load_unaligned(src + 2),
- v64_load_unaligned(src + sstride + 2),
- v64_load_unaligned(src + 2 * sstride + 2),
- v64_load_unaligned(src + 3 * sstride + 2));
-
- const v128 o = calc_hdelta(
- v256_from_v64(v64_load_aligned(src), v64_load_aligned(src + sstride),
- v64_load_aligned(src + 2 * sstride),
- v64_load_aligned(src + 3 * sstride)),
- a, b, c, d, strength, adjdamp);
-
- u32_store_aligned(dst, v128_low_u32(v128_shr_n_byte(o, 12)));
- u32_store_aligned(dst + dstride, v128_low_u32(v128_shr_n_byte(o, 8)));
- u32_store_aligned(dst + 2 * dstride, v128_low_u32(v128_shr_n_byte(o, 4)));
- u32_store_aligned(dst + 3 * dstride, v128_low_u32(o));
-
- dst += 4 * dstride;
- src += 4 * sstride;
- }
-}
-
-void SIMD_FUNC(aom_clpf_block)(uint8_t *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey,
- unsigned int strength, unsigned int dmp) {
- if ((sizex != 4 && sizex != 8) || ((sizey & 3) && sizex == 4)) {
- // Fallback to C for odd sizes:
- // * block widths not 4 or 8
- // * block heights not a multiple of 4 if the block width is 4
- aom_clpf_block_c(dst, src, dstride, sstride, sizex, sizey, strength, dmp);
- } else {
- (sizex == 4 ? SIMD_FUNC(clpf_block4) : SIMD_FUNC(clpf_block8))(
- dst, src, dstride, sstride, sizey, strength, dmp - get_msb(strength));
- }
-}
-
-void SIMD_FUNC(aom_clpf_hblock)(uint8_t *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey,
- unsigned int strength, unsigned int dmp) {
- if ((sizex != 4 && sizex != 8) || ((sizey & 3) && sizex == 4)) {
- // Fallback to C for odd sizes:
- // * block widths not 4 or 8
- // * block heights not a multiple of 4 if the block width is 4
- aom_clpf_hblock_c(dst, src, dstride, sstride, sizex, sizey, strength, dmp);
- } else {
- (sizex == 4 ? SIMD_FUNC(clpf_hblock4) : SIMD_FUNC(clpf_hblock8))(
- dst, src, dstride, sstride, sizey, strength, dmp - get_msb(strength));
- }
-}
-
-// delta = 1/16 * constrain(a, x, s, d) + 3/16 * constrain(b, x, s, d) +
-// 1/16 * constrain(c, x, s, d) + 3/16 * constrain(d, x, s, d) +
-// 3/16 * constrain(e, x, s, d) + 1/16 * constrain(f, x, s, d) +
-// 3/16 * constrain(g, x, s, d) + 1/16 * constrain(h, x, s, d)
-SIMD_INLINE v128 calc_delta_hbd(v128 x, v128 a, v128 b, v128 c, v128 d, v128 e,
- v128 f, v128 g, v128 h, unsigned int s,
- unsigned int dmp) {
- const v128 bdeg = v128_add_16(
- v128_add_16(constrain16(b, x, s, dmp), constrain16(d, x, s, dmp)),
- v128_add_16(constrain16(e, x, s, dmp), constrain16(g, x, s, dmp)));
- const v128 delta = v128_add_16(
- v128_add_16(
- v128_add_16(constrain16(a, x, s, dmp), constrain16(c, x, s, dmp)),
- v128_add_16(constrain16(f, x, s, dmp), constrain16(h, x, s, dmp))),
- v128_add_16(v128_add_16(bdeg, bdeg), bdeg));
- return v128_add_16(
- x,
- v128_shr_s16(
- v128_add_16(v128_dup_16(8),
- v128_add_16(delta, v128_cmplt_s16(delta, v128_zero()))),
- 4));
-}
-
-static void calc_delta_hbd4(v128 o, v128 a, v128 b, v128 c, v128 d, v128 e,
- v128 f, v128 g, v128 h, uint16_t *dst,
- unsigned int s, unsigned int dmp, int dstride) {
- o = calc_delta_hbd(o, a, b, c, d, e, f, g, h, s, dmp);
- v64_store_aligned(dst, v128_high_v64(o));
- v64_store_aligned(dst + dstride, v128_low_v64(o));
-}
-
-static void calc_delta_hbd8(v128 o, v128 a, v128 b, v128 c, v128 d, v128 e,
- v128 f, v128 g, v128 h, uint16_t *dst,
- unsigned int s, unsigned int adjdamp) {
- v128_store_aligned(dst,
- calc_delta_hbd(o, a, b, c, d, e, f, g, h, s, adjdamp));
-}
-
-// delta = 1/16 * constrain(a, x, s, dmp) + 3/16 * constrain(b, x, s, dmp) +
-// 3/16 * constrain(c, x, s, dmp) + 1/16 * constrain(d, x, s, dmp)
-SIMD_INLINE v128 calc_hdelta_hbd(v128 x, v128 a, v128 b, v128 c, v128 d,
- unsigned int s, unsigned int dmp) {
- const v128 bc =
- v128_add_16(constrain16(b, x, s, dmp), constrain16(c, x, s, dmp));
- const v128 delta = v128_add_16(
- v128_add_16(constrain16(a, x, s, dmp), constrain16(d, x, s, dmp)),
- v128_add_16(v128_add_16(bc, bc), bc));
- return v128_add_16(
- x,
- v128_shr_s16(
- v128_add_16(v128_dup_16(4),
- v128_add_16(delta, v128_cmplt_s16(delta, v128_zero()))),
- 3));
-}
-
-static void calc_hdelta_hbd4(v128 o, v128 a, v128 b, v128 c, v128 d,
- uint16_t *dst, unsigned int s,
- unsigned int adjdamp, int dstride) {
- o = calc_hdelta_hbd(o, a, b, c, d, s, adjdamp);
- v64_store_aligned(dst, v128_high_v64(o));
- v64_store_aligned(dst + dstride, v128_low_v64(o));
-}
-
-static void calc_hdelta_hbd8(v128 o, v128 a, v128 b, v128 c, v128 d,
- uint16_t *dst, unsigned int s,
- unsigned int adjdamp) {
- v128_store_aligned(dst, calc_hdelta_hbd(o, a, b, c, d, s, adjdamp));
-}
-
-// Process blocks of width 4, two lines at time.
-static void SIMD_FUNC(clpf_block_hbd4)(uint16_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y += 2) {
- const v64 l1 = v64_load_aligned(src);
- const v64 l2 = v64_load_aligned(src + sstride);
- const v64 l3 = v64_load_aligned(src - sstride);
- const v64 l4 = v64_load_aligned(src + 2 * sstride);
- const v128 a = v128_from_v64(v64_load_aligned(src - 2 * sstride), l3);
- const v128 b = v128_from_v64(l3, l1);
- const v128 g = v128_from_v64(l2, l4);
- const v128 h = v128_from_v64(l4, v64_load_aligned(src + 3 * sstride));
- const v128 c = v128_from_v64(v64_load_unaligned(src - 2),
- v64_load_unaligned(src - 2 + sstride));
- const v128 d = v128_from_v64(v64_load_unaligned(src - 1),
- v64_load_unaligned(src - 1 + sstride));
- const v128 e = v128_from_v64(v64_load_unaligned(src + 1),
- v64_load_unaligned(src + 1 + sstride));
- const v128 f = v128_from_v64(v64_load_unaligned(src + 2),
- v64_load_unaligned(src + 2 + sstride));
-
- calc_delta_hbd4(v128_from_v64(l1, l2), a, b, c, d, e, f, g, h, dst,
- strength, adjdamp, dstride);
- src += sstride * 2;
- dst += dstride * 2;
- }
-}
-
-// The most simple case. Start here if you need to understand the functions.
-static void SIMD_FUNC(clpf_block_hbd)(uint16_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y++) {
- const v128 o = v128_load_aligned(src);
- const v128 a = v128_load_aligned(src - 2 * sstride);
- const v128 b = v128_load_aligned(src - 1 * sstride);
- const v128 g = v128_load_aligned(src + sstride);
- const v128 h = v128_load_aligned(src + 2 * sstride);
- const v128 c = v128_load_unaligned(src - 2);
- const v128 d = v128_load_unaligned(src - 1);
- const v128 e = v128_load_unaligned(src + 1);
- const v128 f = v128_load_unaligned(src + 2);
-
- calc_delta_hbd8(o, a, b, c, d, e, f, g, h, dst, strength, adjdamp);
- src += sstride;
- dst += dstride;
- }
-}
-
-// Process blocks of width 4, horizontal filter, two lines at time.
-static void SIMD_FUNC(clpf_hblock_hbd4)(uint16_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y += 2) {
- const v128 a = v128_from_v64(v64_load_unaligned(src - 2),
- v64_load_unaligned(src - 2 + sstride));
- const v128 b = v128_from_v64(v64_load_unaligned(src - 1),
- v64_load_unaligned(src - 1 + sstride));
- const v128 c = v128_from_v64(v64_load_unaligned(src + 1),
- v64_load_unaligned(src + 1 + sstride));
- const v128 d = v128_from_v64(v64_load_unaligned(src + 2),
- v64_load_unaligned(src + 2 + sstride));
-
- calc_hdelta_hbd4(v128_from_v64(v64_load_unaligned(src),
- v64_load_unaligned(src + sstride)),
- a, b, c, d, dst, strength, adjdamp, dstride);
- src += sstride * 2;
- dst += dstride * 2;
- }
-}
-
-// Process blocks of width 8, horizontal filter, two lines at time.
-static void SIMD_FUNC(clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y++) {
- const v128 o = v128_load_aligned(src);
- const v128 a = v128_load_unaligned(src - 2);
- const v128 b = v128_load_unaligned(src - 1);
- const v128 c = v128_load_unaligned(src + 1);
- const v128 d = v128_load_unaligned(src + 2);
-
- calc_hdelta_hbd8(o, a, b, c, d, dst, strength, adjdamp);
- src += sstride;
- dst += dstride;
- }
-}
-
-void SIMD_FUNC(aom_clpf_block_hbd)(uint16_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizex,
- int sizey, unsigned int strength,
- unsigned int dmp) {
- if ((sizex != 4 && sizex != 8) || ((sizey & 1) && sizex == 4)) {
- // Fallback to C for odd sizes:
- // * block width not 4 or 8
- // * block heights not a multiple of 2 if the block width is 4
- aom_clpf_block_hbd_c(dst, src, dstride, sstride, sizex, sizey, strength,
- dmp);
- } else {
- (sizex == 4 ? SIMD_FUNC(clpf_block_hbd4) : SIMD_FUNC(clpf_block_hbd))(
- dst, src, dstride, sstride, sizey, strength, dmp - get_msb(strength));
- }
-}
-
-void SIMD_FUNC(aom_clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizex,
- int sizey, unsigned int strength,
- unsigned int dmp) {
- if ((sizex != 4 && sizex != 8) || ((sizey & 1) && sizex == 4)) {
- // Fallback to C for odd sizes:
- // * block width not 4 or 8
- // * block heights not a multiple of 2 if the block width is 4
- aom_clpf_hblock_hbd_c(dst, src, dstride, sstride, sizex, sizey, strength,
- dmp);
- } else {
- (sizex == 4 ? SIMD_FUNC(clpf_hblock_hbd4) : SIMD_FUNC(clpf_hblock_hbd))(
- dst, src, dstride, sstride, sizey, strength, dmp - get_msb(strength));
- }
-}
diff --git a/third_party/aom/av1/common/clpf_sse2.c b/third_party/aom/av1/common/clpf_sse2.c
deleted file mode 100644
index e29c2ab7e..000000000
--- a/third_party/aom/av1/common/clpf_sse2.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_sse2
-#include "./clpf_simd.h"
diff --git a/third_party/aom/av1/common/clpf_sse4.c b/third_party/aom/av1/common/clpf_sse4.c
deleted file mode 100644
index 537139f17..000000000
--- a/third_party/aom/av1/common/clpf_sse4.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_sse4_1
-#include "./clpf_simd.h"
diff --git a/third_party/aom/av1/common/clpf_ssse3.c b/third_party/aom/av1/common/clpf_ssse3.c
deleted file mode 100644
index d7ed8dec5..000000000
--- a/third_party/aom/av1/common/clpf_ssse3.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_ssse3
-#include "./clpf_simd.h"
diff --git a/third_party/aom/av1/common/common.h b/third_party/aom/av1/common/common.h
index 8611b776f..72c6d3a1e 100644
--- a/third_party/aom/av1/common/common.h
+++ b/third_party/aom/av1/common/common.h
@@ -20,6 +20,7 @@
#include "aom_mem/aom_mem.h"
#include "aom/aom_integer.h"
#include "aom_ports/bitops.h"
+#include "config/aom_config.h"
#ifdef __cplusplus
extern "C" {
@@ -53,6 +54,8 @@ static INLINE int get_unsigned_bits(unsigned int num_values) {
#define AOM_FRAME_MARKER 0x2
+#define AV1_MIN_TILE_SIZE_BYTES 1
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/common_data.h b/third_party/aom/av1/common/common_data.h
index 1a74fe76e..f521f10bf 100644
--- a/third_party/aom/av1/common/common_data.h
+++ b/third_party/aom/av1/common/common_data.h
@@ -20,600 +20,78 @@
extern "C" {
#endif
-#if CONFIG_EXT_PARTITION
-#define IF_EXT_PARTITION(...) __VA_ARGS__,
-#else
-#define IF_EXT_PARTITION(...)
-#endif
-
-// Log 2 conversion lookup tables for block width and height
-static const uint8_t b_width_log2_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0, 0,
- 0,
-#endif
- 0, 0,
- 1, 1,
- 1, 2,
- 2, 2,
- 3, 3,
- 3, 4,
- 4, IF_EXT_PARTITION(4, 5, 5) 0,
- 2, 1,
- 3, 2,
- 4, IF_EXT_PARTITION(3, 5)
-};
-static const uint8_t b_height_log2_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0, 0,
- 0,
-#endif
- 0, 1,
- 0, 1,
- 2, 1,
- 2, 3,
- 2, 3,
- 4, 3,
- 4, IF_EXT_PARTITION(5, 4, 5) 2,
- 0, 3,
- 1, 4,
- 2, IF_EXT_PARTITION(5, 3)
+// Log 2 conversion lookup tables in units of mode info (4x4).
+static const uint8_t mi_size_wide_log2[BLOCK_SIZES_ALL] = {
+ 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 0, 2, 1, 3, 2, 4
};
-// Log 2 conversion lookup tables for modeinfo width and height
-static const uint8_t mi_width_log2_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CB4X4
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0, 0,
- 0,
-#endif
- 0, 0,
- 1, 1,
- 1, 2,
- 2, 2,
- 3, 3,
- 3, 4,
- 4, IF_EXT_PARTITION(4, 5, 5) 0,
- 2, 1,
- 3, 2,
- 4, IF_EXT_PARTITION(3, 5)
-#else // CONFIG_CB4X4
- 0, 0,
- 0, 0,
- 0, 1,
- 1, 1,
- 2, 2,
- 2, 3,
- 3, IF_EXT_PARTITION(3, 4, 4) 0,
- 1, 0,
- 2, 1,
- 3, IF_EXT_PARTITION(2, 4)
-#endif
-};
-static const uint8_t mi_height_log2_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CB4X4
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0, 0,
- 0,
-#endif
- 0, 1,
- 0, 1,
- 2, 1,
- 2, 3,
- 2, 3,
- 4, 3,
- 4, IF_EXT_PARTITION(5, 4, 5) 2,
- 0, 3,
- 1, 4,
- 2, IF_EXT_PARTITION(5, 3)
-#else // CONFIG_CB4X4
- 0, 0,
- 0, 0,
- 1, 0,
- 1, 2,
- 1, 2,
- 3, 2,
- 3, IF_EXT_PARTITION(4, 3, 4) 1,
- 0, 2,
- 0, 3,
- 1, IF_EXT_PARTITION(2, 4)
-#endif
+static const uint8_t mi_size_high_log2[BLOCK_SIZES_ALL] = {
+ 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 2, 0, 3, 1, 4, 2
};
-/* clang-format off */
static const uint8_t mi_size_wide[BLOCK_SIZES_ALL] = {
-#if CONFIG_CB4X4
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 1, 1, 1,
-#endif
- 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16,
- IF_EXT_PARTITION(16, 32, 32) 1, 4, 2, 8, 4, 16, IF_EXT_PARTITION(8, 32)
-#else // CONFIG_CB4X4
- 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16) 1, 2, 1, 4,
- 2, 8, IF_EXT_PARTITION(4, 16)
-#endif
+ 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 1, 4, 2, 8, 4, 16
};
+
static const uint8_t mi_size_high[BLOCK_SIZES_ALL] = {
-#if CONFIG_CB4X4
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 1, 1, 1,
-#endif
- 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16,
- IF_EXT_PARTITION(32, 16, 32) 4, 1, 8, 2, 16, 4, IF_EXT_PARTITION(32, 8)
-#else // CONFIG_CB4X4
- 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16) 2, 1, 4, 1,
- 8, 2, IF_EXT_PARTITION(16, 4)
-#endif
+ 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16, 32, 16, 32, 4, 1, 8, 2, 16, 4
};
-/* clang-format on */
// Width/height lookup tables in units of various block sizes
static const uint8_t block_size_wide[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 2, 2,
- 4,
-#endif
- 4, 4,
- 8, 8,
- 8, 16,
- 16, 16,
- 32, 32,
- 32, 64,
- 64, IF_EXT_PARTITION(64, 128, 128) 4,
- 16, 8,
- 32, 16,
- 64, IF_EXT_PARTITION(32, 128)
+ 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32,
+ 64, 64, 64, 128, 128, 4, 16, 8, 32, 16, 64
};
static const uint8_t block_size_high[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 2, 4,
- 2,
-#endif
- 4, 8,
- 4, 8,
- 16, 8,
- 16, 32,
- 16, 32,
- 64, 32,
- 64, IF_EXT_PARTITION(128, 64, 128) 16,
- 4, 32,
- 8, 64,
- 16, IF_EXT_PARTITION(128, 32)
-};
-
-static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 1, 1,
- 1,
-#endif
- 1, 1,
- 2, 2,
- 2, 4,
- 4, 4,
- 8, 8,
- 8, 16,
- 16, IF_EXT_PARTITION(16, 32, 32) 1,
- 4, 2,
- 8, 4,
- 16, IF_EXT_PARTITION(8, 32)
-};
-static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 1, 1,
- 1,
-#endif
- 1, 2,
- 1, 2,
- 4, 2,
- 4, 8,
- 4, 8,
- 16, 8,
- 16, IF_EXT_PARTITION(32, 16, 32) 4,
- 1, 8,
- 2, 16,
- 4, IF_EXT_PARTITION(32, 8)
-};
-static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 1, 1,
- 1,
-#endif
- 1, 1,
- 1, 1,
- 1, 2,
- 2, 2,
- 4, 4,
- 4, 8,
- 8, IF_EXT_PARTITION(8, 16, 16) 1,
- 2, 1,
- 4, 2,
- 8, IF_EXT_PARTITION(4, 16)
-};
-static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 1, 1,
- 1,
-#endif
- 1, 1,
- 1, 1,
- 2, 1,
- 2, 4,
- 2, 4,
- 8, 4,
- 8, IF_EXT_PARTITION(16, 8, 16) 2,
- 1, 4,
- 1, 8,
- 2, IF_EXT_PARTITION(16, 4)
-};
-static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 1, 1,
- 1,
-#endif
- 1, 1,
- 1, 1,
- 1, 1,
- 1, 1,
- 2, 2,
- 2, 4,
- 4, IF_EXT_PARTITION(4, 8, 8) 1,
- 1, 1,
- 2, 2,
- 4, IF_EXT_PARTITION(2, 8)
-};
-static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 1, 1,
- 1,
-#endif
- 1, 1,
- 1, 1,
- 1, 1,
- 1, 2,
- 1, 2,
- 4, 2,
- 4, IF_EXT_PARTITION(8, 4, 8) 1,
- 1, 2,
- 1, 4,
- 2, IF_EXT_PARTITION(8, 2)
+ 4, 8, 4, 8, 16, 8, 16, 32, 16, 32, 64,
+ 32, 64, 128, 64, 128, 16, 4, 32, 8, 64, 16
};
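The flattened tables keep a simple invariant: a mode-info unit is 4x4 pixels, so mi_size_wide is block_size_wide >> 2 and the *_log2 tables are plain log2 of the MI-unit tables. A small standalone check, using local copies of the first few entries (BLOCK_4X4 .. BLOCK_16X16) rather than the real headers:

#include <assert.h>

/* Local copies of the leading entries of mi_size_wide, mi_size_wide_log2
 * and block_size_wide; not the actual aom headers. */
static const unsigned char kMiWide[]     = { 1, 1, 2, 2, 2, 4, 4, 4 };
static const unsigned char kMiWideLog2[] = { 0, 0, 1, 1, 1, 2, 2, 2 };
static const unsigned char kPixelWide[]  = { 4, 4, 8, 8, 8, 16, 16, 16 };

int main(void) {
  for (int i = 0; i < 8; ++i) {
    assert(kMiWide[i] == (1u << kMiWideLog2[i])); /* log2 table matches */
    assert(kPixelWide[i] == kMiWide[i] * 4);      /* MI unit is 4 pixels */
  }
  return 0;
}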
// AOMMIN(3, AOMMIN(b_width_log2(bsize), b_height_log2(bsize)))
static const uint8_t size_group_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0, 0,
- 0,
-#endif
- 0, 0,
- 0, 1,
- 1, 1,
- 2, 2,
- 2, 3,
- 3, 3,
- 3, IF_EXT_PARTITION(3, 3, 3) 0,
- 0, 1,
- 1, 2,
- 2, IF_EXT_PARTITION(3, 3)
+ 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 0, 0, 1, 1, 2, 2
};
static const uint8_t num_pels_log2_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 2, 3,
- 3,
-#endif
- 4, 5,
- 5, 6,
- 7, 7,
- 8, 9,
- 9, 10,
- 11, 11,
- 12, IF_EXT_PARTITION(13, 13, 14) 6,
- 6, 8,
- 8, 10,
- 10, IF_EXT_PARTITION(12, 12)
+ 4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13, 13, 14, 6, 6, 8, 8, 10, 10
};
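Both tables above have closed forms: size_group_lookup is AOMMIN(3, AOMMIN(b_width_log2, b_height_log2)) as its comment states, and num_pels_log2_lookup is log2(width * height). A short standalone check for a handful of sizes (the expected values are copied from the tables above; the helpers are hypothetical):

#include <assert.h>

static int ilog2(int v) { int n = 0; while (v > 1) { v >>= 1; ++n; } return n; }
static int imin(int a, int b) { return a < b ? a : b; }

int main(void) {
  /* 4x4, 16x8, 64x16, 128x128 */
  const int w[] = { 4, 16, 64, 128 }, h[] = { 4, 8, 16, 128 };
  const int size_group[] = { 0, 1, 2, 3 };
  const int num_pels_log2[] = { 4, 7, 10, 14 };
  for (int i = 0; i < 4; ++i) {
    /* b_width_log2 is log2 of the width in 4-pel units, i.e. log2(w) - 2 */
    assert(imin(3, imin(ilog2(w[i]) - 2, ilog2(h[i]) - 2)) == size_group[i]);
    assert(ilog2(w[i] * h[i]) == num_pels_log2[i]);
  }
  return 0;
}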
/* clang-format off */
-#if CONFIG_EXT_PARTITION_TYPES
-static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][BLOCK_SIZES_ALL] =
-#else
-static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES_ALL] =
-#endif // CONFIG_EXT_PARTITION_TYPES
-{
+static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][SQR_BLOCK_SIZES] = {
{ // PARTITION_NONE
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- BLOCK_2X2, BLOCK_2X4, BLOCK_4X2,
-#endif
- // 4X4
- BLOCK_4X4,
- // 4X8, 8X4, 8X8
- BLOCK_4X8, BLOCK_8X4, BLOCK_8X8,
- // 8X16, 16X8, 16X16
- BLOCK_8X16, BLOCK_16X8, BLOCK_16X16,
- // 16X32, 32X16, 32X32
- BLOCK_16X32, BLOCK_32X16, BLOCK_32X32,
- // 32X64, 64X32, 64X64
- BLOCK_32X64, BLOCK_64X32, BLOCK_64X64,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- BLOCK_64X128, BLOCK_128X64, BLOCK_128X128,
-#endif // CONFIG_EXT_PARTITION
- // 4X16, 16X4, 8X32
- BLOCK_4X16, BLOCK_16X4, BLOCK_8X32,
- // 32X8, 16X64, 64X16
- BLOCK_32X8, BLOCK_16X64, BLOCK_64X16,
-#if CONFIG_EXT_PARTITION
- // 32x128, 128x32
- BLOCK_32X128, BLOCK_128X32
-#endif // CONFIG_EXT_PARTITION
+ BLOCK_4X4, BLOCK_8X8, BLOCK_16X16,
+ BLOCK_32X32, BLOCK_64X64, BLOCK_128X128
}, { // PARTITION_HORZ
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 4X4
- BLOCK_4X2,
-#else
- // 4X4
- BLOCK_INVALID,
-#endif
- // 4X8, 8X4, 8X8
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
- // 8X16, 16X8, 16X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
- // 16X32, 32X16, 32X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
- // 32X64, 64X32, 64X64
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
-#endif // CONFIG_EXT_PARTITION
- // 4X16, 16X4, 8X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8, 16X64, 64X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
-#if CONFIG_EXT_PARTITION
- // 32x128, 128x32
- BLOCK_INVALID, BLOCK_INVALID
-#endif // CONFIG_EXT_PARTITION
+ BLOCK_INVALID, BLOCK_8X4, BLOCK_16X8,
+ BLOCK_32X16, BLOCK_64X32, BLOCK_128X64
}, { // PARTITION_VERT
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 4X4
- BLOCK_2X4,
-#else
- // 4X4
- BLOCK_INVALID,
-#endif
- // 4X8, 8X4, 8X8
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
- // 8X16, 16X8, 16X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
- // 16X32, 32X16, 32X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
- // 32X64, 64X32, 64X64
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
-#endif // CONFIG_EXT_PARTITION
- // 4X16, 16X4, 8X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8, 16X64, 64X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
-#if CONFIG_EXT_PARTITION
- // 32x128, 128x32
- BLOCK_INVALID, BLOCK_INVALID
-#endif // CONFIG_EXT_PARTITION
+ BLOCK_INVALID, BLOCK_4X8, BLOCK_8X16,
+ BLOCK_16X32, BLOCK_32X64, BLOCK_64X128
}, { // PARTITION_SPLIT
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
-#endif
- // 4X4
- BLOCK_INVALID,
- // 4X8, 8X4, 8X8
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4,
- // 8X16, 16X8, 16X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X8,
- // 16X32, 32X16, 32X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16,
- // 32X64, 64X32, 64X64
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X64,
-#endif // CONFIG_EXT_PARTITION
- // 4X16, 16X4, 8X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8, 16X64, 64X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
-#if CONFIG_EXT_PARTITION
- // 32x128, 128x32
- BLOCK_INVALID, BLOCK_INVALID
-#endif // CONFIG_EXT_PARTITION
-#if CONFIG_EXT_PARTITION_TYPES
+ BLOCK_INVALID, BLOCK_4X4, BLOCK_8X8,
+ BLOCK_16X16, BLOCK_32X32, BLOCK_64X64
}, { // PARTITION_HORZ_A
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
-#endif
- // 4X4
- BLOCK_INVALID,
- // 4X8, 8X4, 8X8
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
- // 8X16, 16X8, 16X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
- // 16X32, 32X16, 32X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
- // 32X64, 64X32, 64X64
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
-#endif // CONFIG_EXT_PARTITION
- // 4X16, 16X4, 8X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8, 16X64, 64X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
-#if CONFIG_EXT_PARTITION
- // 32x128, 128x32
- BLOCK_INVALID, BLOCK_INVALID
-#endif // CONFIG_EXT_PARTITION
+ BLOCK_INVALID, BLOCK_8X4, BLOCK_16X8,
+ BLOCK_32X16, BLOCK_64X32, BLOCK_128X64
}, { // PARTITION_HORZ_B
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
-#endif
- // 4X4
- BLOCK_INVALID,
- // 4X8, 8X4, 8X8
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
- // 8X16, 16X8, 16X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
- // 16X32, 32X16, 32X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
- // 32X64, 64X32, 64X64
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
-#endif // CONFIG_EXT_PARTITION
- // 4X16, 16X4, 8X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8, 16X64, 64X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
-#if CONFIG_EXT_PARTITION
- // 32x128, 128x32
- BLOCK_INVALID, BLOCK_INVALID
-#endif // CONFIG_EXT_PARTITION
+ BLOCK_INVALID, BLOCK_8X4, BLOCK_16X8,
+ BLOCK_32X16, BLOCK_64X32, BLOCK_128X64
}, { // PARTITION_VERT_A
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
-#endif
- // 4X4
- BLOCK_INVALID,
- // 4X8, 8X4, 8X8
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
- // 8X16, 16X8, 16X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
- // 16X32, 32X16, 32X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
- // 32X64, 64X32, 64X64
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
-#endif // CONFIG_EXT_PARTITION
- // 4X16, 16X4, 8X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8, 16X64, 64X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
-#if CONFIG_EXT_PARTITION
- // 32x128, 128x32
- BLOCK_INVALID, BLOCK_INVALID
-#endif // CONFIG_EXT_PARTITION
+ BLOCK_INVALID, BLOCK_4X8, BLOCK_8X16,
+ BLOCK_16X32, BLOCK_32X64, BLOCK_64X128
}, { // PARTITION_VERT_B
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
-#endif
- // 4X4
- BLOCK_INVALID,
- // 4X8, 8X4, 8X8
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
- // 8X16, 16X8, 16X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
- // 16X32, 32X16, 32X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
- // 32X64, 64X32, 64X64
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
-#endif // CONFIG_EXT_PARTITION
- // 4X16, 16X4, 8X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8, 16X64, 64X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
-#if CONFIG_EXT_PARTITION
- // 32x128, 128x32
- BLOCK_INVALID, BLOCK_INVALID
-#endif // CONFIG_EXT_PARTITION
+ BLOCK_INVALID, BLOCK_4X8, BLOCK_8X16,
+ BLOCK_16X32, BLOCK_32X64, BLOCK_64X128
}, { // PARTITION_HORZ_4
-#if CONFIG_CB4X4
- // 2X2, 2X4, 4X2,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 4X4
- BLOCK_INVALID,
-#else
- // 4X4
- BLOCK_INVALID,
-#endif
- // 4X8, 8X4, 8X8
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X4,
- // 16X32, 32X16, 32X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X8,
- // 32X64, 64X32, 64X64
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X16,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X32,
-#endif // CONFIG_EXT_PARTITION
- // 4X16, 16X4, 8X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8, 16X64, 64X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
-#if CONFIG_EXT_PARTITION
- // 32x128, 128x32
- BLOCK_INVALID, BLOCK_INVALID
-#endif // CONFIG_EXT_PARTITION
+ BLOCK_32X8, BLOCK_64X16, BLOCK_INVALID
}, { // PARTITION_VERT_4
-#if CONFIG_CB4X4
- // 2X2, 2X4, 4X2,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 4X4
- BLOCK_INVALID,
-#else
- // 4X4
- BLOCK_INVALID,
-#endif
- // 4X8, 8X4, 8X8
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 8X16, 16X8, 16X16
BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X16,
- // 16X32, 32X16, 32X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X32,
- // 32X64, 64X32, 64X64
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X64,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X128,
-#endif // CONFIG_EXT_PARTITION
- // 4X16, 16X4, 8X32
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8, 16X64, 64X16
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
-#if CONFIG_EXT_PARTITION
- // 32x128, 128x32
- BLOCK_INVALID, BLOCK_INVALID
-#endif // CONFIG_EXT_PARTITION
-#endif // CONFIG_EXT_PARTITION_TYPES
+ BLOCK_8X32, BLOCK_16X64, BLOCK_INVALID
}
};
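The rewritten subsize_lookup is now indexed by square parent sizes only (SQR_BLOCK_SIZES); each partition type maps the parent to the size of one child block. A hypothetical standalone illustration of the geometry behind the basic rows: HORZ keeps the width and halves the height, VERT does the opposite, SPLIT halves both, so a 16x16 parent yields 16x8, 8x16 and 8x8, matching the entries above.

#include <stdio.h>

enum { PART_NONE, PART_HORZ, PART_VERT, PART_SPLIT };

/* Child dimensions for the basic partitions of a square parent. */
static void child_size(int part, int pw, int ph, int *cw, int *ch) {
  *cw = (part == PART_VERT || part == PART_SPLIT) ? pw / 2 : pw;
  *ch = (part == PART_HORZ || part == PART_SPLIT) ? ph / 2 : ph;
}

int main(void) {
  for (int p = PART_NONE; p <= PART_SPLIT; ++p) {
    int w, h;
    child_size(p, 16, 16, &w, &h);
    printf("partition %d of 16x16 -> %dx%d\n", p, w, h);
  }
  return 0;
}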
static const TX_SIZE max_txsize_lookup[BLOCK_SIZES_ALL] = {
- // 2X2, 2X4, 4X2,
-#if CONFIG_CHROMA_2X2
- TX_2X2, TX_2X2, TX_2X2,
-#elif CONFIG_CHROMA_SUB8X8
- TX_4X4, TX_4X4, TX_4X4,
-#endif
// 4X4
TX_4X4,
// 4X8, 8X4, 8X8
@@ -624,1436 +102,291 @@ static const TX_SIZE max_txsize_lookup[BLOCK_SIZES_ALL] = {
TX_16X16, TX_16X16, TX_32X32,
// 32X64, 64X32,
TX_32X32, TX_32X32,
-#if CONFIG_TX64X64
// 64X64
TX_64X64,
-#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
TX_64X64, TX_64X64, TX_64X64,
-#endif // CONFIG_EXT_PARTITION
-#else
- // 64X64
- TX_32X32,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- TX_32X32, TX_32X32, TX_32X32,
-#endif // CONFIG_EXT_PARTITION
-#endif // CONFIG_TX64X64
// 4x16, 16x4, 8x32
TX_4X4, TX_4X4, TX_8X8,
// 32x8, 16x64 64x16
- TX_8X8, TX_16X16, TX_16X16,
-#if CONFIG_EXT_PARTITION
- // 32x128 128x32
- TX_32X32, TX_32X32
-#endif // CONFIG_EXT_PARTITION
+ TX_8X8, TX_16X16, TX_16X16
};
-#if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES_ALL] = {
- // 2X2, 2X4, 4X2,
-#if CONFIG_CHROMA_2X2
- TX_2X2, TX_2X2, TX_2X2,
-#elif CONFIG_CHROMA_SUB8X8
- TX_4X4, TX_4X4, TX_4X4,
-#endif // CONFIG_CHROMA_SUB8X8
- // 4X4
- TX_4X4,
- // 4X8, 8X4, 8X8
- TX_4X8, TX_8X4, TX_8X8,
- // 8X16, 16X8, 16X16
- TX_8X16, TX_16X8, TX_16X16,
- // 16X32, 32X16, 32X32
- TX_16X32, TX_32X16, TX_32X32,
-#if CONFIG_TX64X64
- // 32X64, 64X32,
- TX_32X64, TX_64X32,
- // 64X64
- TX_64X64,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- TX_64X64, TX_64X64, TX_64X64,
-#endif // CONFIG_EXT_PARTITION
-#else
- // 32X64, 64X32,
- TX_32X32, TX_32X32,
- // 64X64
- TX_32X32,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- TX_32X32, TX_32X32, TX_32X32,
-#endif // CONFIG_EXT_PARTITION
-#endif // CONFIG_TX64X64
-#if CONFIG_RECT_TX_EXT
- // 4x16, 16x4, 8x32
- TX_4X16, TX_16X4, TX_8X32,
- // 32x8
- TX_32X8,
-#else
- // 4x16, 16x4, 8x32
- TX_4X8, TX_8X4, TX_8X16,
- // 32x8
- TX_16X8,
-#endif
- // 16x64, 64x16
- TX_16X32, TX_32X16,
-#if CONFIG_EXT_PARTITION
- // 32x128 128x32
- TX_32X32, TX_32X32
-#endif // CONFIG_EXT_PARTITION
-};
-
-#if CONFIG_RECT_TX_EXT
-static const TX_SIZE quarter_txsize_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- TX_INVALID, TX_INVALID, TX_INVALID,
-#endif
- // 4x4,
- TX_INVALID,
- // 4x8, 8x4, 8x8,
- TX_INVALID, TX_INVALID, TX_INVALID,
- // 8x16, 16x8, 16x16,
- TX_4X16, TX_16X4, TX_INVALID,
- // 16x32, 32x16, 32x32,
- TX_8X32, TX_32X8, TX_INVALID,
- // 32x64, 64x32, 64x64
- TX_INVALID, TX_INVALID, TX_INVALID,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- TX_INVALID, TX_INVALID, TX_INVALID,
-#endif
- // 4x16, 16x4, 8x32
- TX_4X16, TX_16X4, TX_8X32,
- // 32x8 16x64 64x16
- TX_32X8, TX_INVALID, TX_INVALID,
-#if CONFIG_EXT_PARTITION
- // 32x128 128x32
- TX_INVALID, TX_INVALID
-#endif // CONFIG_EXT_PARTITION
+ // 4X4
+ TX_4X4,
+ // 4X8, 8X4, 8X8
+ TX_4X8, TX_8X4, TX_8X8,
+ // 8X16, 16X8, 16X16
+ TX_8X16, TX_16X8, TX_16X16,
+ // 16X32, 32X16, 32X32
+ TX_16X32, TX_32X16, TX_32X32,
+ // 32X64, 64X32,
+ TX_32X64, TX_64X32,
+ // 64X64
+ TX_64X64,
+ // 64x128, 128x64, 128x128
+ TX_64X64, TX_64X64, TX_64X64,
+ // 4x16, 16x4,
+ TX_4X16, TX_16X4,
+ // 8x32, 32x8
+ TX_8X32, TX_32X8,
+ // 16x64, 64x16
+ TX_16X64, TX_64X16
};
-#endif
-#else
-#define max_txsize_rect_lookup max_txsize_lookup
-#endif // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
DCT_1D, ADST_1D, DCT_1D, ADST_1D,
-#if CONFIG_EXT_TX
FLIPADST_1D, DCT_1D, FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D, IDTX_1D,
-#endif // CONFIG_EXT_TX
};
static const TX_TYPE_1D htx_tab[TX_TYPES] = {
DCT_1D, DCT_1D, ADST_1D, ADST_1D,
-#if CONFIG_EXT_TX
DCT_1D, FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
IDTX_1D, DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D,
-#endif // CONFIG_EXT_TX
-};
-
-#if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
-// Same as "max_txsize_lookup[bsize] - TX_8X8", except for rectangular
-// block which may use a rectangular transform, in which case it is
-// "(max_txsize_lookup[bsize] + 1) - TX_8X8", invalid for bsize < 8X8
-static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CB4X4
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- INT32_MIN, INT32_MIN, INT32_MIN,
-#endif
- // 4X4,
- INT32_MIN,
- // 4X8, 8X4, 8X8,
- TX_8X8 - TX_8X8, TX_8X8 - TX_8X8, TX_8X8 - TX_8X8,
-#else // CONFIG_CB4X4
- // 4X4
- INT32_MIN,
- // 4X8, 8X4, 8X8
- INT32_MIN, INT32_MIN, TX_8X8 - TX_8X8,
-#endif // CONFIG_CB4X4
- // 8X16, 16X8, 16X16
- TX_16X16 - TX_8X8, TX_16X16 - TX_8X8, TX_16X16 - TX_8X8,
- // 16X32, 32X16, 32X32
- TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
-#if CONFIG_TX64X64
- // 32X64, 64X32,
- TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
- // 64X64
- TX_64X64 - TX_8X8,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- TX_64X64 - TX_8X8, TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
-#endif // CONFIG_EXT_PARTITION
-#else
- // 32X64, 64X32,
- TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
- // 64X64
- TX_32X32 - TX_8X8,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
-#endif // CONFIG_EXT_PARTITION
-#endif // CONFIG_TX64X64
- // TODO(david.barker): Change these if we support rectangular transforms
- // for 4:1 shaped partitions
- // 4x16, 16x4, 8x32
- TX_8X8 - TX_8X8, TX_8X8 - TX_8X8, TX_8X8 - TX_8X8,
- // 32x8, 16x64, 64x16
- TX_8X8 - TX_8X8, TX_16X16 - TX_8X8, TX_16X16 - TX_8X8,
-#if CONFIG_EXT_PARTITION
- // 32x128, 128x32
- TX_32X32 - TX_8X8, TX_32X32 - TX_8X8
-#endif // CONFIG_EXT_PARTITION
-};
-#else
-// Same as "max_txsize_lookup[bsize] - TX_8X8", invalid for bsize < 8X8
-static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- INT32_MIN, INT32_MIN, INT32_MIN,
-#endif
- // 4X4
- INT32_MIN,
- // 4X8, 8X4, 8X8
- INT32_MIN, INT32_MIN, TX_8X8 - TX_8X8,
- // 8X16, 16X8, 16X16
- TX_8X8 - TX_8X8, TX_8X8 - TX_8X8, TX_16X16 - TX_8X8,
- // 16X32, 32X16, 32X32
- TX_16X16 - TX_8X8, TX_16X16 - TX_8X8, TX_32X32 - TX_8X8,
-#if CONFIG_TX64X64
- // 32X64, 64X32,
- TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
- // 64X64
- TX_64X64 - TX_8X8,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- TX_64X64 - TX_8X8, TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
-#endif // CONFIG_EXT_PARTITION
-#else
- // 32X64, 64X32,
- TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
- // 64X64
- TX_32X32 - TX_8X8,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
-#endif // CONFIG_EXT_PARTITION
-#endif // CONFIG_TX64X64
- // 4x16, 16x4, 8x32
- TX_8X8 - TX_8X8, TX_8X8 - TX_8X8, TX_8X8 - TX_8X8,
- // 32x8 16x64, 64x16
- TX_8X8 - TX_8X8, TX_16X16 - TX_8X8, TX_16X16 - TX_8X8,
-#if CONFIG_EXT_PARTITION
- // 32x128, 128x32
- TX_32X32 - TX_8X8, TX_32X32 - TX_8X8
-#endif // CONFIG_EXT_PARTITION
};
-#endif // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
-#define inter_tx_size_cat_lookup intra_tx_size_cat_lookup
+#define TXSIZE_CAT_INVALID (-1)
/* clang-format on */
static const TX_SIZE sub_tx_size_map[TX_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2
- TX_2X2, // TX_2X2
-#endif
TX_4X4, // TX_4X4
TX_4X4, // TX_8X8
TX_8X8, // TX_16X16
TX_16X16, // TX_32X32
-#if CONFIG_TX64X64
TX_32X32, // TX_64X64
-#endif // CONFIG_TX64X64
TX_4X4, // TX_4X8
TX_4X4, // TX_8X4
TX_8X8, // TX_8X16
TX_8X8, // TX_16X8
TX_16X16, // TX_16X32
TX_16X16, // TX_32X16
-#if CONFIG_TX64X64
TX_32X32, // TX_32X64
TX_32X32, // TX_64X32
-#endif // CONFIG_TX64X64
- TX_4X4, // TX_4X16
- TX_4X4, // TX_16X4
- TX_8X8, // TX_8X32
- TX_8X8, // TX_32X8
+ TX_4X8, // TX_4X16
+ TX_8X4, // TX_16X4
+ TX_8X16, // TX_8X32
+ TX_16X8, // TX_32X8
+ TX_16X32, // TX_16X64
+ TX_32X16, // TX_64X16
};
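The updated sub_tx_size_map follows a simple halving rule, and the changed 4:1 entries now step to the 2:1 size instead of jumping to a square: square sizes halve both dimensions, non-square sizes halve only the longer one, and TX_4X4 stays put. A hedged sketch of that rule (an observation from the values above, expressed with hypothetical helpers):

#include <stdio.h>

/* Next-smaller transform under the halving rule described above. */
static void sub_tx(int w, int h, int *sw, int *sh) {
  if (w == 4 && h == 4) { *sw = 4; *sh = 4; return; } /* smallest size */
  if (w == h)      { *sw = w / 2; *sh = h / 2; }      /* square: halve both */
  else if (w > h)  { *sw = w / 2; *sh = h; }          /* halve the longer side */
  else             { *sw = w; *sh = h / 2; }
}

int main(void) {
  const int w[] = { 8, 16, 4, 64 }, h[] = { 8, 8, 16, 16 };
  for (int i = 0; i < 4; ++i) {
    int sw, sh;
    sub_tx(w[i], h[i], &sw, &sh);
    printf("%dx%d -> %dx%d\n", w[i], h[i], sw, sh); /* 4x4, 8x8, 4x8, 32x16 */
  }
  return 0;
}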
static const TX_SIZE txsize_horz_map[TX_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2
- TX_2X2, // TX_2X2
-#endif
TX_4X4, // TX_4X4
TX_8X8, // TX_8X8
TX_16X16, // TX_16X16
TX_32X32, // TX_32X32
-#if CONFIG_TX64X64
TX_64X64, // TX_64X64
-#endif // CONFIG_TX64X64
TX_4X4, // TX_4X8
TX_8X8, // TX_8X4
TX_8X8, // TX_8X16
TX_16X16, // TX_16X8
TX_16X16, // TX_16X32
TX_32X32, // TX_32X16
-#if CONFIG_TX64X64
TX_32X32, // TX_32X64
TX_64X64, // TX_64X32
-#endif // CONFIG_TX64X64
TX_4X4, // TX_4X16
TX_16X16, // TX_16X4
TX_8X8, // TX_8X32
TX_32X32, // TX_32X8
+ TX_16X16, // TX_16X64
+ TX_64X64, // TX_64X16
};
static const TX_SIZE txsize_vert_map[TX_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2
- TX_2X2, // TX_2X2
-#endif
TX_4X4, // TX_4X4
TX_8X8, // TX_8X8
TX_16X16, // TX_16X16
TX_32X32, // TX_32X32
-#if CONFIG_TX64X64
TX_64X64, // TX_64X64
-#endif // CONFIG_TX64X64
TX_8X8, // TX_4X8
TX_4X4, // TX_8X4
TX_16X16, // TX_8X16
TX_8X8, // TX_16X8
TX_32X32, // TX_16X32
TX_16X16, // TX_32X16
-#if CONFIG_TX64X64
TX_64X64, // TX_32X64
TX_32X32, // TX_64X32
-#endif // CONFIG_TX64X64
TX_16X16, // TX_4X16
TX_4X4, // TX_16X4
TX_32X32, // TX_8X32
TX_8X8, // TX_32X8
+ TX_64X64, // TX_16X64
+ TX_16X16, // TX_64X16
};
-#if CONFIG_CHROMA_2X2
-#define TX_SIZE_W_MIN 2
-#else
#define TX_SIZE_W_MIN 4
-#endif
// Transform block width in pixels
static const int tx_size_wide[TX_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2
- 2,
-#endif
- 4, 8, 16, 32,
-#if CONFIG_TX64X64
- 64,
-#endif // CONFIG_TX64X64
- 4, 8, 8, 16, 16, 32,
-#if CONFIG_TX64X64
- 32, 64,
-#endif // CONFIG_TX64X64
- 4, 16, 8, 32
+ 4, 8, 16, 32, 64, 4, 8, 8, 16, 16, 32, 32, 64, 4, 16, 8, 32, 16, 64,
};
-#if CONFIG_CHROMA_2X2
-#define TX_SIZE_H_MIN 2
-#else
#define TX_SIZE_H_MIN 4
-#endif
// Transform block height in pixels
static const int tx_size_high[TX_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2
- 2,
-#endif
- 4, 8, 16, 32,
-#if CONFIG_TX64X64
- 64,
-#endif // CONFIG_TX64X64
- 8, 4, 16, 8, 32, 16,
-#if CONFIG_TX64X64
- 64, 32,
-#endif // CONFIG_TX64X64
- 16, 4, 32, 8
+ 4, 8, 16, 32, 64, 8, 4, 16, 8, 32, 16, 64, 32, 16, 4, 32, 8, 64, 16,
};
// Transform block width in unit
static const int tx_size_wide_unit[TX_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2
- 1, 2, 4, 8, 16,
-#if CONFIG_TX64X64
- 32,
-#endif // CONFIG_TX64X64
- 2, 4, 4, 8, 8, 16,
-#if CONFIG_TX64X64
- 16, 32,
-#endif // CONFIG_TX64X64
- 2, 8, 4, 16
-#else // CONFIG_CHROMA_2X2
- 1, 2, 4, 8,
-#if CONFIG_TX64X64
- 16,
-#endif // CONFIG_TX64X64
- 1, 2, 2, 4, 4, 8,
-#if CONFIG_TX64X64
- 8, 16,
-#endif // CONFIG_TX64X64
- 1, 4, 2, 8
-#endif // CONFIG_CHROMA_2X2
+ 1, 2, 4, 8, 16, 1, 2, 2, 4, 4, 8, 8, 16, 1, 4, 2, 8, 4, 16,
};
// Transform block height in unit
static const int tx_size_high_unit[TX_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2
- 1, 2, 4, 8, 16,
-#if CONFIG_TX64X64
- 32,
-#endif // CONFIG_TX64X64
- 4, 2, 8, 4, 16, 8,
-#if CONFIG_TX64X64
- 32, 16,
-#endif // CONFIG_TX64X64
- 8, 2, 16, 4
-#else // CONFIG_CHROMA_2X2
- 1, 2, 4, 8,
-#if CONFIG_TX64X64
- 16,
-#endif // CONFIG_TX64X64
- 2, 1, 4, 2, 8, 4,
-#if CONFIG_TX64X64
- 16, 8,
-#endif // CONFIG_TX64X64
- 4, 1, 8, 2
-#endif // CONFIG_CHROMA_2X2
+ 1, 2, 4, 8, 16, 2, 1, 4, 2, 8, 4, 16, 8, 4, 1, 8, 2, 16, 4,
};
// Transform block width in log2
static const int tx_size_wide_log2[TX_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2
- 1,
-#endif
- 2, 3, 4, 5,
-#if CONFIG_TX64X64
- 6,
-#endif // CONFIG_TX64X64
- 2, 3, 3, 4, 4, 5,
-#if CONFIG_TX64X64
- 5, 6,
-#endif // CONFIG_TX64X64
- 2, 4, 3, 5
+ 2, 3, 4, 5, 6, 2, 3, 3, 4, 4, 5, 5, 6, 2, 4, 3, 5, 4, 6,
};
// Transform block height in log2
static const int tx_size_high_log2[TX_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2
- 1,
-#endif
- 2, 3, 4, 5,
-#if CONFIG_TX64X64
- 6,
-#endif // CONFIG_TX64X64
- 3, 2, 4, 3, 5, 4,
-#if CONFIG_TX64X64
- 6, 5,
-#endif // CONFIG_TX64X64
- 4, 2, 5, 3
+ 2, 3, 4, 5, 6, 3, 2, 4, 3, 5, 4, 6, 5, 4, 2, 5, 3, 6, 4,
};
-#define TX_UNIT_WIDE_LOG2 (MI_SIZE_LOG2 - tx_size_wide_log2[0])
-#define TX_UNIT_HIGH_LOG2 (MI_SIZE_LOG2 - tx_size_high_log2[0])
-
-static const int tx_size_2d[TX_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2
- 4,
-#endif
- 16, 64, 256, 1024,
-#if CONFIG_TX64X64
- 4096,
-#endif // CONFIG_TX64X64
- 32, 32, 128, 128, 512, 512,
-#if CONFIG_TX64X64
- 2048, 2048,
-#endif // CONFIG_TX64X64
- 64, 64, 256, 256
+static const int tx_size_2d[TX_SIZES_ALL + 1] = {
+ 16, 64, 256, 1024, 4096, 32, 32, 128, 128, 512,
+ 512, 2048, 2048, 64, 64, 256, 256, 1024, 1024,
};
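tx_size_2d is simply width times height for each transform size. A small standalone check against local copies of the first few entries (TX_4X4 .. TX_8X4), not the real headers:

#include <assert.h>

static const int kTxWide[] = { 4, 8, 16, 32, 64, 4, 8 };
static const int kTxHigh[] = { 4, 8, 16, 32, 64, 8, 4 };
static const int kTx2d[]   = { 16, 64, 256, 1024, 4096, 32, 32 };

int main(void) {
  for (int i = 0; i < 7; ++i)
    assert(kTxWide[i] * kTxHigh[i] == kTx2d[i]); /* area = width * height */
  return 0;
}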
static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2
- BLOCK_2X2, // TX_2X2
-#endif
BLOCK_4X4, // TX_4X4
BLOCK_8X8, // TX_8X8
BLOCK_16X16, // TX_16X16
BLOCK_32X32, // TX_32X32
-#if CONFIG_TX64X64
BLOCK_64X64, // TX_64X64
-#endif // CONFIG_TX64X64
BLOCK_4X8, // TX_4X8
BLOCK_8X4, // TX_8X4
BLOCK_8X16, // TX_8X16
BLOCK_16X8, // TX_16X8
BLOCK_16X32, // TX_16X32
BLOCK_32X16, // TX_32X16
-#if CONFIG_TX64X64
BLOCK_32X64, // TX_32X64
BLOCK_64X32, // TX_64X32
-#endif // CONFIG_TX64X64
BLOCK_4X16, // TX_4X16
BLOCK_16X4, // TX_16X4
BLOCK_8X32, // TX_8X32
BLOCK_32X8, // TX_32X8
+ BLOCK_16X64, // TX_16X64
+ BLOCK_64X16, // TX_64X16
};
static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2
- TX_2X2, // TX_2X2
-#endif
TX_4X4, // TX_4X4
TX_8X8, // TX_8X8
TX_16X16, // TX_16X16
TX_32X32, // TX_32X32
-#if CONFIG_TX64X64
TX_64X64, // TX_64X64
-#endif // CONFIG_TX64X64
TX_4X4, // TX_4X8
TX_4X4, // TX_8X4
TX_8X8, // TX_8X16
TX_8X8, // TX_16X8
TX_16X16, // TX_16X32
TX_16X16, // TX_32X16
-#if CONFIG_TX64X64
TX_32X32, // TX_32X64
TX_32X32, // TX_64X32
-#endif // CONFIG_TX64X64
TX_4X4, // TX_4X16
TX_4X4, // TX_16X4
TX_8X8, // TX_8X32
TX_8X8, // TX_32X8
+ TX_16X16, // TX_16X64
+ TX_16X16, // TX_64X16
};
static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2
- TX_2X2, // TX_2X2
-#endif
TX_4X4, // TX_4X4
TX_8X8, // TX_8X8
TX_16X16, // TX_16X16
TX_32X32, // TX_32X32
-#if CONFIG_TX64X64
TX_64X64, // TX_64X64
-#endif // CONFIG_TX64X64
TX_8X8, // TX_4X8
TX_8X8, // TX_8X4
TX_16X16, // TX_8X16
TX_16X16, // TX_16X8
TX_32X32, // TX_16X32
TX_32X32, // TX_32X16
-#if CONFIG_TX64X64
TX_64X64, // TX_32X64
TX_64X64, // TX_64X32
-#endif // CONFIG_TX64X64
TX_16X16, // TX_4X16
TX_16X16, // TX_16X4
TX_32X32, // TX_8X32
TX_32X32, // TX_32X8
+ TX_64X64, // TX_16X64
+ TX_64X64, // TX_64X16
+};
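The two maps above pair up: txsize_sqr_map rounds a transform down to the square of its shorter side, while txsize_sqr_up_map rounds up to the square of its longer side (for example TX_16X64 maps to 16x16 and 64x64 respectively). A minimal sketch of that pairing, using hypothetical helpers on pixel dimensions:

#include <stdio.h>

static int sqr_down(int w, int h) { return w < h ? w : h; } /* shorter side */
static int sqr_up(int w, int h)   { return w > h ? w : h; } /* longer side  */

int main(void) {
  printf("16x64 -> %dx%d (sqr), %dx%d (sqr_up)\n",
         sqr_down(16, 64), sqr_down(16, 64), sqr_up(16, 64), sqr_up(16, 64));
  return 0;
}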
+
+static const int8_t txsize_log2_minus4[TX_SIZES_ALL] = {
+ 0, // TX_4X4
+ 2, // TX_8X8
+ 4, // TX_16X16
+ 6, // TX_32X32
+ 6, // TX_64X64
+ 1, // TX_4X8
+ 1, // TX_8X4
+ 3, // TX_8X16
+ 3, // TX_16X8
+ 5, // TX_16X32
+ 5, // TX_32X16
+ 6, // TX_32X64
+ 6, // TX_64X32
+ 2, // TX_4X16
+ 2, // TX_16X4
+ 4, // TX_8X32
+ 4, // TX_32X8
+ 5, // TX_16X64
+ 5, // TX_64X16
};
/* clang-format off */
static const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = {
TX_4X4, // ONLY_4X4
- TX_8X8, // ALLOW_8X8
- TX_16X16, // ALLOW_16X16
- TX_32X32, // ALLOW_32X32
-#if CONFIG_TX64X64
- TX_64X64, // ALLOW_64X64
+ TX_64X64, // TX_MODE_LARGEST
TX_64X64, // TX_MODE_SELECT
-#else
- TX_32X32, // TX_MODE_SELECT
-#endif // CONFIG_TX64X64
};
/* clang-format on */
static const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES_ALL][2][2] = {
-// ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1
-// ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { { BLOCK_2X2, BLOCK_INVALID }, { BLOCK_INVALID, BLOCK_INVALID } },
- { { BLOCK_2X4, BLOCK_INVALID }, { BLOCK_INVALID, BLOCK_INVALID } },
- { { BLOCK_4X2, BLOCK_INVALID }, { BLOCK_INVALID, BLOCK_INVALID } },
- { { BLOCK_4X4, BLOCK_4X2 }, { BLOCK_2X4, BLOCK_2X2 } },
- { { BLOCK_4X8, BLOCK_4X4 }, { BLOCK_INVALID, BLOCK_2X4 } },
- { { BLOCK_8X4, BLOCK_INVALID }, { BLOCK_4X4, BLOCK_4X2 } },
-#elif CONFIG_CB4X4
+ // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1
+ // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1
{ { BLOCK_4X4, BLOCK_4X4 }, { BLOCK_4X4, BLOCK_4X4 } },
- { { BLOCK_4X8, BLOCK_4X4 }, { BLOCK_INVALID, BLOCK_4X4 } },
- { { BLOCK_8X4, BLOCK_INVALID }, { BLOCK_4X4, BLOCK_4X4 } },
-#else
- { { BLOCK_4X4, BLOCK_INVALID }, { BLOCK_INVALID, BLOCK_INVALID } },
- { { BLOCK_4X8, BLOCK_4X4 }, { BLOCK_INVALID, BLOCK_INVALID } },
- { { BLOCK_8X4, BLOCK_INVALID }, { BLOCK_4X4, BLOCK_INVALID } },
-#endif
+ { { BLOCK_4X8, BLOCK_4X4 }, { BLOCK_4X4, BLOCK_4X4 } },
+ { { BLOCK_8X4, BLOCK_4X4 }, { BLOCK_4X4, BLOCK_4X4 } },
{ { BLOCK_8X8, BLOCK_8X4 }, { BLOCK_4X8, BLOCK_4X4 } },
- { { BLOCK_8X16, BLOCK_8X8 }, { BLOCK_INVALID, BLOCK_4X8 } },
- { { BLOCK_16X8, BLOCK_INVALID }, { BLOCK_8X8, BLOCK_8X4 } },
+ { { BLOCK_8X16, BLOCK_8X8 }, { BLOCK_4X16, BLOCK_4X8 } },
+ { { BLOCK_16X8, BLOCK_16X4 }, { BLOCK_8X8, BLOCK_8X4 } },
{ { BLOCK_16X16, BLOCK_16X8 }, { BLOCK_8X16, BLOCK_8X8 } },
- { { BLOCK_16X32, BLOCK_16X16 }, { BLOCK_INVALID, BLOCK_8X16 } },
- { { BLOCK_32X16, BLOCK_INVALID }, { BLOCK_16X16, BLOCK_16X8 } },
+ { { BLOCK_16X32, BLOCK_16X16 }, { BLOCK_8X32, BLOCK_8X16 } },
+ { { BLOCK_32X16, BLOCK_32X8 }, { BLOCK_16X16, BLOCK_16X8 } },
{ { BLOCK_32X32, BLOCK_32X16 }, { BLOCK_16X32, BLOCK_16X16 } },
- { { BLOCK_32X64, BLOCK_32X32 }, { BLOCK_INVALID, BLOCK_16X32 } },
- { { BLOCK_64X32, BLOCK_INVALID }, { BLOCK_32X32, BLOCK_32X16 } },
+ { { BLOCK_32X64, BLOCK_32X32 }, { BLOCK_16X64, BLOCK_16X32 } },
+ { { BLOCK_64X32, BLOCK_64X16 }, { BLOCK_32X32, BLOCK_32X16 } },
{ { BLOCK_64X64, BLOCK_64X32 }, { BLOCK_32X64, BLOCK_32X32 } },
-#if CONFIG_EXT_PARTITION
{ { BLOCK_64X128, BLOCK_64X64 }, { BLOCK_INVALID, BLOCK_32X64 } },
{ { BLOCK_128X64, BLOCK_INVALID }, { BLOCK_64X64, BLOCK_64X32 } },
{ { BLOCK_128X128, BLOCK_128X64 }, { BLOCK_64X128, BLOCK_64X64 } },
-#endif // CONFIG_EXT_PARTITION
- { { BLOCK_4X16, BLOCK_4X8 }, { BLOCK_INVALID, BLOCK_4X8 } },
- { { BLOCK_16X4, BLOCK_INVALID }, { BLOCK_8X4, BLOCK_8X4 } },
+ { { BLOCK_4X16, BLOCK_4X8 }, { BLOCK_4X16, BLOCK_4X8 } },
+ { { BLOCK_16X4, BLOCK_16X4 }, { BLOCK_8X4, BLOCK_8X4 } },
{ { BLOCK_8X32, BLOCK_8X16 }, { BLOCK_INVALID, BLOCK_4X16 } },
{ { BLOCK_32X8, BLOCK_INVALID }, { BLOCK_16X8, BLOCK_16X4 } },
{ { BLOCK_16X64, BLOCK_16X32 }, { BLOCK_INVALID, BLOCK_8X32 } },
- { { BLOCK_64X16, BLOCK_INVALID }, { BLOCK_32X16, BLOCK_32X8 } },
-#if CONFIG_EXT_PARTITION
- { { BLOCK_32X128, BLOCK_32X64 }, { BLOCK_INVALID, BLOCK_16X64 } },
- { { BLOCK_128X32, BLOCK_INVALID }, { BLOCK_64X32, BLOCK_64X16 } },
-#endif // CONFIG_EXT_PARTITION
+ { { BLOCK_64X16, BLOCK_INVALID }, { BLOCK_32X16, BLOCK_32X8 } }
};
-static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
-// ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1
-// ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1
-#if CONFIG_CHROMA_2X2
- {
- // BLOCK_2X2
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#if CONFIG_TX64X64
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#endif // CONFIG_TX64X64
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#if CONFIG_TX64X64
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#endif // CONFIG_TX64X64
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- },
- {
- // BLOCK_2X4
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#if CONFIG_TX64X64
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#endif // CONFIG_TX64X64
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#if CONFIG_TX64X64
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#endif // CONFIG_TX64X64
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- },
- {
- // BLOCK_4X2
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#if CONFIG_TX64X64
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#endif // CONFIG_TX64X64
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#if CONFIG_TX64X64
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#endif // CONFIG_TX64X64
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- },
-#elif CONFIG_CHROMA_SUB8X8
- {
- // BLOCK_2x2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- },
- {
- // BLOCK_2X4
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- },
- {
- // BLOCK_4X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- },
-#endif
- {
-// BLOCK_4X4
-#if CONFIG_CHROMA_2X2
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_4X4, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#else
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- },
- {
-// BLOCK_4X8
-#if CONFIG_CHROMA_2X2
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_4X4, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#else
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
-#if CONFIG_CHROMA_2X2
- { { TX_4X8, TX_4X4 }, { TX_2X2, TX_2X2 } }, // used
-#else
- { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } }, // used
-#endif
- { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
- },
- {
-// BLOCK_8X4
-#if CONFIG_CHROMA_2X2
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_4X4, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#else
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_CHROMA_2X2
- { { TX_8X4, TX_2X2 }, { TX_4X4, TX_2X2 } }, // used
-#else
- { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, // used
-#endif
- { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- },
- {
-// BLOCK_8X8
-#if CONFIG_CHROMA_2X2
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X4 }, { TX_4X8, TX_4X4 } },
- { { TX_8X4, TX_8X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X4 }, { TX_4X8, TX_4X4 } },
- { { TX_8X8, TX_8X4 }, { TX_4X8, TX_4X4 } },
- { { TX_8X8, TX_8X4 }, { TX_4X8, TX_4X4 } },
- { { TX_8X8, TX_8X4 }, { TX_4X8, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X4 }, { TX_4X8, TX_4X4 } },
- { { TX_8X4, TX_8X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X4 }, { TX_4X8, TX_4X4 } },
- { { TX_8X8, TX_8X4 }, { TX_4X8, TX_4X4 } },
- },
- {
-// BLOCK_8X16
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_8X16, TX_8X8 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X16, TX_8X8 }, { TX_4X8, TX_4X8 } }, // used
- { { TX_8X16, TX_8X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X16, TX_8X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X16, TX_8X8 }, { TX_4X8, TX_4X8 } },
-#if CONFIG_TX64X64
- { { TX_8X16, TX_8X8 }, { TX_4X4, TX_4X4 } },
- { { TX_8X16, TX_8X8 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X16, TX_8X8 }, { TX_4X16, TX_4X8 } },
- { { TX_8X8, TX_8X8 }, { TX_4X8, TX_4X8 } },
- },
- {
-// BLOCK_16X8
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
- { { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
- { { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_16X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X4 }, { TX_4X8, TX_4X4 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_16X8, TX_8X4 }, { TX_8X8, TX_8X4 } },
- { { TX_16X8, TX_8X4 }, { TX_8X8, TX_8X4 } }, // used
- { { TX_16X8, TX_8X4 }, { TX_8X8, TX_8X4 } },
- { { TX_16X8, TX_8X4 }, { TX_8X8, TX_8X4 } },
-#if CONFIG_TX64X64
- { { TX_16X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
- { { TX_16X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X4 }, { TX_4X8, TX_4X4 } },
- { { TX_16X4, TX_16X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X8, TX_8X4 }, { TX_8X8, TX_8X4 } },
- { { TX_16X8, TX_16X4 }, { TX_8X8, TX_8X4 } },
- },
- {
-// BLOCK_16X16
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } },
-#if CONFIG_TX64X64
- { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
- { { TX_16X8, TX_16X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_16X8 }, { TX_8X16, TX_8X8 } },
- { { TX_16X16, TX_16X8 }, { TX_8X16, TX_8X8 } },
-#if CONFIG_TX64X64
- { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } },
-#endif // CONFIG_TX64X64
- { { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
- { { TX_16X4, TX_16X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
- { { TX_16X8, TX_16X8 }, { TX_8X8, TX_8X8 } },
- },
- {
-// BLOCK_16X32
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
-#if CONFIG_TX64X64
- { { TX_16X32, TX_16X16 }, { TX_8X8, TX_8X8 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X16, TX_8X16 }, { TX_8X16, TX_8X16 } },
- { { TX_16X8, TX_16X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X32, TX_16X16 }, { TX_8X16, TX_8X16 } }, // used
- { { TX_16X32, TX_16X16 }, { TX_8X16, TX_8X16 } },
-#if CONFIG_TX64X64
- { { TX_16X32, TX_16X16 }, { TX_8X8, TX_8X8 } },
- { { TX_16X32, TX_16X16 }, { TX_8X8, TX_8X8 } },
-#endif // CONFIG_TX64X64
- { { TX_4X16, TX_4X16 }, { TX_4X16, TX_4X16 } },
- { { TX_16X4, TX_16X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X32, TX_8X16 }, { TX_8X32, TX_8X16 } },
- { { TX_16X8, TX_16X8 }, { TX_8X8, TX_8X8 } },
- },
- {
-// BLOCK_32X16
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
- { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
-#if CONFIG_TX64X64
- { { TX_32X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
- { { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
- { { TX_32X16, TX_16X8 }, { TX_16X16, TX_16X8 } },
- { { TX_32X16, TX_16X8 }, { TX_16X16, TX_16X8 } }, // used
-#if CONFIG_TX64X64
- { { TX_32X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
- { { TX_32X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
-#endif // CONFIG_TX64X64
- { { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
- { { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
- { { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
- { { TX_32X8, TX_32X8 }, { TX_16X8, TX_16X8 } },
- },
- {
-// BLOCK_32X32
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
- { { TX_32X32, TX_16X16 }, { TX_16X16, TX_16X16 } },
-#if CONFIG_TX64X64
- { { TX_32X32, TX_16X16 }, { TX_16X16, TX_16X16 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X16, TX_8X16 }, { TX_8X16, TX_8X16 } },
- { { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
- { { TX_16X32, TX_16X16 }, { TX_16X32, TX_16X16 } },
- { { TX_32X16, TX_32X16 }, { TX_16X16, TX_16X16 } },
-#if CONFIG_TX64X64
- { { TX_32X32, TX_16X16 }, { TX_16X16, TX_16X16 } },
- { { TX_32X32, TX_16X16 }, { TX_16X16, TX_16X16 } },
-#endif // CONFIG_TX64X64
- { { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
- { { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
- { { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
- { { TX_32X8, TX_32X8 }, { TX_16X8, TX_16X8 } },
- },
- {
-// BLOCK_32X64
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
- { { TX_32X32, TX_32X32 }, { TX_16X16, TX_16X16 } },
-#if CONFIG_TX64X64
- { { TX_32X32, TX_32X32 }, { TX_16X16, TX_16X16 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X16, TX_8X16 }, { TX_8X16, TX_8X16 } },
- { { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
- { { TX_16X32, TX_16X32 }, { TX_16X16, TX_16X16 } },
- { { TX_32X16, TX_32X16 }, { TX_16X16, TX_16X16 } },
-#if CONFIG_TX64X64
- { { TX_32X64, TX_32X32 }, { TX_16X16, TX_16X16 } },
- { { TX_32X32, TX_32X32 }, { TX_16X16, TX_16X16 } },
-#endif // CONFIG_TX64X64
- { { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
- { { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
- { { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
- { { TX_32X8, TX_32X8 }, { TX_16X8, TX_16X8 } },
- },
- {
-// BLOCK_64X32
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
- { { TX_32X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
-#if CONFIG_TX64X64
- { { TX_32X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X16, TX_8X16 }, { TX_8X16, TX_8X16 } },
- { { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
- { { TX_16X32, TX_16X16 }, { TX_16X32, TX_16X16 } },
- { { TX_32X16, TX_16X16 }, { TX_32X16, TX_16X16 } },
-#if CONFIG_TX64X64
- { { TX_32X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
- { { TX_64X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
-#endif // CONFIG_TX64X64
- { { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
- { { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
- { { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
- { { TX_32X8, TX_32X8 }, { TX_16X8, TX_16X8 } },
- },
- {
-// BLOCK_64X64
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
- { { TX_32X32, TX_32X32 }, { TX_32X32, TX_32X32 } },
-#if CONFIG_TX64X64
- { { TX_64X64, TX_32X32 }, { TX_32X32, TX_32X32 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X16, TX_8X16 }, { TX_8X16, TX_8X16 } },
- { { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
- { { TX_16X32, TX_16X32 }, { TX_16X32, TX_16X32 } },
- { { TX_32X16, TX_32X16 }, { TX_32X16, TX_16X16 } },
-#if CONFIG_TX64X64
- { { TX_32X64, TX_32X32 }, { TX_16X16, TX_16X16 } },
- { { TX_64X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
-#endif // CONFIG_TX64X64
- { { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
- { { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
- { { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
- { { TX_32X8, TX_32X8 }, { TX_16X8, TX_16X8 } },
- },
-#if CONFIG_EXT_PARTITION
- {
-// BLOCK_64X128
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
- { { TX_32X32, TX_32X32 }, { TX_32X32, TX_32X32 } },
-#if CONFIG_TX64X64
- { { TX_64X64, TX_32X32 }, { TX_32X32, TX_32X32 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X16, TX_8X16 }, { TX_8X16, TX_8X16 } },
- { { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
- { { TX_16X32, TX_16X32 }, { TX_16X32, TX_16X32 } },
- { { TX_32X16, TX_32X16 }, { TX_32X16, TX_32X16 } },
-#if CONFIG_TX64X64
- { { TX_32X64, TX_32X32 }, { TX_16X16, TX_16X16 } },
- { { TX_64X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
-#endif // CONFIG_TX64X64
- { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
- { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
- { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
- { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
- },
- {
-// BLOCK_128X64
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
- { { TX_32X32, TX_32X32 }, { TX_32X32, TX_32X32 } },
-#if CONFIG_TX64X64
- { { TX_64X64, TX_32X32 }, { TX_32X32, TX_32X32 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X16, TX_8X16 }, { TX_8X16, TX_8X16 } },
- { { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
- { { TX_16X32, TX_16X32 }, { TX_16X32, TX_16X32 } },
- { { TX_32X16, TX_32X16 }, { TX_32X16, TX_32X16 } },
-#if CONFIG_TX64X64
- { { TX_32X64, TX_32X32 }, { TX_16X16, TX_16X16 } },
- { { TX_64X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
-#endif // CONFIG_TX64X64
- { { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
- { { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
- { { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
- { { TX_32X8, TX_32X8 }, { TX_16X8, TX_16X8 } },
- },
- {
-// BLOCK_128X128
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
- { { TX_32X32, TX_32X32 }, { TX_32X32, TX_32X32 } },
-#if CONFIG_TX64X64
- { { TX_64X64, TX_32X32 }, { TX_32X32, TX_32X32 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X16, TX_8X16 }, { TX_8X16, TX_8X16 } },
- { { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
- { { TX_16X32, TX_16X32 }, { TX_16X32, TX_16X32 } },
- { { TX_32X16, TX_32X16 }, { TX_32X16, TX_32X16 } },
-#if CONFIG_TX64X64
- { { TX_32X64, TX_32X32 }, { TX_16X16, TX_16X16 } },
- { { TX_64X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
-#endif // CONFIG_TX64X64
- { { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
- { { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
- { { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
- { { TX_32X8, TX_32X8 }, { TX_16X8, TX_16X8 } },
- },
-#endif // CONFIG_EXT_PARTITION
- {
-// BLOCK_4X16
-#if CONFIG_CHROMA_2X2
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_4X4, TX_4X4 }, { TX_2X2, TX_2X2 } },
-#else
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X8 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X8, TX_4X8 }, { TX_4X4, TX_4X4 } },
- { { TX_4X8, TX_4X8 }, { TX_4X4, TX_4X4 } },
- { { TX_4X8, TX_4X8 }, { TX_4X4, TX_4X4 } },
- { { TX_4X8, TX_4X8 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X16, TX_4X8 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X16, TX_4X8 }, { TX_4X4, TX_4X4 } },
- { { TX_4X8, TX_4X8 }, { TX_4X4, TX_4X4 } },
- },
- {
-// BLOCK_16X4
-#if CONFIG_CHROMA_2X2
- { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
- { { TX_4X4, TX_2X2 }, { TX_4X4, TX_2X2 } },
-#else
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
- { { TX_8X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
- { { TX_8X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
- { { TX_8X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
- { { TX_8X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_16X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
- { { TX_8X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
- { { TX_16X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
- },
- {
-// BLOCK_8X32
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X16, TX_8X16 }, { TX_4X8, TX_4X8 } },
- { { TX_8X8, TX_8X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X16, TX_8X16 }, { TX_4X8, TX_4X8 } },
- { { TX_8X16, TX_8X16 }, { TX_4X8, TX_4X8 } },
-#if CONFIG_TX64X64
- { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X16, TX_4X16 }, { TX_4X16, TX_4X16 } },
- { { TX_8X4, TX_8X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X32, TX_8X16 }, { TX_4X16, TX_4X16 } },
- { { TX_8X8, TX_8X8 }, { TX_4X8, TX_4X8 } },
- },
- {
-// BLOCK_32X8
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif // CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
- { { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
- { { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
-#if CONFIG_TX64X64
- { { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X4 }, { TX_4X8, TX_4X4 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X8, TX_8X4 }, { TX_8X8, TX_8X4 } },
- { { TX_16X8, TX_8X4 }, { TX_16X8, TX_8X4 } },
- { { TX_16X8, TX_8X4 }, { TX_16X8, TX_8X4 } },
- { { TX_16X8, TX_8X4 }, { TX_16X8, TX_8X4 } },
-#if CONFIG_TX64X64
- { { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
- { { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
-#endif // CONFIG_TX64X64
- { { TX_4X8, TX_4X4 }, { TX_4X8, TX_4X4 } },
- { { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
- { { TX_8X8, TX_8X4 }, { TX_8X8, TX_8X4 } },
- { { TX_32X8, TX_16X4 }, { TX_16X8, TX_16X4 } },
- },
- {
-// BLOCK_16X64
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
-#if CONFIG_TX64X64
- { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
-#endif
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X16, TX_8X16 }, { TX_8X16, TX_8X16 } },
- { { TX_16X8, TX_16X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X32, TX_16X32 }, { TX_8X16, TX_8X16 } },
- { { TX_16X16, TX_16X16 }, { TX_8X16, TX_8X16 } },
-#if CONFIG_TX64X64
- { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
-#endif
- { { TX_4X16, TX_4X16 }, { TX_4X16, TX_4X16 } },
- { { TX_16X4, TX_16X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X32, TX_8X32 }, { TX_8X32, TX_8X32 } },
- { { TX_16X8, TX_16X8 }, { TX_8X8, TX_8X8 } },
- },
- {
-// BLOCK_64X16
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
- { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
-#if CONFIG_TX64X64
- { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
-#endif
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
- { { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
- { { TX_16X16, TX_16X8 }, { TX_16X16, TX_16X8 } },
- { { TX_32X16, TX_16X8 }, { TX_32X16, TX_16X8 } },
-#if CONFIG_TX64X64
- { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
- { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
-#endif
- { { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
- { { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
- { { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
- { { TX_32X8, TX_32X8 }, { TX_32X8, TX_32X8 } },
- },
-#if CONFIG_EXT_PARTITION
- {
-// BLOCK_32X128
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
- { { TX_32X32, TX_32X32 }, { TX_16X16, TX_16X16 } },
-#if CONFIG_TX64X64
- { { TX_32X32, TX_32X32 }, { TX_16X16, TX_16X16 } },
-#endif
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X16, TX_8X16 }, { TX_8X16, TX_8X16 } },
- { { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
- { { TX_16X32, TX_16X32 }, { TX_16X32, TX_16X32 } },
- { { TX_32X16, TX_32X16 }, { TX_16X16, TX_16X16 } },
- { { TX_4X16, TX_4X16 }, { TX_4X16, TX_4X16 } },
- { { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
- { { TX_8X32, TX_8X32 }, { TX_8X32, TX_8X32 } },
- { { TX_32X8, TX_32X8 }, { TX_16X8, TX_16X8 } },
- },
- {
-// BLOCK_128X32
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
- { { TX_32X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
-#if CONFIG_TX64X64
- { { TX_32X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
-#endif
- { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
- { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
- { { TX_8X16, TX_8X16 }, { TX_8X16, TX_8X16 } },
- { { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
- { { TX_16X32, TX_16X16 }, { TX_16X32, TX_16X16 } },
- { { TX_32X16, TX_32X16 }, { TX_32X16, TX_32X16 } },
- { { TX_4X16, TX_4X16 }, { TX_4X16, TX_4X16 } },
- { { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
- { { TX_8X32, TX_8X16 }, { TX_8X32, TX_8X16 } },
- { { TX_32X8, TX_32X8 }, { TX_32X8, TX_32X8 } },
- },
-#endif
-};
-
-// Generates 4 bit field in which each bit set to 1 represents
-// a blocksize partition 1111 means we split 64x64, 32x32, 16x16
-// and 8x8. 1000 means we just split the 64x64 to 32x32
+// Generates a 5-bit field in which each bit set to 1 represents a block-size
+// partition: 11111 means we split 128x128, 64x64, 32x32, 16x16 and 8x8;
+// 10000 means we only split the 128x128 into 64x64.
/* clang-format off */
static const struct {
PARTITION_CONTEXT above;
PARTITION_CONTEXT left;
} partition_context_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_EXT_PARTITION
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { 31, 31 }, // 2X2 - {0b11111, 0b11111}
- { 31, 31 }, // 2X4 - {0b11111, 0b11111}
- { 31, 31 }, // 4X2 - {0b11111, 0b11111}
-#endif
{ 31, 31 }, // 4X4 - {0b11111, 0b11111}
{ 31, 30 }, // 4X8 - {0b11111, 0b11110}
{ 30, 31 }, // 8X4 - {0b11110, 0b11111}
@@ -2070,131 +403,29 @@ static const struct {
{ 16, 0 }, // 64X128- {0b10000, 0b00000}
{ 0, 16 }, // 128X64- {0b00000, 0b10000}
{ 0, 0 }, // 128X128-{0b00000, 0b00000}
-
{ 31, 28 }, // 4X16 - {0b11111, 0b11100}
{ 28, 31 }, // 16X4 - {0b11100, 0b11111}
{ 30, 24 }, // 8X32 - {0b11110, 0b11000}
{ 24, 30 }, // 32X8 - {0b11000, 0b11110}
{ 28, 16 }, // 16X64 - {0b11100, 0b10000}
{ 16, 28 }, // 64X16 - {0b10000, 0b11100}
- { 24, 0 }, // 32X128- {0b11000, 0b00000}
- { 0, 24 }, // 128X32- {0b00000, 0b11000}
-#else
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { 15, 15 }, // 2X2 - {0b1111, 0b1111}
- { 15, 15 }, // 2X4 - {0b1111, 0b1111}
- { 15, 15 }, // 4X2 - {0b1111, 0b1111}
-#endif
- { 15, 15 }, // 4X4 - {0b1111, 0b1111}
- { 15, 14 }, // 4X8 - {0b1111, 0b1110}
- { 14, 15 }, // 8X4 - {0b1110, 0b1111}
- { 14, 14 }, // 8X8 - {0b1110, 0b1110}
- { 14, 12 }, // 8X16 - {0b1110, 0b1100}
- { 12, 14 }, // 16X8 - {0b1100, 0b1110}
- { 12, 12 }, // 16X16 - {0b1100, 0b1100}
- { 12, 8 }, // 16X32 - {0b1100, 0b1000}
- { 8, 12 }, // 32X16 - {0b1000, 0b1100}
- { 8, 8 }, // 32X32 - {0b1000, 0b1000}
- { 8, 0 }, // 32X64 - {0b1000, 0b0000}
- { 0, 8 }, // 64X32 - {0b0000, 0b1000}
- { 0, 0 }, // 64X64 - {0b0000, 0b0000}
-
- { 15, 12 }, // 4X16 - {0b1111, 0b1100}
- { 12, 15 }, // 16X4 - {0b1100, 0b1111}
- { 8, 14 }, // 8X32 - {0b1110, 0b1000}
- { 14, 8 }, // 32X8 - {0b1000, 0b1110}
- { 12, 0 }, // 16X64- {0b1100, 0b0000}
- { 0, 12 }, // 64X16- {0b0000, 0b1100}
-#endif // CONFIG_EXT_PARTITION
};
/* clang-format on */
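The above/left values in partition_context_lookup follow a simple pattern: bit i of
the 5-bit field is set exactly when the block's width (for `above`) or height (for
`left`) is at most 4 << i. A minimal standalone sketch of that encoding, using a
hypothetical helper name that is not taken from the aom sources:

#include <assert.h>
#include <stdio.h>

/* Hypothetical helper (not from the aom sources): derive one half of a
 * partition_context_lookup entry from a block dimension in pixels.
 * Bit i of the 5-bit field is set when the dimension is <= (4 << i). */
static int partition_ctx_bits(int dim_px) {
  int shift = 0; /* log2(dim_px) - 2 */
  while ((4 << shift) < dim_px) ++shift;
  return (0x1F << shift) & 0x1F;
}

int main(void) {
  /* BLOCK_8X32: `above` is driven by the width, `left` by the height. */
  assert(partition_ctx_bits(8) == 30);  /* 0b11110 */
  assert(partition_ctx_bits(32) == 24); /* 0b11000 */
  /* BLOCK_128X128 contributes no context bits at all. */
  assert(partition_ctx_bits(128) == 0);
  printf("8x32 -> { %d, %d }\n", partition_ctx_bits(8), partition_ctx_bits(32));
  return 0;
}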
-#if CONFIG_KF_CTX
static const int intra_mode_context[INTRA_MODES] = {
- 0, 1, 2, 3, 4, 4, 4, 4, 3, 0,
-#if CONFIG_SMOOTH_HV
- 1, 2,
-#endif
- 0,
-};
-#endif
-
-#if CONFIG_SUPERTX
-static const TX_SIZE uvsupertx_size_lookup[TX_SIZES][2][2] = {
-// ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1
-// ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1
-#if CONFIG_CHROMA_2X2
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
-#endif
- { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
- { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } },
- { { TX_32X32, TX_16X16 }, { TX_16X16, TX_16X16 } },
-#if CONFIG_TX64X64
- { { TX_64X64, TX_32X32 }, { TX_32X32, TX_32X32 } },
-#endif // CONFIG_TX64X64
-};
-
-#if CONFIG_EXT_PARTITION_TYPES
-static const int partition_supertx_context_lookup[EXT_PARTITION_TYPES] = {
- -1, 0, 0, 1, 0, 0, 0, 0, 0, 0
+ 0, 1, 2, 3, 4, 4, 4, 4, 3, 0, 1, 2, 0,
};
-#else
-static const int partition_supertx_context_lookup[PARTITION_TYPES] = { -1, 0, 0,
- 1 };
-#endif // CONFIG_EXT_PARTITION_TYPES
-#endif // CONFIG_SUPERTX
-
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
-// NCOBMC_ADAPT_INTRPL only supports block size >= BLOCK_8X8 and <= BLOCK_64X64
-static const ADAPT_OVERLAP_BLOCK adapt_overlap_block_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- ADAPT_OVERLAP_BLOCK_INVALID, // BLOCK_2X2
- ADAPT_OVERLAP_BLOCK_INVALID, // BLOCK_2X4
- ADAPT_OVERLAP_BLOCK_INVALID, // BLOCK_4X2
-#endif
- ADAPT_OVERLAP_BLOCK_INVALID, // BLOCK_4X4
- ADAPT_OVERLAP_BLOCK_INVALID, // BLOCK_4X8
- ADAPT_OVERLAP_BLOCK_INVALID, // BLOCK_8X4
-
- // the rest of the block sizes round to the largest squared block less than
- // the given block size
- ADAPT_OVERLAP_BLOCK_8X8, ADAPT_OVERLAP_BLOCK_8X8, ADAPT_OVERLAP_BLOCK_8X8,
- ADAPT_OVERLAP_BLOCK_16X16, ADAPT_OVERLAP_BLOCK_16X16,
- ADAPT_OVERLAP_BLOCK_16X16, ADAPT_OVERLAP_BLOCK_32X32,
- ADAPT_OVERLAP_BLOCK_32X32, ADAPT_OVERLAP_BLOCK_32X32,
- ADAPT_OVERLAP_BLOCK_64X64,
-#if CONFIG_EXT_PARTITION
- ADAPT_OVERLAP_BLOCK_INVALID, ADAPT_OVERLAP_BLOCK_INVALID,
- ADAPT_OVERLAP_BLOCK_INVALID,
-#endif // CONFIG_EXT_PARTITION
- ADAPT_OVERLAP_BLOCK_INVALID, ADAPT_OVERLAP_BLOCK_INVALID,
- ADAPT_OVERLAP_BLOCK_INVALID, ADAPT_OVERLAP_BLOCK_INVALID,
- ADAPT_OVERLAP_BLOCK_INVALID, ADAPT_OVERLAP_BLOCK_INVALID,
-#if CONFIG_EXT_PARTITION
- ADAPT_OVERLAP_BLOCK_INVALID, ADAPT_OVERLAP_BLOCK_INVALID
-#endif // CONFIG_EXT_PARTITION
+// Note: this table is also used in unit tests, so whenever it changes the
+// unit tests need to be updated accordingly.
+static const int quant_dist_weight[4][2] = {
+ { 2, 3 }, { 2, 5 }, { 2, 7 }, { 1, MAX_FRAME_DISTANCE }
};
-
-static const BLOCK_SIZE bsize_2_sqr_bsize[BLOCK_SIZES] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- BLOCK_2X2, BLOCK_2X2, BLOCK_2X2,
-#endif
- BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8,
- BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32,
- BLOCK_32X32, BLOCK_32X32, BLOCK_64X64,
-#if CONFIG_EXT_PARTITION
- BLOCK_64X64, BLOCK_64X64,
-#endif
+static const int quant_dist_lookup_table[2][4][2] = {
+ { { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 } },
+ { { 7, 9 }, { 5, 11 }, { 4, 12 }, { 3, 13 } },
};
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
-
-#if CONFIG_ADAPT_SCAN
-#define EOB_THRESHOLD_NUM 2
-#endif
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/convolve.c b/third_party/aom/av1/common/convolve.c
index 5476f59a6..d57f44f8b 100644
--- a/third_party/aom/av1/common/convolve.c
+++ b/third_party/aom/av1/common/convolve.c
@@ -12,76 +12,60 @@
#include <assert.h>
#include <string.h>
-#include "./aom_dsp_rtcd.h"
-#include "./av1_rtcd.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
#include "av1/common/blockd.h"
#include "av1/common/convolve.h"
#include "av1/common/filter.h"
#include "av1/common/onyxc_int.h"
+#include "av1/common/resize.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/mem.h"
-#define MAX_BLOCK_WIDTH (MAX_SB_SIZE)
-#define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)
-#define MAX_STEP (32)
-
-void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_x_q4, int x_step_q4,
- ConvolveParams *conv_params) {
- int x, y;
- int filter_size = filter_params.taps;
- assert(conv_params->round == CONVOLVE_OPT_ROUND);
- src -= filter_size / 2 - 1;
- for (y = 0; y < h; ++y) {
- int x_q4 = subpel_x_q4;
- for (x = 0; x < w; ++x) {
- const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params, x_q4 & SUBPEL_MASK);
- int k, sum = 0;
- for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
-
- sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
- if (conv_params->do_average)
- dst[x] = ROUND_POWER_OF_TWO(dst[x] + sum, 1);
- else
- dst[x] = sum;
-
- x_q4 += x_step_q4;
+void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ const int16_t *x_filters, int x0_qn,
+ int x_step_qn) {
+ src -= UPSCALE_NORMATIVE_TAPS / 2 - 1;
+ for (int y = 0; y < h; ++y) {
+ int x_qn = x0_qn;
+ for (int x = 0; x < w; ++x) {
+ const uint8_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS];
+ const int x_filter_idx =
+ (x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
+ assert(x_filter_idx <= RS_SUBPEL_MASK);
+ const int16_t *const x_filter =
+ &x_filters[x_filter_idx * UPSCALE_NORMATIVE_TAPS];
+ int sum = 0;
+ for (int k = 0; k < UPSCALE_NORMATIVE_TAPS; ++k)
+ sum += src_x[k] * x_filter[k];
+ dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
+ x_qn += x_step_qn;
}
src += src_stride;
dst += dst_stride;
}
}
-void av1_convolve_horiz_scale(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_x_qn, int x_step_qn,
- ConvolveParams *conv_params) {
- int x, y;
- int filter_size = filter_params.taps;
- assert(conv_params->round == CONVOLVE_OPT_ROUND);
- src -= filter_size / 2 - 1;
- for (y = 0; y < h; ++y) {
- int x_qn = subpel_x_qn;
- for (x = 0; x < w; ++x) {
- const uint8_t *const src_x = &src[x_qn >> SCALE_SUBPEL_BITS];
- const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(x_filter_idx < SUBPEL_SHIFTS);
- const int16_t *x_filter =
- av1_get_interp_filter_subpel_kernel(filter_params, x_filter_idx);
- int k, sum = 0;
- for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
-
- sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
- if (conv_params->do_average)
- dst[x] = ROUND_POWER_OF_TWO(dst[x] + sum, 1);
- else
- dst[x] = sum;
-
+void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride,
+ uint16_t *dst, int dst_stride, int w, int h,
+ const int16_t *x_filters, int x0_qn,
+ int x_step_qn, int bd) {
+ src -= UPSCALE_NORMATIVE_TAPS / 2 - 1;
+ for (int y = 0; y < h; ++y) {
+ int x_qn = x0_qn;
+ for (int x = 0; x < w; ++x) {
+ const uint16_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS];
+ const int x_filter_idx =
+ (x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
+ assert(x_filter_idx <= RS_SUBPEL_MASK);
+ const int16_t *const x_filter =
+ &x_filters[x_filter_idx * UPSCALE_NORMATIVE_TAPS];
+ int sum = 0;
+ for (int k = 0; k < UPSCALE_NORMATIVE_TAPS; ++k)
+ sum += src_x[k] * x_filter[k];
+ dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
x_qn += x_step_qn;
}
src += src_stride;
@@ -89,417 +73,358 @@ void av1_convolve_horiz_scale(const uint8_t *src, int src_stride, uint8_t *dst,
}
}
-void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_y_q4, int y_step_q4,
- ConvolveParams *conv_params) {
- int x, y;
- int filter_size = filter_params.taps;
- assert(conv_params->round == CONVOLVE_OPT_ROUND);
- src -= src_stride * (filter_size / 2 - 1);
- for (x = 0; x < w; ++x) {
- int y_q4 = subpel_y_q4;
- for (y = 0; y < h; ++y) {
- const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params, y_q4 & SUBPEL_MASK);
- int k, sum = 0;
- for (k = 0; k < filter_size; ++k)
- sum += src_y[k * src_stride] * y_filter[k];
-
- sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
- if (conv_params->do_average)
- dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + sum, 1);
- else
- dst[y * dst_stride] = sum;
-
- y_q4 += y_step_q4;
- }
- ++src;
- ++dst;
- }
-}
-
-void av1_convolve_vert_scale(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_y_qn, int y_step_qn,
- ConvolveParams *conv_params) {
- int x, y;
- int filter_size = filter_params.taps;
- assert(conv_params->round == CONVOLVE_OPT_ROUND);
- src -= src_stride * (filter_size / 2 - 1);
- for (x = 0; x < w; ++x) {
- int y_qn = subpel_y_qn;
- for (y = 0; y < h; ++y) {
- const uint8_t *const src_y =
- &src[(y_qn >> SCALE_SUBPEL_BITS) * src_stride];
- const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(y_filter_idx < SUBPEL_SHIFTS);
- const int16_t *y_filter =
- av1_get_interp_filter_subpel_kernel(filter_params, y_filter_idx);
- int k, sum = 0;
- for (k = 0; k < filter_size; ++k)
- sum += src_y[k * src_stride] * y_filter[k];
-
- sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
- if (conv_params->do_average)
- dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + sum, 1);
- else
- dst[y * dst_stride] = sum;
-
- y_qn += y_step_qn;
- }
- ++src;
- ++dst;
- }
-}
-
-static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
+void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
- assert(conv_params->round == CONVOLVE_OPT_ROUND);
- if (conv_params->do_average == 0) {
- int r;
- for (r = 0; r < h; ++r) {
- memcpy(dst, src, w);
- src += src_stride;
- dst += dst_stride;
- }
- } else {
- int r, c;
- for (r = 0; r < h; ++r) {
- for (c = 0; c < w; ++c) {
- dst[c] = clip_pixel(ROUND_POWER_OF_TWO(dst[c] + src[c], 1));
+ int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
+ int im_h = h + filter_params_y->taps - 1;
+ int im_stride = w;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const int bd = 8;
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
+
+ // horizontal filter
+ const uint8_t *src_horiz = src - fo_vert * src_stride;
+ const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
+ for (int y = 0; y < im_h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ int32_t sum = (1 << (bd + FILTER_BITS - 1));
+ for (int k = 0; k < filter_params_x->taps; ++k) {
+ sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
}
- src += src_stride;
- dst += dst_stride;
+ assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
+ im_block[y * im_stride + x] =
+ (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
}
}
-}
-void av1_convolve_horiz_facade(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_x_q4, int x_step_q4,
- ConvolveParams *conv_params) {
- assert(conv_params->round == CONVOLVE_OPT_ROUND);
- if (filter_params.taps == SUBPEL_TAPS) {
- const int16_t *filter_x =
- av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
- if (conv_params->do_average == 0)
- aom_convolve8_horiz(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
- NULL, -1, w, h);
- else
- aom_convolve8_avg_horiz(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, NULL, -1, w, h);
- } else {
- av1_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
- subpel_x_q4, x_step_q4, conv_params);
+ // vertical filter
+ int16_t *src_vert = im_block + fo_vert * im_stride;
+ const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ for (int y = 0; y < h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ int32_t sum = 1 << offset_bits;
+ for (int k = 0; k < filter_params_y->taps; ++k) {
+ sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
+ }
+ assert(0 <= sum && sum < (1 << (offset_bits + 2)));
+ int16_t res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
+ ((1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ dst[y * dst_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(res, bits));
+ }
}
}
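The offsets in av1_convolve_2d_sr_c above exist so every intermediate sum stays
non-negative and fits the int16_t intermediate buffer: the horizontal pass adds
1 << (bd + FILTER_BITS - 1), the vertical pass adds 1 << offset_bits, and the two
terms subtracted from res remove exactly those offsets after round_1. A small
numeric check of that cancellation for a flat 8-bit input, assuming the typical
single-reference configuration round_0 = 3, round_1 = 11 (an assumption; the
function itself only asserts range constraints):

#include <assert.h>
#include <stdio.h>

int main(void) {
  /* Assumed configuration: bd = 8, FILTER_BITS (fb) = 7, round_0 = 3, round_1 = 11. */
  const int bd = 8, fb = 7, r0 = 3, r1 = 11;
  const int offset_bits = bd + 2 * fb - r0; /* 19 */
  const int bits = 2 * fb - r0 - r1;        /* 0: final shift back to pixels */

  /* Feed a flat source of value p through both passes; each pass has DC gain
   * 1 << fb, so the offsets are the only thing the subtraction must undo.
   * (The real code uses rounding shifts; plain shifts suffice for this input.) */
  const long p = 100;
  const long im = ((1L << (bd + fb - 1)) + (p << fb)) >> r0; /* horizontal */
  const long sum = (1L << offset_bits) + (im << fb);         /* vertical   */
  const long res = (sum >> r1) - ((1L << (offset_bits - r1)) +
                                  (1L << (offset_bits - r1 - 1)));
  assert((res >> bits) == p); /* the offsets cancel and the pixel survives */
  printf("reconstructed pixel = %ld\n", res >> bits);
  return 0;
}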
-void av1_convolve_horiz_facade_c(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_x_q4, int x_step_q4,
- ConvolveParams *conv_params) {
- assert(conv_params->round == CONVOLVE_OPT_ROUND);
- if (filter_params.taps == SUBPEL_TAPS) {
- const int16_t *filter_x =
- av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
- if (conv_params->do_average == 0)
- aom_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, NULL, -1, w, h);
- else
- aom_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, NULL, -1, w, h);
- } else {
- av1_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params,
- subpel_x_q4, x_step_q4, conv_params);
- }
-}
+void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ (void)filter_params_x;
+ (void)subpel_x_q4;
+ (void)conv_params;
-void av1_convolve_horiz_facade_scale(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_x_qn, int x_step_qn,
- ConvolveParams *conv_params) {
- assert(conv_params->round == CONVOLVE_OPT_ROUND);
- if (filter_params.taps == SUBPEL_TAPS) {
- const int16_t *filter_x = av1_get_interp_filter_subpel_kernel(
- filter_params, subpel_x_qn >> SCALE_EXTRA_BITS);
- if (conv_params->do_average == 0)
- aom_convolve8_horiz_scale(src, src_stride, dst, dst_stride, filter_x,
- subpel_x_qn, x_step_qn, NULL, 0, -1, w, h);
- else
- aom_convolve8_avg_horiz_scale(src, src_stride, dst, dst_stride, filter_x,
- subpel_x_qn, x_step_qn, NULL, 0, -1, w, h);
- } else {
- av1_convolve_horiz_scale(src, src_stride, dst, dst_stride, w, h,
- filter_params, subpel_x_qn, x_step_qn,
- conv_params);
- }
-}
+ assert(conv_params->round_0 <= FILTER_BITS);
+ assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) ||
+ ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
-void av1_convolve_vert_facade(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_y_q4, int y_step_q4,
- ConvolveParams *conv_params) {
- assert(conv_params->round == CONVOLVE_OPT_ROUND);
- if (filter_params.taps == SUBPEL_TAPS) {
- const int16_t *filter_y =
- av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
- if (conv_params->do_average == 0) {
- aom_convolve8_vert(src, src_stride, dst, dst_stride, NULL, -1, filter_y,
- y_step_q4, w, h);
- } else {
- aom_convolve8_avg_vert(src, src_stride, dst, dst_stride, NULL, -1,
- filter_y, y_step_q4, w, h);
+ // vertical filter
+ const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
+ for (int y = 0; y < h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ int32_t res = 0;
+ for (int k = 0; k < filter_params_y->taps; ++k) {
+ res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
+ }
+ dst[y * dst_stride + x] =
+ clip_pixel(ROUND_POWER_OF_TWO(res, FILTER_BITS));
}
- } else {
- av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
- subpel_y_q4, y_step_q4, conv_params);
}
}
-void av1_convolve_vert_facade_c(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_y_q4, int y_step_q4,
- ConvolveParams *conv_params) {
- assert(conv_params->round == CONVOLVE_OPT_ROUND);
- if (filter_params.taps == SUBPEL_TAPS) {
- const int16_t *filter_y =
- av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
- if (conv_params->do_average == 0) {
- aom_convolve8_vert_c(src, src_stride, dst, dst_stride, NULL, -1, filter_y,
- y_step_q4, w, h);
- } else {
- aom_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, NULL, -1,
- filter_y, y_step_q4, w, h);
- }
- } else {
- av1_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,
- subpel_y_q4, y_step_q4, conv_params);
- }
-}
+void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const int bits = FILTER_BITS - conv_params->round_0;
+ (void)filter_params_y;
+ (void)subpel_y_q4;
+ (void)conv_params;
+
+ assert(bits >= 0);
+ assert((FILTER_BITS - conv_params->round_1) >= 0 ||
+ ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
-void av1_convolve_vert_facade_scale(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_y_qn, int y_step_qn,
- ConvolveParams *conv_params) {
- assert(conv_params->round == CONVOLVE_OPT_ROUND);
- if (filter_params.taps == SUBPEL_TAPS) {
- const int16_t *filter_y = av1_get_interp_filter_subpel_kernel(
- filter_params, subpel_y_qn >> SCALE_EXTRA_BITS);
- if (conv_params->do_average == 0) {
- aom_convolve8_vert_scale(src, src_stride, dst, dst_stride, NULL, 0, -1,
- filter_y, subpel_y_qn, y_step_qn, w, h);
- } else {
- aom_convolve8_avg_vert_scale(src, src_stride, dst, dst_stride, NULL, 0,
- -1, filter_y, subpel_y_qn, y_step_qn, w, h);
+ // horizontal filter
+ const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
+ for (int y = 0; y < h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ int32_t res = 0;
+ for (int k = 0; k < filter_params_x->taps; ++k) {
+ res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
+ }
+ res = ROUND_POWER_OF_TWO(res, conv_params->round_0);
+ dst[y * dst_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(res, bits));
}
- } else {
- av1_convolve_vert_scale(src, src_stride, dst, dst_stride, w, h,
- filter_params, subpel_y_qn, y_step_qn, conv_params);
}
}
-#if CONFIG_CONVOLVE_ROUND
-void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h, int bits) {
- int r, c;
- for (r = 0; r < h; ++r) {
- for (c = 0; c < w; ++c) {
- dst[r * dst_stride + c] =
- clip_pixel(ROUND_POWER_OF_TWO(src[r * src_stride + c], bits));
- }
+void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ (void)filter_params_x;
+ (void)filter_params_y;
+ (void)subpel_x_q4;
+ (void)subpel_y_q4;
+ (void)conv_params;
+
+ for (int y = 0; y < h; ++y) {
+ memcpy(dst + y * dst_stride, src + y * src_stride, w * sizeof(src[0]));
}
}
-#if CONFIG_COMPOUND_ROUND
-void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
- int dst_stride, int w, int h,
- InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- int x, y, k;
- uint8_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
+void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst8,
+ int dst8_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int im_h = h + filter_params_y->taps - 1;
int im_stride = w;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const int bd = 8;
+ const int round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
// horizontal filter
const uint8_t *src_horiz = src - fo_vert * src_stride;
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- for (y = 0; y < im_h; ++y) {
- for (x = 0; x < w; ++x) {
- int32_t sum = 0;
- for (k = 0; k < filter_params_x->taps; ++k) {
+ for (int y = 0; y < im_h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ int32_t sum = (1 << (bd + FILTER_BITS - 1));
+ for (int k = 0; k < filter_params_x->taps; ++k) {
sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
}
+ assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
im_block[y * im_stride + x] =
- clip_pixel(ROUND_POWER_OF_TWO(sum, conv_params->round_0));
+ (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
}
}
// vertical filter
- uint8_t *src_vert = im_block + fo_vert * im_stride;
+ int16_t *src_vert = im_block + fo_vert * im_stride;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- for (y = 0; y < h; ++y) {
- for (x = 0; x < w; ++x) {
- CONV_BUF_TYPE sum = 0;
- for (k = 0; k < filter_params_y->taps; ++k) {
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ for (int y = 0; y < h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ int32_t sum = 1 << offset_bits;
+ for (int k = 0; k < filter_params_y->taps; ++k) {
sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
}
+ assert(0 <= sum && sum < (1 << (offset_bits + 2)));
CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
+ if (conv_params->do_average) {
+ int32_t tmp = dst[y * dst_stride + x];
+ if (conv_params->use_jnt_comp_avg) {
+ tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
+ tmp = tmp >> DIST_PRECISION_BITS;
+ } else {
+ tmp += res;
+ tmp = tmp >> 1;
+ }
+ tmp -= (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ dst8[y * dst8_stride + x] =
+ clip_pixel(ROUND_POWER_OF_TWO(tmp, round_bits));
+ } else {
dst[y * dst_stride + x] = res;
+ }
}
}
}
-void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
- InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_qn, const int x_step_qn,
- const int subpel_y_qn, const int y_step_qn,
- ConvolveParams *conv_params) {
- int x, y, k;
- uint8_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
- int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
- filter_params_y->taps;
- int im_stride = w;
+void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst8,
+ int dst8_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
-
- // horizontal filter
- const uint8_t *src_horiz = src - fo_vert * src_stride;
- for (y = 0; y < im_h; ++y) {
- int x_qn = subpel_x_qn;
- for (x = 0; x < w; ++x, x_qn += x_step_qn) {
- const uint8_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
- const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(x_filter_idx < SUBPEL_SHIFTS);
- const int16_t *x_filter =
- av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
- int sum = 0;
- for (k = 0; k < filter_params_x->taps; ++k)
- sum += x_filter[k] * src_x[k - fo_horiz];
- im_block[y * im_stride + x] =
- clip_pixel(ROUND_POWER_OF_TWO(sum, conv_params->round_0));
- }
- src_horiz += src_stride;
- }
+ const int bits = FILTER_BITS - conv_params->round_0;
+ const int bd = 8;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ const int round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ (void)filter_params_x;
+ (void)subpel_x_q4;
// vertical filter
- const uint8_t *src_vert = im_block + fo_vert * im_stride;
- for (x = 0; x < w; ++x) {
- int y_qn = subpel_y_qn;
- for (y = 0; y < h; ++y, y_qn += y_step_qn) {
- const uint8_t *const src_y =
- &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
- const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(y_filter_idx < SUBPEL_SHIFTS);
- const int16_t *y_filter =
- av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
- CONV_BUF_TYPE sum = 0;
- for (k = 0; k < filter_params_y->taps; ++k) {
- sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
+ const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
+ for (int y = 0; y < h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ int32_t res = 0;
+ for (int k = 0; k < filter_params_y->taps; ++k) {
+ res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
}
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
+ res *= (1 << bits);
+ res = ROUND_POWER_OF_TWO(res, conv_params->round_1) + round_offset;
+
+ if (conv_params->do_average) {
+ int32_t tmp = dst[y * dst_stride + x];
+ if (conv_params->use_jnt_comp_avg) {
+ tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
+ tmp = tmp >> DIST_PRECISION_BITS;
+ } else {
+ tmp += res;
+ tmp = tmp >> 1;
+ }
+ tmp -= round_offset;
+ dst8[y * dst8_stride + x] =
+ clip_pixel(ROUND_POWER_OF_TWO(tmp, round_bits));
+ } else {
dst[y * dst_stride + x] = res;
+ }
}
- src_vert++;
}
}
-#else
-
-/* When convolve-round is enabled and compound-round is disabled, we use a
- high-precision convolve filter.
- Note: For notes on hardware implementations, including the required
- bit widths for various intermediate values, see the comments above
- av1_warp_affine_c.
-*/
-void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
- int dst_stride, int w, int h,
- InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- int x, y, k;
- int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = w;
- const int fo_vert = filter_params_y->taps / 2 - 1;
+void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst8,
+ int dst8_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const int bits = FILTER_BITS - conv_params->round_1;
const int bd = 8;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ const int round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ (void)filter_params_y;
+ (void)subpel_y_q4;
// horizontal filter
- const uint8_t *src_horiz = src - fo_vert * src_stride;
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- for (y = 0; y < im_h; ++y) {
- for (x = 0; x < w; ++x) {
- int32_t sum = (1 << (bd + FILTER_BITS - 1));
- for (k = 0; k < filter_params_x->taps; ++k) {
- sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
+ for (int y = 0; y < h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ int32_t res = 0;
+ for (int k = 0; k < filter_params_x->taps; ++k) {
+ res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
+ }
+ res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0);
+ res += round_offset;
+
+ if (conv_params->do_average) {
+ int32_t tmp = dst[y * dst_stride + x];
+ if (conv_params->use_jnt_comp_avg) {
+ tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
+ tmp = tmp >> DIST_PRECISION_BITS;
+ } else {
+ tmp += res;
+ tmp = tmp >> 1;
+ }
+ tmp -= round_offset;
+ dst8[y * dst8_stride + x] =
+ clip_pixel(ROUND_POWER_OF_TWO(tmp, round_bits));
+ } else {
+ dst[y * dst_stride + x] = res;
}
- assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
- im_block[y * im_stride + x] =
- ROUND_POWER_OF_TWO(sum, conv_params->round_0);
}
}
+}
- // vertical filter
- int32_t *src_vert = im_block + fo_vert * im_stride;
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
+void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride,
+ uint8_t *dst8, int dst8_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
+ const int bd = 8;
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- for (y = 0; y < h; ++y) {
- for (x = 0; x < w; ++x) {
- CONV_BUF_TYPE sum = 1 << offset_bits;
- for (k = 0; k < filter_params_y->taps; ++k) {
- sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
- }
- assert(0 <= sum && sum < (1 << (offset_bits + 2)));
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
- ((1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1)));
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
+ const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ (void)filter_params_x;
+ (void)filter_params_y;
+ (void)subpel_x_q4;
+ (void)subpel_y_q4;
+
+ for (int y = 0; y < h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ CONV_BUF_TYPE res = src[y * src_stride + x] << bits;
+ res += round_offset;
+
+ if (conv_params->do_average) {
+ int32_t tmp = dst[y * dst_stride + x];
+ if (conv_params->use_jnt_comp_avg) {
+ tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
+ tmp = tmp >> DIST_PRECISION_BITS;
+ } else {
+ tmp += res;
+ tmp = tmp >> 1;
+ }
+ tmp -= round_offset;
+ dst8[y * dst8_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits));
+ } else {
dst[y * dst_stride + x] = res;
+ }
}
}
}
-void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
+void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst8,
+ int dst8_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_qn, const int x_step_qn,
const int subpel_y_qn, const int y_step_qn,
ConvolveParams *conv_params) {
- int x, y, k;
- int32_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
+ int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
filter_params_y->taps;
+ CONV_BUF_TYPE *dst16 = conv_params->dst;
+ const int dst16_stride = conv_params->dst_stride;
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
+ assert(bits >= 0);
int im_stride = w;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
@@ -507,245 +432,255 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
// horizontal filter
const uint8_t *src_horiz = src - fo_vert * src_stride;
- for (y = 0; y < im_h; ++y) {
+ for (int y = 0; y < im_h; ++y) {
int x_qn = subpel_x_qn;
- for (x = 0; x < w; ++x, x_qn += x_step_qn) {
+ for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
const uint8_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
assert(x_filter_idx < SUBPEL_SHIFTS);
const int16_t *x_filter =
av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
int32_t sum = (1 << (bd + FILTER_BITS - 1));
- for (k = 0; k < filter_params_x->taps; ++k) {
+ for (int k = 0; k < filter_params_x->taps; ++k) {
sum += x_filter[k] * src_x[k - fo_horiz];
}
assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
im_block[y * im_stride + x] =
- ROUND_POWER_OF_TWO(sum, conv_params->round_0);
+ (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
}
src_horiz += src_stride;
}
// vertical filter
- int32_t *src_vert = im_block + fo_vert * im_stride;
+ int16_t *src_vert = im_block + fo_vert * im_stride;
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- for (x = 0; x < w; ++x) {
+ for (int x = 0; x < w; ++x) {
int y_qn = subpel_y_qn;
- for (y = 0; y < h; ++y, y_qn += y_step_qn) {
- const int32_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
+ for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
+ const int16_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
assert(y_filter_idx < SUBPEL_SHIFTS);
const int16_t *y_filter =
av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
- CONV_BUF_TYPE sum = 1 << offset_bits;
- for (k = 0; k < filter_params_y->taps; ++k) {
+ int32_t sum = 1 << offset_bits;
+ for (int k = 0; k < filter_params_y->taps; ++k) {
sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
}
assert(0 <= sum && sum < (1 << (offset_bits + 2)));
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
- ((1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1)));
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
- dst[y * dst_stride + x] = res;
+ CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
+ if (conv_params->is_compound) {
+ if (conv_params->do_average) {
+ int32_t tmp = dst16[y * dst16_stride + x];
+ if (conv_params->use_jnt_comp_avg) {
+ tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
+ tmp = tmp >> DIST_PRECISION_BITS;
+ } else {
+ tmp += res;
+ tmp = tmp >> 1;
+ }
+        /* Subtract the round offset and apply the final rounding shift */
+ tmp = tmp - ((1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ dst8[y * dst8_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits));
+ } else {
+ dst16[y * dst16_stride + x] = res;
+ }
+ } else {
+        /* Subtract the round offset and apply the final rounding shift */
+ int32_t tmp = res - ((1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ dst8[y * dst8_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits));
+ }
}
src_vert++;
}
}
-#endif // CONFIG_COMPOUND_ROUND
+
+static void convolve_2d_scale_wrapper(
+ const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y, const int subpel_x_qn,
+ const int x_step_qn, const int subpel_y_qn, const int y_step_qn,
+ ConvolveParams *conv_params) {
+ if (conv_params->is_compound) {
+ assert(conv_params->dst != NULL);
+ }
+ av1_convolve_2d_scale(src, src_stride, dst, dst_stride, w, h, filter_params_x,
+ filter_params_y, subpel_x_qn, x_step_qn, subpel_y_qn,
+ y_step_qn, conv_params);
+}
void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
InterpFilters interp_filters, const int subpel_x_q4,
int x_step_q4, const int subpel_y_q4, int y_step_q4,
- int scaled, ConvolveParams *conv_params) {
+ int scaled, ConvolveParams *conv_params,
+ const struct scale_factors *sf) {
(void)x_step_q4;
(void)y_step_q4;
(void)dst;
(void)dst_stride;
InterpFilterParams filter_params_x, filter_params_y;
- av1_get_convolve_filter_params(interp_filters, 1, &filter_params_x,
- &filter_params_y);
-
- if (filter_params_y.taps < filter_params_x.taps) {
- uint8_t tr_src[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) *
- (MAX_SB_SIZE + MAX_FILTER_TAP - 1)];
- int tr_src_stride = MAX_SB_SIZE + MAX_FILTER_TAP - 1;
- CONV_BUF_TYPE tr_dst[MAX_SB_SIZE * MAX_SB_SIZE];
- int tr_dst_stride = MAX_SB_SIZE;
- int fo_vert = filter_params_y.taps / 2 - 1;
- int fo_horiz = filter_params_x.taps / 2 - 1;
-
- transpose_uint8(tr_src, tr_src_stride,
- src - fo_vert * src_stride - fo_horiz, src_stride,
- w + filter_params_x.taps - 1, h + filter_params_y.taps - 1);
- transpose_int32(tr_dst, tr_dst_stride, conv_params->dst,
- conv_params->dst_stride, w, h);
-
- // horizontal and vertical parameters are swapped because of the transpose
- if (scaled)
- av1_convolve_2d_scale(tr_src + fo_horiz * tr_src_stride + fo_vert,
- tr_src_stride, tr_dst, tr_dst_stride, h, w,
- &filter_params_y, &filter_params_x, subpel_y_q4,
- y_step_q4, subpel_x_q4, x_step_q4, conv_params);
- else
- av1_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert,
- tr_src_stride, tr_dst, tr_dst_stride, h, w,
- &filter_params_y, &filter_params_x, subpel_y_q4,
- subpel_x_q4, conv_params);
- transpose_int32(conv_params->dst, conv_params->dst_stride, tr_dst,
- tr_dst_stride, h, w);
- } else {
- if (scaled)
- av1_convolve_2d_scale(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h, &filter_params_x,
- &filter_params_y, subpel_x_q4, x_step_q4,
- subpel_y_q4, y_step_q4, conv_params);
- else
- av1_convolve_2d(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h, &filter_params_x,
- &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
- }
+ av1_get_convolve_filter_params(interp_filters, &filter_params_x,
+ &filter_params_y, w, h);
+
+ if (scaled)
+ convolve_2d_scale_wrapper(src, src_stride, dst, dst_stride, w, h,
+ &filter_params_x, &filter_params_y, subpel_x_q4,
+ x_step_q4, subpel_y_q4, y_step_q4, conv_params);
+ else
+ sf->convolve[subpel_x_q4 != 0][subpel_y_q4 != 0][conv_params->is_compound](
+ src, src_stride, dst, dst_stride, w, h, &filter_params_x,
+ &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
}
-#if CONFIG_HIGHBITDEPTH
-void av1_highbd_convolve_rounding_c(const int32_t *src, int src_stride,
- uint8_t *dst8, int dst_stride, int w, int h,
- int bits, int bd) {
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- int r, c;
- for (r = 0; r < h; ++r) {
- for (c = 0; c < w; ++c) {
- dst[r * dst_stride + c] = clip_pixel_highbd(
- ROUND_POWER_OF_TWO(src[r * src_stride + c], bits), bd);
- }
+void av1_highbd_convolve_2d_copy_sr_c(
+ const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y, const int subpel_x_q4,
+ const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
+ (void)filter_params_x;
+ (void)filter_params_y;
+ (void)subpel_x_q4;
+ (void)subpel_y_q4;
+ (void)conv_params;
+ (void)bd;
+
+ for (int y = 0; y < h; ++y) {
+ memcpy(dst + y * dst_stride, src + y * src_stride, w * sizeof(src[0]));
}
}
-#if CONFIG_COMPOUND_ROUND
-void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
- InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- int x, y, k;
- uint16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = w;
- const int fo_vert = filter_params_y->taps / 2 - 1;
+void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride,
+ uint16_t *dst, int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params, int bd) {
const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const int bits = FILTER_BITS - conv_params->round_0;
+ (void)filter_params_y;
+ (void)subpel_y_q4;
+
+ assert(bits >= 0);
+ assert((FILTER_BITS - conv_params->round_1) >= 0 ||
+ ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
// horizontal filter
- const uint16_t *src_horiz = src - fo_vert * src_stride;
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- for (y = 0; y < im_h; ++y) {
- for (x = 0; x < w; ++x) {
- int32_t sum = 0;
- for (k = 0; k < filter_params_x->taps; ++k) {
- sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
+ for (int y = 0; y < h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ int32_t res = 0;
+ for (int k = 0; k < filter_params_x->taps; ++k) {
+ res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
}
- im_block[y * im_stride + x] =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, conv_params->round_0), bd);
+ res = ROUND_POWER_OF_TWO(res, conv_params->round_0);
+ dst[y * dst_stride + x] =
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(res, bits), bd);
}
}
+}
+void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride,
+ uint16_t *dst, int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params, int bd) {
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ (void)filter_params_x;
+ (void)subpel_x_q4;
+ (void)conv_params;
+
+ assert(conv_params->round_0 <= FILTER_BITS);
+ assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) ||
+ ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
// vertical filter
- uint16_t *src_vert = im_block + fo_vert * im_stride;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- for (y = 0; y < h; ++y) {
- for (x = 0; x < w; ++x) {
- CONV_BUF_TYPE sum = 0;
- for (k = 0; k < filter_params_y->taps; ++k) {
- sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
+ for (int y = 0; y < h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ int32_t res = 0;
+ for (int k = 0; k < filter_params_y->taps; ++k) {
+ res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
}
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
- dst[y * dst_stride + x] = res;
+ dst[y * dst_stride + x] =
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(res, FILTER_BITS), bd);
}
}
}
-void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w,
- int h, InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_qn, const int x_step_qn,
- const int subpel_y_qn, const int y_step_qn,
- ConvolveParams *conv_params, int bd) {
- int x, y, k;
- uint16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
- int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
- filter_params_y->taps;
+void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride,
+ uint16_t *dst, int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params, int bd) {
+ int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
+ int im_h = h + filter_params_y->taps - 1;
int im_stride = w;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
- (void)bd;
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
+ assert(bits >= 0);
// horizontal filter
const uint16_t *src_horiz = src - fo_vert * src_stride;
- for (y = 0; y < im_h; ++y) {
- int x_qn = subpel_x_qn;
- for (x = 0; x < w; ++x, x_qn += x_step_qn) {
- const uint16_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
- const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(x_filter_idx < SUBPEL_SHIFTS);
- const int16_t *x_filter =
- av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
- int sum = 0;
- for (k = 0; k < filter_params_x->taps; ++k)
- sum += x_filter[k] * src_x[k - fo_horiz];
+ const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
+ for (int y = 0; y < im_h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ int32_t sum = (1 << (bd + FILTER_BITS - 1));
+ for (int k = 0; k < filter_params_x->taps; ++k) {
+ sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
+ }
+ assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
im_block[y * im_stride + x] =
- clip_pixel(ROUND_POWER_OF_TWO(sum, conv_params->round_0));
+ ROUND_POWER_OF_TWO(sum, conv_params->round_0);
}
- src_horiz += src_stride;
}
// vertical filter
- uint16_t *src_vert = im_block + fo_vert * im_stride;
- for (x = 0; x < w; ++x) {
- int y_qn = subpel_y_qn;
- for (y = 0; y < h; ++y, y_qn += y_step_qn) {
- const uint16_t *const src_y =
- &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
- const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(y_filter_idx < SUBPEL_SHIFTS);
- const int16_t *y_filter =
- av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
- CONV_BUF_TYPE sum = 0;
- for (k = 0; k < filter_params_y->taps; ++k) {
- sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
+ int16_t *src_vert = im_block + fo_vert * im_stride;
+ const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ for (int y = 0; y < h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ int32_t sum = 1 << offset_bits;
+ for (int k = 0; k < filter_params_y->taps; ++k) {
+ sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
}
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
- dst[y * dst_stride + x] = res;
+ assert(0 <= sum && sum < (1 << (offset_bits + 2)));
+ int32_t res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
+ ((1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ dst[y * dst_stride + x] =
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(res, bits), bd);
}
- src_vert++;
}
}
-#else
-
-void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
- InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
+void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride,
+ uint16_t *dst16, int dst16_stride, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params, int bd) {
int x, y, k;
- int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
+ int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
int im_h = h + filter_params_y->taps - 1;
int im_stride = w;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const int round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ assert(round_bits >= 0);
// horizontal filter
const uint16_t *src_horiz = src - fo_vert * src_stride;
@@ -760,439 +695,367 @@ void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride,
assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
(void)bd;
im_block[y * im_stride + x] =
- ROUND_POWER_OF_TWO(sum, conv_params->round_0);
+ (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
}
}
// vertical filter
- int32_t *src_vert = im_block + fo_vert * im_stride;
+ int16_t *src_vert = im_block + fo_vert * im_stride;
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
for (y = 0; y < h; ++y) {
for (x = 0; x < w; ++x) {
- CONV_BUF_TYPE sum = 1 << offset_bits;
+ int32_t sum = 1 << offset_bits;
for (k = 0; k < filter_params_y->taps; ++k) {
sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
}
assert(0 <= sum && sum < (1 << (offset_bits + 2)));
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
- ((1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1)));
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
+ CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
+ if (conv_params->do_average) {
+ int32_t tmp = dst[y * dst_stride + x];
+ if (conv_params->use_jnt_comp_avg) {
+ tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
+ tmp = tmp >> DIST_PRECISION_BITS;
+ } else {
+ tmp += res;
+ tmp = tmp >> 1;
+ }
+ tmp -= (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ dst16[y * dst16_stride + x] =
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), bd);
+ } else {
+ dst[y * dst_stride + x] = res;
+ }
+ }
+ }
+}
+
+void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride,
+ uint16_t *dst16, int dst16_stride, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params, int bd) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const int bits = FILTER_BITS - conv_params->round_1;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ const int round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ assert(round_bits >= 0);
+ (void)filter_params_y;
+ (void)subpel_y_q4;
+ assert(bits >= 0);
+ // horizontal filter
+ const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
+ for (int y = 0; y < h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ int32_t res = 0;
+ for (int k = 0; k < filter_params_x->taps; ++k) {
+ res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
+ }
+ res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0);
+ res += round_offset;
+
+ if (conv_params->do_average) {
+ int32_t tmp = dst[y * dst_stride + x];
+ if (conv_params->use_jnt_comp_avg) {
+ tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
+ tmp = tmp >> DIST_PRECISION_BITS;
+ } else {
+ tmp += res;
+ tmp = tmp >> 1;
+ }
+ tmp -= round_offset;
+ dst16[y * dst16_stride + x] =
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), bd);
+ } else {
+ dst[y * dst_stride + x] = res;
+ }
+ }
+ }
+}
+
+void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride,
+ uint16_t *dst16, int dst16_stride, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params, int bd) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const int bits = FILTER_BITS - conv_params->round_0;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ const int round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ assert(round_bits >= 0);
+ (void)filter_params_x;
+ (void)subpel_x_q4;
+ assert(bits >= 0);
+ // vertical filter
+ const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
+ for (int y = 0; y < h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ int32_t res = 0;
+ for (int k = 0; k < filter_params_y->taps; ++k) {
+ res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
+ }
+ res *= (1 << bits);
+ res = ROUND_POWER_OF_TWO(res, conv_params->round_1) + round_offset;
+
+ if (conv_params->do_average) {
+ int32_t tmp = dst[y * dst_stride + x];
+ if (conv_params->use_jnt_comp_avg) {
+ tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
+ tmp = tmp >> DIST_PRECISION_BITS;
+ } else {
+ tmp += res;
+ tmp = tmp >> 1;
+ }
+ tmp -= round_offset;
+ dst16[y * dst16_stride + x] =
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), bd);
+ } else {
dst[y * dst_stride + x] = res;
+ }
+ }
+ }
+}
+
+void av1_highbd_jnt_convolve_2d_copy_c(
+ const uint16_t *src, int src_stride, uint16_t *dst16, int dst16_stride,
+ int w, int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y, const int subpel_x_q4,
+ const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ assert(bits >= 0);
+ (void)filter_params_x;
+ (void)filter_params_y;
+ (void)subpel_x_q4;
+ (void)subpel_y_q4;
+
+ for (int y = 0; y < h; ++y) {
+ for (int x = 0; x < w; ++x) {
+ CONV_BUF_TYPE res = src[y * src_stride + x] << bits;
+ res += round_offset;
+ if (conv_params->do_average) {
+ int32_t tmp = dst[y * dst_stride + x];
+ if (conv_params->use_jnt_comp_avg) {
+ tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
+ tmp = tmp >> DIST_PRECISION_BITS;
+ } else {
+ tmp += res;
+ tmp = tmp >> 1;
+ }
+ tmp -= round_offset;
+ dst16[y * dst16_stride + x] =
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
+ } else {
+ dst[y * dst_stride + x] = res;
+ }
}
}
}
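
/* Aside (not part of the patch): every av1_highbd_jnt_convolve_* variant
   above finishes with the same compound-averaging step.  On the do_average
   pass the new result `res` is blended with the value already stored in the
   CONV_BUF_TYPE buffer, either as a plain (a + b) >> 1 or, when
   use_jnt_comp_avg is set, as a distance-weighted average using
   fwd_offset/bck_offset.  A minimal sketch of just that blend follows; the
   helper name and the assumption that the two weights sum to
   1 << DIST_PRECISION_BITS are mine. */

#include <stdint.h>

#define DIST_PRECISION_BITS_SKETCH 4 /* assumed value of DIST_PRECISION_BITS */

/* Blend the previously stored prediction (first) with the new one (second),
 * mirroring the do_average branch in the jnt_convolve functions above. */
static int32_t jnt_comp_avg_sketch(int32_t first, int32_t second, int fwd,
                                   int bck, int use_jnt_comp_avg) {
  if (use_jnt_comp_avg) {
    /* Weighted average; fwd + bck is assumed to equal
     * 1 << DIST_PRECISION_BITS, so the result stays in the same range. */
    return (first * fwd + second * bck) >> DIST_PRECISION_BITS_SKETCH;
  }
  return (first + second) >> 1; /* plain average */
}
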
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w,
- int h, InterpFilterParams *filter_params_x,
+ uint16_t *dst, int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_qn, const int x_step_qn,
const int subpel_y_qn, const int y_step_qn,
ConvolveParams *conv_params, int bd) {
- int x, y, k;
- int32_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
+ int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
filter_params_y->taps;
int im_stride = w;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
-
+ CONV_BUF_TYPE *dst16 = conv_params->dst;
+ const int dst16_stride = conv_params->dst_stride;
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
+ assert(bits >= 0);
// horizontal filter
const uint16_t *src_horiz = src - fo_vert * src_stride;
- for (y = 0; y < im_h; ++y) {
+ for (int y = 0; y < im_h; ++y) {
int x_qn = subpel_x_qn;
- for (x = 0; x < w; ++x, x_qn += x_step_qn) {
+ for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
const uint16_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
assert(x_filter_idx < SUBPEL_SHIFTS);
const int16_t *x_filter =
av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
int32_t sum = (1 << (bd + FILTER_BITS - 1));
- for (k = 0; k < filter_params_x->taps; ++k) {
+ for (int k = 0; k < filter_params_x->taps; ++k) {
sum += x_filter[k] * src_x[k - fo_horiz];
}
assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
im_block[y * im_stride + x] =
- ROUND_POWER_OF_TWO(sum, conv_params->round_0);
+ (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
}
src_horiz += src_stride;
}
// vertical filter
- int32_t *src_vert = im_block + fo_vert * im_stride;
+ int16_t *src_vert = im_block + fo_vert * im_stride;
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- for (x = 0; x < w; ++x) {
+ for (int x = 0; x < w; ++x) {
int y_qn = subpel_y_qn;
- for (y = 0; y < h; ++y, y_qn += y_step_qn) {
- const int32_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
+ for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
+ const int16_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
assert(y_filter_idx < SUBPEL_SHIFTS);
const int16_t *y_filter =
av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
- CONV_BUF_TYPE sum = 1 << offset_bits;
- for (k = 0; k < filter_params_y->taps; ++k) {
+ int32_t sum = 1 << offset_bits;
+ for (int k = 0; k < filter_params_y->taps; ++k) {
sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
}
assert(0 <= sum && sum < (1 << (offset_bits + 2)));
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
- ((1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1)));
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
- dst[y * dst_stride + x] = res;
+ CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
+ if (conv_params->is_compound) {
+ if (conv_params->do_average) {
+ int32_t tmp = dst16[y * dst16_stride + x];
+ if (conv_params->use_jnt_comp_avg) {
+ tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
+ tmp = tmp >> DIST_PRECISION_BITS;
+ } else {
+ tmp += res;
+ tmp = tmp >> 1;
+ }
+ /* Subtract round offset and convolve round */
+ tmp = tmp - ((1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ dst[y * dst_stride + x] =
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
+ } else {
+ dst16[y * dst16_stride + x] = res;
+ }
+ } else {
+ /* Subtract round offset and convolve round */
+ int32_t tmp = res - ((1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ dst[y * dst_stride + x] =
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
+ }
}
src_vert++;
}
}
-#endif // CONFIG_COMPOUND_ROUND
void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
+ uint8_t *dst8, int dst_stride, int w, int h,
InterpFilters interp_filters,
const int subpel_x_q4, int x_step_q4,
const int subpel_y_q4, int y_step_q4,
int scaled, ConvolveParams *conv_params,
- int bd) {
+ const struct scale_factors *sf, int bd) {
(void)x_step_q4;
(void)y_step_q4;
- (void)dst;
(void)dst_stride;
- InterpFilterParams filter_params_x, filter_params_y;
- av1_get_convolve_filter_params(interp_filters, 1, &filter_params_x,
- &filter_params_y);
-
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- if (filter_params_y.taps < filter_params_x.taps) {
- uint16_t tr_src[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) *
- (MAX_SB_SIZE + MAX_FILTER_TAP - 1)];
- int tr_src_stride = MAX_SB_SIZE + MAX_FILTER_TAP - 1;
- CONV_BUF_TYPE tr_dst[MAX_SB_SIZE * MAX_SB_SIZE];
- int tr_dst_stride = MAX_SB_SIZE;
- int fo_vert = filter_params_y.taps / 2 - 1;
- int fo_horiz = filter_params_x.taps / 2 - 1;
-
- transpose_uint16(
- tr_src, tr_src_stride, src - fo_vert * src_stride - fo_horiz,
- src_stride, w + filter_params_x.taps - 1, h + filter_params_y.taps - 1);
- transpose_int32(tr_dst, tr_dst_stride, conv_params->dst,
- conv_params->dst_stride, w, h);
-
- // horizontal and vertical parameters are swapped because of the transpose
- if (scaled)
- av1_highbd_convolve_2d_scale(
- tr_src + fo_horiz * tr_src_stride + fo_vert, tr_src_stride, tr_dst,
- tr_dst_stride, h, w, &filter_params_y, &filter_params_x, subpel_y_q4,
- y_step_q4, subpel_x_q4, x_step_q4, conv_params, bd);
- else
- av1_highbd_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert,
- tr_src_stride, tr_dst, tr_dst_stride, h, w,
- &filter_params_y, &filter_params_x, subpel_y_q4,
- subpel_x_q4, conv_params, bd);
- transpose_int32(conv_params->dst, conv_params->dst_stride, tr_dst,
- tr_dst_stride, h, w);
- } else {
- if (scaled)
- av1_highbd_convolve_2d_scale(
- src, src_stride, conv_params->dst, conv_params->dst_stride, w, h,
- &filter_params_x, &filter_params_y, subpel_x_q4, x_step_q4,
- subpel_y_q4, y_step_q4, conv_params, bd);
- else
- av1_highbd_convolve_2d(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h, &filter_params_x,
- &filter_params_y, subpel_x_q4, subpel_y_q4,
- conv_params, bd);
- }
-}
-#endif // CONFIG_HIGHBITDEPTH
-
-#endif // CONFIG_CONVOLVE_ROUND
-
-typedef void (*ConvolveFunc)(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_q4, int step_q4,
- ConvolveParams *conv_params);
-
-static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilters interp_filters,
- const int subpel_x_q4, int x_step_q4,
- const int subpel_y_q4, int y_step_q4,
- ConvolveParams *conv_params,
- ConvolveFunc convolve_horiz,
- ConvolveFunc convolve_vert) {
- int ignore_horiz = x_step_q4 == SUBPEL_SHIFTS && subpel_x_q4 == 0;
- int ignore_vert = y_step_q4 == SUBPEL_SHIFTS && subpel_y_q4 == 0;
-
InterpFilterParams filter_params_x, filter_params_y;
- av1_get_convolve_filter_params(interp_filters, 0, &filter_params_x,
- &filter_params_y);
-
- assert(conv_params->round == CONVOLVE_OPT_ROUND);
-
- assert(w <= MAX_BLOCK_WIDTH);
- assert(h <= MAX_BLOCK_HEIGHT);
- assert(y_step_q4 <= MAX_STEP);
- assert(x_step_q4 <= MAX_STEP);
-
- if (ignore_horiz && ignore_vert) {
- convolve_copy(src, src_stride, dst, dst_stride, w, h, conv_params);
- } else if (ignore_vert) {
- assert(filter_params_x.taps <= MAX_FILTER_TAP);
- convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params_x,
- subpel_x_q4, x_step_q4, conv_params);
- } else if (ignore_horiz) {
- assert(filter_params_y.taps <= MAX_FILTER_TAP);
- convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params_y,
- subpel_y_q4, y_step_q4, conv_params);
- } else {
- // temp's size is set to a 256 aligned value to facilitate SIMD
- // implementation. The value is greater than (maximum possible intermediate
- // height or width) * MAX_SB_SIZE
- DECLARE_ALIGNED(16, uint8_t,
- temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
- int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
- int filter_size;
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- av1_convolve_filter_params_fixup_1212(&filter_params_x, &filter_params_y);
-
- // we do filter with fewer taps first to reduce hardware implementation
- // complexity
- if (filter_params_y.taps < filter_params_x.taps) {
- int intermediate_width;
- int temp_stride = max_intermediate_size;
- ConvolveParams temp_conv_params;
- temp_conv_params.ref = 0;
- temp_conv_params.do_average = 0;
- temp_conv_params.round = CONVOLVE_OPT_ROUND;
- filter_size = filter_params_x.taps;
- intermediate_width =
- (((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
- assert(intermediate_width <= max_intermediate_size);
-
- assert(filter_params_y.taps <= MAX_FILTER_TAP);
-
- convolve_vert(src - (filter_size / 2 - 1), src_stride, temp, temp_stride,
- intermediate_width, h, filter_params_y, subpel_y_q4,
- y_step_q4, &temp_conv_params);
-
- assert(filter_params_x.taps <= MAX_FILTER_TAP);
- convolve_horiz(temp + (filter_size / 2 - 1), temp_stride, dst, dst_stride,
- w, h, filter_params_x, subpel_x_q4, x_step_q4,
- conv_params);
- } else
-#endif // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- {
- int intermediate_height;
- int temp_stride = MAX_SB_SIZE;
- ConvolveParams temp_conv_params;
- temp_conv_params.ref = 0;
- temp_conv_params.do_average = 0;
- temp_conv_params.round = CONVOLVE_OPT_ROUND;
- filter_size = filter_params_y.taps;
- intermediate_height =
- (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
- assert(intermediate_height <= max_intermediate_size);
- (void)max_intermediate_size;
-
- assert(filter_params_x.taps <= MAX_FILTER_TAP);
-
- convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
- temp_stride, w, intermediate_height, filter_params_x,
- subpel_x_q4, x_step_q4, &temp_conv_params);
-
- assert(filter_params_y.taps <= MAX_FILTER_TAP);
-
- convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
- dst, dst_stride, w, h, filter_params_y, subpel_y_q4,
- y_step_q4, conv_params);
- }
- }
-}
+ av1_get_convolve_filter_params(interp_filters, &filter_params_x,
+ &filter_params_y, w, h);
-static void convolve_scale_helper(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
- const InterpFilters interp_filters,
- const int subpel_x_qn, int x_step_qn,
- const int subpel_y_qn, int y_step_qn,
- ConvolveParams *conv_params,
- ConvolveFunc convolve_horiz,
- ConvolveFunc convolve_vert) {
- int ignore_horiz = x_step_qn == SCALE_SUBPEL_SHIFTS && subpel_x_qn == 0;
- int ignore_vert = y_step_qn == SCALE_SUBPEL_SHIFTS && subpel_y_qn == 0;
-
- InterpFilterParams filter_params_x, filter_params_y;
- av1_get_convolve_filter_params(interp_filters, 0, &filter_params_x,
- &filter_params_y);
-
- assert(conv_params->round == CONVOLVE_OPT_ROUND);
-
- assert(w <= MAX_BLOCK_WIDTH);
- assert(h <= MAX_BLOCK_HEIGHT);
- assert(y_step_qn <= (MAX_STEP << SCALE_EXTRA_BITS));
- assert(x_step_qn <= (MAX_STEP << SCALE_EXTRA_BITS));
-
- if (ignore_horiz && ignore_vert) {
- convolve_copy(src, src_stride, dst, dst_stride, w, h, conv_params);
- } else if (ignore_vert) {
- assert(filter_params_x.taps <= MAX_FILTER_TAP);
- convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params_x,
- subpel_x_qn, x_step_qn, conv_params);
- } else if (ignore_horiz) {
- assert(filter_params_y.taps <= MAX_FILTER_TAP);
- convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params_y,
- subpel_y_qn, y_step_qn, conv_params);
- } else {
- // temp's size is set to a 256 aligned value to facilitate SIMD
- // implementation. The value is greater than (maximum possible intermediate
- // height or width) * MAX_SB_SIZE
- DECLARE_ALIGNED(16, uint8_t,
- temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
- int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
- int filter_size;
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- av1_convolve_filter_params_fixup_1212(&filter_params_x, &filter_params_y);
-
- // we do filter with fewer taps first to reduce hardware implementation
- // complexity
- if (filter_params_y.taps < filter_params_x.taps) {
- int intermediate_width;
- int temp_stride = max_intermediate_size;
- ConvolveParams temp_conv_params;
- temp_conv_params.ref = 0;
- temp_conv_params.do_average = 0;
- temp_conv_params.round = CONVOLVE_OPT_ROUND;
- filter_size = filter_params_x.taps;
- intermediate_width =
- (((w - 1) * x_step_qn + subpel_x_qn) >> SCALE_SUBPEL_BITS) +
- filter_size;
- assert(intermediate_width <= max_intermediate_size);
-
- assert(filter_params_y.taps <= MAX_FILTER_TAP);
-
- convolve_vert(src - (filter_size / 2 - 1), src_stride, temp, temp_stride,
- intermediate_width, h, filter_params_y, subpel_y_qn,
- y_step_qn, &temp_conv_params);
-
- assert(filter_params_x.taps <= MAX_FILTER_TAP);
- convolve_horiz(temp + (filter_size / 2 - 1), temp_stride, dst, dst_stride,
- w, h, filter_params_x, subpel_x_qn, x_step_qn,
- conv_params);
- } else {
-#endif // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- int intermediate_height;
- int temp_stride = MAX_SB_SIZE;
- ConvolveParams temp_conv_params;
- temp_conv_params.ref = 0;
- temp_conv_params.do_average = 0;
- temp_conv_params.round = CONVOLVE_OPT_ROUND;
- filter_size = filter_params_y.taps;
- intermediate_height =
- (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
- filter_size;
- assert(intermediate_height <= max_intermediate_size);
- (void)max_intermediate_size;
-
- assert(filter_params_x.taps <= MAX_FILTER_TAP);
-
- convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
- temp_stride, w, intermediate_height, filter_params_x,
- subpel_x_qn, x_step_qn, &temp_conv_params);
-
- assert(filter_params_y.taps <= MAX_FILTER_TAP);
-
- convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
- dst, dst_stride, w, h, filter_params_y, subpel_y_qn,
- y_step_qn, conv_params);
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
+ if (scaled) {
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ if (conv_params->is_compound) {
+ assert(conv_params->dst != NULL);
}
-#endif // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- }
-}
+ av1_highbd_convolve_2d_scale(src, src_stride, dst, dst_stride, w, h,
+ &filter_params_x, &filter_params_y,
+ subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
+ conv_params, bd);
+ } else {
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
-void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h, InterpFilters interp_filters,
- const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
- int y_step_q4, ConvolveParams *conv_params) {
- convolve_helper(src, src_stride, dst, dst_stride, w, h, interp_filters,
- subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, conv_params,
- av1_convolve_horiz_facade, av1_convolve_vert_facade);
+ sf->highbd_convolve[subpel_x_q4 != 0][subpel_y_q4 !=
+ 0][conv_params->is_compound](
+ src, src_stride, dst, dst_stride, w, h, &filter_params_x,
+ &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params, bd);
+ }
}
-void av1_convolve_c(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h, InterpFilters interp_filters,
- const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
- int y_step_q4, ConvolveParams *conv_params) {
- convolve_helper(src, src_stride, dst, dst_stride, w, h, interp_filters,
- subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, conv_params,
- av1_convolve_horiz_facade_c, av1_convolve_vert_facade_c);
+// Note: Fixed size intermediate buffers place limits on parameters
+// of some functions. 2d filtering proceeds in 2 steps:
+//   (1) Interpolate horizontally into an intermediate buffer, temp.
+//   (2) Interpolate temp vertically to derive the sub-pixel result.
+// Deriving the maximum number of rows in the temp buffer (263):
+// --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
+// --Largest block size is 128x128 pixels.
+// --128 rows in the downscaled frame span a distance of (128 - 1) * 32 in the
+// original frame (in 1/16th pixel units).
+// --Must round-up because block may be located at sub-pixel position.
+// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
+// --((128 - 1) * 32 + 15) >> 4 + 8 = 263.
+#define WIENER_MAX_EXT_SIZE 263
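
/* Aside (not part of the patch): a quick sanity check on the bound above.
   Plugging the worst case allowed by the asserts in
   av1_wiener_convolve_add_src_c (h = 128, y_step_q4 = 32, y0_q4 = 15) into
   the same intermediate_height expression used below stays inside the
   263-row budget (SUBPEL_BITS = 4, SUBPEL_TAPS = 8): */

enum { kWienerWorstCaseRows = (((128 - 1) * 32 + 15) >> 4) + 8 }; /* == 262 */
/* 262 <= WIENER_MAX_EXT_SIZE (263); the define keeps one spare row because
 * the derivation in the comment above rounds the division up rather than
 * truncating it. */
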
+
+static INLINE int horz_scalar_product(const uint8_t *a, const int16_t *b) {
+ int sum = 0;
+ for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k];
+ return sum;
}
-void av1_convolve_scale(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- InterpFilters interp_filters, const int subpel_x_qn,
- int x_step_qn, const int subpel_y_qn, int y_step_qn,
- ConvolveParams *conv_params) {
- convolve_scale_helper(src, src_stride, dst, dst_stride, w, h, interp_filters,
- subpel_x_qn, x_step_qn, subpel_y_qn, y_step_qn,
- conv_params, av1_convolve_horiz_facade_scale,
- av1_convolve_vert_facade_scale);
+static INLINE int highbd_horz_scalar_product(const uint16_t *a,
+ const int16_t *b) {
+ int sum = 0;
+ for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k];
+ return sum;
}
-void av1_lowbd_convolve_init_c(void) {
- // A placeholder for SIMD initialization
- return;
+static INLINE int highbd_vert_scalar_product(const uint16_t *a,
+ ptrdiff_t a_stride,
+ const int16_t *b) {
+ int sum = 0;
+ for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k * a_stride] * b[k];
+ return sum;
}
-void av1_highbd_convolve_init_c(void) {
- // A placeholder for SIMD initialization
- return;
+static const InterpKernel *get_filter_base(const int16_t *filter) {
+ // NOTE: This assumes that the filter table is 256-byte aligned.
+ // TODO(agrange) Modify to make independent of table alignment.
+ return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
}
-void av1_convolve_init(AV1_COMMON *cm) {
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- av1_highbd_convolve_init();
- else
- av1_lowbd_convolve_init();
-#else
- (void)cm;
- av1_lowbd_convolve_init();
-#endif
- return;
+static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
+ return (int)((const InterpKernel *)(intptr_t)f - base);
}
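
/* Aside (not part of the patch): the get_filter_base()/get_filter_offset()
   pair above relies on the kernel-table layout.  An InterpKernel is
   int16_t[SUBPEL_TAPS] (8 taps, 16 bytes) and a table holds SUBPEL_SHIFTS
   (16) kernels, so a full table spans exactly 256 bytes; if it is also
   256-byte aligned, masking any kernel pointer with ~0xFF lands on entry 0
   and the pointer difference recovers the sub-pel index.  A minimal sketch
   of that round trip; the table name and alignment attribute are mine. */

#include <assert.h>
#include <stdint.h>

#define SUBPEL_TAPS_SKETCH 8
#define SUBPEL_SHIFTS_SKETCH 16
typedef int16_t KernelRow[SUBPEL_TAPS_SKETCH];

/* A 256-byte-aligned stand-in for one of the aom filter kernel tables
 * (GCC/Clang alignment syntax). */
static const KernelRow example_kernels[SUBPEL_SHIFTS_SKETCH]
    __attribute__((aligned(256))) = { { 0 } };

static void check_base_offset_roundtrip(void) {
  for (int q4 = 0; q4 < SUBPEL_SHIFTS_SKETCH; ++q4) {
    const int16_t *f = example_kernels[q4];
    /* Same trick as get_filter_base()/get_filter_offset() above. */
    const KernelRow *base =
        (const KernelRow *)(((intptr_t)f) & ~((intptr_t)0xFF));
    assert(base == example_kernels);
    assert((int)((const KernelRow *)(intptr_t)f - base) == q4);
  }
}
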
-#if CONFIG_HIGHBITDEPTH
-void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_x_q4, int x_step_q4, int avg,
- int bd) {
- int x, y;
- int filter_size = filter_params.taps;
- src -= filter_size / 2 - 1;
- for (y = 0; y < h; ++y) {
- int x_q4 = subpel_x_q4;
- for (x = 0; x < w; ++x) {
- const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params, x_q4 & SUBPEL_MASK);
- int k, sum = 0;
- for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
- if (avg)
- dst[x] = ROUND_POWER_OF_TWO(
- dst[x] +
- clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
- 1);
- else
- dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
+static void convolve_add_src_horiz_hip(const uint8_t *src, ptrdiff_t src_stride,
+ uint16_t *dst, ptrdiff_t dst_stride,
+ const InterpKernel *x_filters, int x0_q4,
+ int x_step_q4, int w, int h,
+ int round0_bits) {
+ const int bd = 8;
+ src -= SUBPEL_TAPS / 2 - 1;
+ for (int y = 0; y < h; ++y) {
+ int x_q4 = x0_q4;
+ for (int x = 0; x < w; ++x) {
+ const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
+ const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
+ const int rounding = ((int)src_x[SUBPEL_TAPS / 2 - 1] << FILTER_BITS) +
+ (1 << (bd + FILTER_BITS - 1));
+ const int sum = horz_scalar_product(src_x, x_filter) + rounding;
+ dst[x] = (uint16_t)clamp(ROUND_POWER_OF_TWO(sum, round0_bits), 0,
+ WIENER_CLAMP_LIMIT(round0_bits, bd) - 1);
x_q4 += x_step_q4;
}
src += src_stride;
@@ -1200,66 +1063,25 @@ void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
}
}
-void av1_highbd_convolve_horiz_scale(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w,
- int h,
- const InterpFilterParams filter_params,
- const int subpel_x_qn, int x_step_qn,
- int avg, int bd) {
- int x, y;
- int filter_size = filter_params.taps;
- src -= filter_size / 2 - 1;
- for (y = 0; y < h; ++y) {
- int x_qn = subpel_x_qn;
- for (x = 0; x < w; ++x) {
- const uint16_t *const src_x = &src[x_qn >> SCALE_SUBPEL_BITS];
- const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(x_filter_idx < SUBPEL_SHIFTS);
- const int16_t *x_filter =
- av1_get_interp_filter_subpel_kernel(filter_params, x_filter_idx);
- int k, sum = 0;
- for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
- if (avg)
- dst[x] = ROUND_POWER_OF_TWO(
- dst[x] +
- clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
- 1);
- else
- dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
- x_qn += x_step_qn;
- }
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_y_q4, int y_step_q4, int avg,
- int bd) {
- int x, y;
- int filter_size = filter_params.taps;
- src -= src_stride * (filter_size / 2 - 1);
-
- for (x = 0; x < w; ++x) {
- int y_q4 = subpel_y_q4;
- for (y = 0; y < h; ++y) {
- const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params, y_q4 & SUBPEL_MASK);
- int k, sum = 0;
- for (k = 0; k < filter_size; ++k)
- sum += src_y[k * src_stride] * y_filter[k];
- if (avg) {
- dst[y * dst_stride] = ROUND_POWER_OF_TWO(
- dst[y * dst_stride] +
- clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
- 1);
- } else {
- dst[y * dst_stride] =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
- }
+static void convolve_add_src_vert_hip(const uint16_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const InterpKernel *y_filters, int y0_q4,
+ int y_step_q4, int w, int h,
+ int round1_bits) {
+ const int bd = 8;
+ src -= src_stride * (SUBPEL_TAPS / 2 - 1);
+
+ for (int x = 0; x < w; ++x) {
+ int y_q4 = y0_q4;
+ for (int y = 0; y < h; ++y) {
+ const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
+ const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
+ const int rounding =
+ ((int)src_y[(SUBPEL_TAPS / 2 - 1) * src_stride] << FILTER_BITS) -
+ (1 << (bd + round1_bits - 1));
+ const int sum =
+ highbd_vert_scalar_product(src_y, src_stride, y_filter) + rounding;
+ dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, round1_bits));
y_q4 += y_step_q4;
}
++src;
@@ -1267,325 +1089,111 @@ void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride,
}
}
-void av1_highbd_convolve_vert_scale(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_y_qn, int y_step_qn,
- int avg, int bd) {
- int x, y;
- int filter_size = filter_params.taps;
- src -= src_stride * (filter_size / 2 - 1);
-
- for (x = 0; x < w; ++x) {
- int y_qn = subpel_y_qn;
- for (y = 0; y < h; ++y) {
- const uint16_t *const src_y =
- &src[(y_qn >> SCALE_SUBPEL_BITS) * src_stride];
- const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(y_filter_idx < SUBPEL_SHIFTS);
- const int16_t *y_filter =
- av1_get_interp_filter_subpel_kernel(filter_params, y_filter_idx);
- int k, sum = 0;
- for (k = 0; k < filter_size; ++k)
- sum += src_y[k * src_stride] * y_filter[k];
- if (avg) {
- dst[y * dst_stride] = ROUND_POWER_OF_TWO(
- dst[y * dst_stride] +
- clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
- 1);
- } else {
- dst[y * dst_stride] =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
- }
- y_qn += y_step_qn;
- }
- ++src;
- ++dst;
- }
-}
-
-static void highbd_convolve_copy(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w, int h,
- int avg, int bd) {
- if (avg == 0) {
- int r;
- for (r = 0; r < h; ++r) {
- memcpy(dst, src, w * sizeof(*src));
- src += src_stride;
- dst += dst_stride;
- }
- } else {
- int r, c;
- for (r = 0; r < h; ++r) {
- for (c = 0; c < w; ++c) {
- dst[c] = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst[c] + src[c], 1), bd);
- }
- src += src_stride;
- dst += dst_stride;
- }
- }
+void av1_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h,
+ const ConvolveParams *conv_params) {
+ const InterpKernel *const filters_x = get_filter_base(filter_x);
+ const int x0_q4 = get_filter_offset(filter_x, filters_x);
+
+ const InterpKernel *const filters_y = get_filter_base(filter_y);
+ const int y0_q4 = get_filter_offset(filter_y, filters_y);
+
+ uint16_t temp[WIENER_MAX_EXT_SIZE * MAX_SB_SIZE];
+ const int intermediate_height =
+ (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
+
+ assert(w <= MAX_SB_SIZE);
+ assert(h <= MAX_SB_SIZE);
+ assert(y_step_q4 <= 32);
+ assert(x_step_q4 <= 32);
+
+ convolve_add_src_horiz_hip(src - src_stride * (SUBPEL_TAPS / 2 - 1),
+ src_stride, temp, MAX_SB_SIZE, filters_x, x0_q4,
+ x_step_q4, w, intermediate_height,
+ conv_params->round_0);
+ convolve_add_src_vert_hip(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),
+ MAX_SB_SIZE, dst, dst_stride, filters_y, y0_q4,
+ y_step_q4, w, h, conv_params->round_1);
}
-void av1_highbd_convolve_horiz_facade(const uint8_t *src8, int src_stride,
- uint8_t *dst8, int dst_stride, int w,
- int h,
- const InterpFilterParams filter_params,
- const int subpel_x_q4, int x_step_q4,
- int avg, int bd) {
+static void highbd_convolve_add_src_horiz_hip(
+ const uint8_t *src8, ptrdiff_t src_stride, uint16_t *dst,
+ ptrdiff_t dst_stride, const InterpKernel *x_filters, int x0_q4,
+ int x_step_q4, int w, int h, int round0_bits, int bd) {
+ const int extraprec_clamp_limit = WIENER_CLAMP_LIMIT(round0_bits, bd);
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- if (filter_params.taps == SUBPEL_TAPS) {
- const int16_t *filter_x =
- av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
- if (avg == 0)
- aom_highbd_convolve8_horiz(src8, src_stride, dst8, dst_stride, filter_x,
- x_step_q4, NULL, -1, w, h, bd);
- else
- aom_highbd_convolve8_avg_horiz(src8, src_stride, dst8, dst_stride,
- filter_x, x_step_q4, NULL, -1, w, h, bd);
- } else {
- av1_highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h,
- filter_params, subpel_x_q4, x_step_q4, avg, bd);
- }
-}
-
-void av1_highbd_convolve_horiz_facade_scale(
- const uint8_t *src8, int src_stride, uint8_t *dst8, int dst_stride, int w,
- int h, const InterpFilterParams filter_params, const int subpel_x_qn,
- int x_step_qn, int avg, int bd) {
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- // TODO(debargha): Add special functions for filter_params.taps == SUBPEL_TAPS
- // as in the function above.
- av1_highbd_convolve_horiz_scale(src, src_stride, dst, dst_stride, w, h,
- filter_params, subpel_x_qn, x_step_qn, avg,
- bd);
-}
-
-void av1_highbd_convolve_vert_facade(const uint8_t *src8, int src_stride,
- uint8_t *dst8, int dst_stride, int w,
- int h,
- const InterpFilterParams filter_params,
- const int subpel_y_q4, int y_step_q4,
- int avg, int bd) {
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
-
- if (filter_params.taps == SUBPEL_TAPS) {
- const int16_t *filter_y =
- av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
- if (avg == 0) {
- aom_highbd_convolve8_vert(src8, src_stride, dst8, dst_stride, NULL, -1,
- filter_y, y_step_q4, w, h, bd);
- } else {
- aom_highbd_convolve8_avg_vert(src8, src_stride, dst8, dst_stride, NULL,
- -1, filter_y, y_step_q4, w, h, bd);
+ src -= SUBPEL_TAPS / 2 - 1;
+ for (int y = 0; y < h; ++y) {
+ int x_q4 = x0_q4;
+ for (int x = 0; x < w; ++x) {
+ const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
+ const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
+ const int rounding = ((int)src_x[SUBPEL_TAPS / 2 - 1] << FILTER_BITS) +
+ (1 << (bd + FILTER_BITS - 1));
+ const int sum = highbd_horz_scalar_product(src_x, x_filter) + rounding;
+ dst[x] = (uint16_t)clamp(ROUND_POWER_OF_TWO(sum, round0_bits), 0,
+ extraprec_clamp_limit - 1);
+ x_q4 += x_step_q4;
}
- } else {
- av1_highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h,
- filter_params, subpel_y_q4, y_step_q4, avg, bd);
+ src += src_stride;
+ dst += dst_stride;
}
}
-void av1_highbd_convolve_vert_facade_scale(
- const uint8_t *src8, int src_stride, uint8_t *dst8, int dst_stride, int w,
- int h, const InterpFilterParams filter_params, const int subpel_y_qn,
- int y_step_qn, int avg, int bd) {
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- // TODO(debargha): Add special functions for filter_params.taps == SUBPEL_TAPS
- // as in the function above.
- av1_highbd_convolve_vert_scale(src, src_stride, dst, dst_stride, w, h,
- filter_params, subpel_y_qn, y_step_qn, avg,
- bd);
-}
-
-void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
- int dst_stride, int w, int h,
- InterpFilters interp_filters, const int subpel_x_q4,
- int x_step_q4, const int subpel_y_q4, int y_step_q4,
- int ref_idx, int bd) {
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+static void highbd_convolve_add_src_vert_hip(
+ const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst8,
+ ptrdiff_t dst_stride, const InterpKernel *y_filters, int y0_q4,
+ int y_step_q4, int w, int h, int round1_bits, int bd) {
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- int ignore_horiz = x_step_q4 == SUBPEL_SHIFTS && subpel_x_q4 == 0;
- int ignore_vert = y_step_q4 == SUBPEL_SHIFTS && subpel_y_q4 == 0;
-
- assert(w <= MAX_BLOCK_WIDTH);
- assert(h <= MAX_BLOCK_HEIGHT);
- assert(y_step_q4 <= MAX_STEP);
- assert(x_step_q4 <= MAX_STEP);
-
- if (ignore_horiz && ignore_vert) {
- highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
- return;
- }
-
- InterpFilterParams filter_params_x, filter_params_y;
- av1_get_convolve_filter_params(interp_filters, 0, &filter_params_x,
- &filter_params_y);
-
- if (ignore_vert) {
- av1_highbd_convolve_horiz_facade(src8, src_stride, dst8, dst_stride, w, h,
- filter_params_x, subpel_x_q4, x_step_q4,
- ref_idx, bd);
- } else if (ignore_horiz) {
- av1_highbd_convolve_vert_facade(src8, src_stride, dst8, dst_stride, w, h,
- filter_params_y, subpel_y_q4, y_step_q4,
- ref_idx, bd);
- } else {
- // temp's size is set to a 256 aligned value to facilitate SIMD
- // implementation. The value is greater than (maximum possible intermediate
- // height or width) * MAX_SB_SIZE
- DECLARE_ALIGNED(16, uint16_t,
- temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
- uint8_t *temp8 = CONVERT_TO_BYTEPTR(temp);
- int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
- int filter_size;
-
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- av1_convolve_filter_params_fixup_1212(&filter_params_x, &filter_params_y);
-
- if (filter_params_y.taps < filter_params_x.taps) {
- int intermediate_width;
- int temp_stride = max_intermediate_size;
- filter_size = filter_params_x.taps;
- intermediate_width =
- (((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
- assert(intermediate_width <= max_intermediate_size);
-
- assert(filter_params_y.taps <= MAX_FILTER_TAP);
-
- av1_highbd_convolve_vert_facade(src8 - (filter_size / 2 - 1), src_stride,
- temp8, temp_stride, intermediate_width, h,
- filter_params_y, subpel_y_q4, y_step_q4,
- 0, bd);
-
- assert(filter_params_x.taps <= MAX_FILTER_TAP);
-
- av1_highbd_convolve_horiz_facade(
- temp8 + (filter_size / 2 - 1), temp_stride, dst8, dst_stride, w, h,
- filter_params_x, subpel_x_q4, x_step_q4, ref_idx, bd);
- } else
-#endif // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- {
- int intermediate_height;
- int temp_stride = MAX_SB_SIZE;
- filter_size = filter_params_y.taps;
-
- intermediate_height =
- (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
- assert(intermediate_height <= max_intermediate_size);
- (void)max_intermediate_size;
-
- av1_highbd_convolve_horiz_facade(
- src8 - src_stride * (filter_size / 2 - 1), src_stride, temp8,
- temp_stride, w, intermediate_height, filter_params_x, subpel_x_q4,
- x_step_q4, 0, bd);
-
- filter_size = filter_params_y.taps;
- assert(filter_params_y.taps <= MAX_FILTER_TAP);
-
- av1_highbd_convolve_vert_facade(
- temp8 + temp_stride * (filter_size / 2 - 1), temp_stride, dst8,
- dst_stride, w, h, filter_params_y, subpel_y_q4, y_step_q4, ref_idx,
- bd);
+ src -= src_stride * (SUBPEL_TAPS / 2 - 1);
+ for (int x = 0; x < w; ++x) {
+ int y_q4 = y0_q4;
+ for (int y = 0; y < h; ++y) {
+ const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
+ const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
+ const int rounding =
+ ((int)src_y[(SUBPEL_TAPS / 2 - 1) * src_stride] << FILTER_BITS) -
+ (1 << (bd + round1_bits - 1));
+ const int sum =
+ highbd_vert_scalar_product(src_y, src_stride, y_filter) + rounding;
+ dst[y * dst_stride] =
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, round1_bits), bd);
+ y_q4 += y_step_q4;
}
+ ++src;
+ ++dst;
}
}
-void av1_highbd_convolve_scale(const uint8_t *src8, int src_stride,
- uint8_t *dst8, int dst_stride, int w, int h,
- InterpFilters interp_filters,
- const int subpel_x_qn, int x_step_qn,
- const int subpel_y_qn, int y_step_qn,
- int ref_idx, int bd) {
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- int ignore_horiz = x_step_qn == SCALE_SUBPEL_SHIFTS && subpel_x_qn == 0;
- int ignore_vert = y_step_qn == SCALE_SUBPEL_SHIFTS && subpel_y_qn == 0;
-
- assert(w <= MAX_BLOCK_WIDTH);
- assert(h <= MAX_BLOCK_HEIGHT);
- assert(y_step_qn <= (MAX_STEP << SCALE_EXTRA_BITS));
- assert(x_step_qn <= (MAX_STEP << SCALE_EXTRA_BITS));
-
- if (ignore_horiz && ignore_vert) {
- highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
- return;
- }
-
- InterpFilterParams filter_params_x, filter_params_y;
- av1_get_convolve_filter_params(interp_filters, 0, &filter_params_x,
- &filter_params_y);
-
- if (ignore_vert) {
- av1_highbd_convolve_horiz_facade_scale(src8, src_stride, dst8, dst_stride,
- w, h, filter_params_x, subpel_x_qn,
- x_step_qn, ref_idx, bd);
- } else if (ignore_horiz) {
- av1_highbd_convolve_vert_facade_scale(src8, src_stride, dst8, dst_stride, w,
- h, filter_params_y, subpel_y_qn,
- y_step_qn, ref_idx, bd);
- } else {
- // temp's size is set to a 256 aligned value to facilitate SIMD
- // implementation. The value is greater than (maximum possible intermediate
- // height or width) * MAX_SB_SIZE
- DECLARE_ALIGNED(16, uint16_t,
- temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
- uint8_t *temp8 = CONVERT_TO_BYTEPTR(temp);
- int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
- int filter_size;
-
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- av1_convolve_filter_params_fixup_1212(&filter_params_x, &filter_params_y);
-
- if (filter_params_y.taps < filter_params_x.taps) {
- int intermediate_width;
- int temp_stride = max_intermediate_size;
- filter_size = filter_params_x.taps;
- intermediate_width =
- (((w - 1) * x_step_qn + subpel_x_qn) >> SCALE_SUBPEL_BITS) +
- filter_size;
- assert(intermediate_width <= max_intermediate_size);
-
- assert(filter_params_y.taps <= MAX_FILTER_TAP);
-
- av1_highbd_convolve_vert_facade_scale(
- src8 - (filter_size / 2 - 1), src_stride, temp8, temp_stride,
- intermediate_width, h, filter_params_y, subpel_y_qn, y_step_qn, 0,
- bd);
-
- assert(filter_params_x.taps <= MAX_FILTER_TAP);
-
- av1_highbd_convolve_horiz_facade_scale(
- temp8 + (filter_size / 2 - 1), temp_stride, dst8, dst_stride, w, h,
- filter_params_x, subpel_x_qn, x_step_qn, ref_idx, bd);
- } else {
-#endif // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- int intermediate_height;
- int temp_stride = MAX_SB_SIZE;
- filter_size = filter_params_y.taps;
- intermediate_height =
- (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
- filter_size;
- assert(intermediate_height <= max_intermediate_size);
- (void)max_intermediate_size;
-
- av1_highbd_convolve_horiz_facade_scale(
- src8 - src_stride * (filter_size / 2 - 1), src_stride, temp8,
- temp_stride, w, intermediate_height, filter_params_x, subpel_x_qn,
- x_step_qn, 0, bd);
-
- filter_size = filter_params_y.taps;
- assert(filter_params_y.taps <= MAX_FILTER_TAP);
-
- av1_highbd_convolve_vert_facade_scale(
- temp8 + temp_stride * (filter_size / 2 - 1), temp_stride, dst8,
- dst_stride, w, h, filter_params_y, subpel_y_qn, y_step_qn, ref_idx,
- bd);
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- }
-#endif // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- }
+void av1_highbd_wiener_convolve_add_src_c(
+ const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
+ ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4, int w, int h,
+ const ConvolveParams *conv_params, int bd) {
+ const InterpKernel *const filters_x = get_filter_base(filter_x);
+ const int x0_q4 = get_filter_offset(filter_x, filters_x);
+
+ const InterpKernel *const filters_y = get_filter_base(filter_y);
+ const int y0_q4 = get_filter_offset(filter_y, filters_y);
+
+ uint16_t temp[WIENER_MAX_EXT_SIZE * MAX_SB_SIZE];
+ const int intermediate_height =
+ (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
+
+ assert(w <= MAX_SB_SIZE);
+ assert(h <= MAX_SB_SIZE);
+ assert(y_step_q4 <= 32);
+ assert(x_step_q4 <= 32);
+ assert(bd + FILTER_BITS - conv_params->round_0 + 2 <= 16);
+
+ highbd_convolve_add_src_horiz_hip(src - src_stride * (SUBPEL_TAPS / 2 - 1),
+ src_stride, temp, MAX_SB_SIZE, filters_x,
+ x0_q4, x_step_q4, w, intermediate_height,
+ conv_params->round_0, bd);
+ highbd_convolve_add_src_vert_hip(
+ temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE, dst, dst_stride,
+ filters_y, y0_q4, y_step_q4, w, h, conv_params->round_1, bd);
}
-#endif // CONFIG_HIGHBITDEPTH
diff --git a/third_party/aom/av1/common/convolve.h b/third_party/aom/av1/common/convolve.h
index c43f649e0..1b2c2d0d5 100644
--- a/third_party/aom/av1/common/convolve.h
+++ b/third_party/aom/av1/common/convolve.h
@@ -17,140 +17,119 @@
extern "C" {
#endif
-typedef enum CONVOLVE_OPT {
- // indicate the results in dst buf is rounded by FILTER_BITS or not
- CONVOLVE_OPT_ROUND,
- CONVOLVE_OPT_NO_ROUND,
-} CONVOLVE_OPT;
-
-typedef int32_t CONV_BUF_TYPE;
-
+typedef uint16_t CONV_BUF_TYPE;
typedef struct ConvolveParams {
int ref;
int do_average;
- CONVOLVE_OPT round;
CONV_BUF_TYPE *dst;
int dst_stride;
int round_0;
int round_1;
int plane;
- int do_post_rounding;
+ int is_compound;
+ int use_jnt_comp_avg;
+ int fwd_offset;
+ int bck_offset;
} ConvolveParams;
-static INLINE ConvolveParams get_conv_params(int ref, int do_average,
- int plane) {
- ConvolveParams conv_params;
- conv_params.ref = ref;
- conv_params.do_average = do_average;
- conv_params.round = CONVOLVE_OPT_ROUND;
- conv_params.plane = plane;
- conv_params.do_post_rounding = 0;
- return conv_params;
-}
-
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
-static INLINE void av1_convolve_filter_params_fixup_1212(
- const InterpFilterParams *params_x, InterpFilterParams *params_y) {
- if (params_x->interp_filter == MULTITAP_SHARP &&
- params_y->interp_filter == MULTITAP_SHARP) {
- // Avoid two directions both using 12-tap filter.
- // This will reduce hardware implementation cost.
- *params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
- }
-}
-#endif
-
-static INLINE void av1_get_convolve_filter_params(
- InterpFilters interp_filters, int avoid_1212, InterpFilterParams *params_x,
- InterpFilterParams *params_y) {
-#if CONFIG_DUAL_FILTER
+#define ROUND0_BITS 3
+#define COMPOUND_ROUND1_BITS 7
+#define WIENER_ROUND0_BITS 3
+
+#define WIENER_CLAMP_LIMIT(r0, bd) (1 << ((bd) + 1 + FILTER_BITS - r0))
+
+typedef void (*aom_convolve_fn_t)(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params);
+
+typedef void (*aom_highbd_convolve_fn_t)(
+ const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y, const int subpel_x_q4,
+ const int subpel_y_q4, ConvolveParams *conv_params, int bd);
+
+static INLINE void av1_get_convolve_filter_params(InterpFilters interp_filters,
+ InterpFilterParams *params_x,
+ InterpFilterParams *params_y,
+ int w, int h) {
InterpFilter filter_x = av1_extract_interp_filter(interp_filters, 1);
InterpFilter filter_y = av1_extract_interp_filter(interp_filters, 0);
-#else
- InterpFilter filter_x = av1_extract_interp_filter(interp_filters, 0);
- InterpFilter filter_y = av1_extract_interp_filter(interp_filters, 0);
-#endif
-
- *params_x = av1_get_interp_filter_params(filter_x);
- *params_y = av1_get_interp_filter_params(filter_y);
-
- if (avoid_1212) {
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- convolve_filter_params_fixup_1212(params_x, params_y);
-#endif
- }
+ *params_x = av1_get_interp_filter_params_with_block_size(filter_x, w);
+ *params_y = av1_get_interp_filter_params_with_block_size(filter_y, h);
}
struct AV1Common;
-void av1_convolve_init(struct AV1Common *cm);
+struct scale_factors;
-#if CONFIG_CONVOLVE_ROUND
void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
InterpFilters interp_filters, const int subpel_x_q4,
int x_step_q4, const int subpel_y_q4, int y_step_q4,
- int scaled, ConvolveParams *conv_params);
+ int scaled, ConvolveParams *conv_params,
+ const struct scale_factors *sf);
static INLINE ConvolveParams get_conv_params_no_round(int ref, int do_average,
- int plane, int32_t *dst,
- int dst_stride) {
+ int plane,
+ CONV_BUF_TYPE *dst,
+ int dst_stride,
+ int is_compound, int bd) {
ConvolveParams conv_params;
conv_params.ref = ref;
conv_params.do_average = do_average;
- conv_params.round = CONVOLVE_OPT_NO_ROUND;
-#if CONFIG_COMPOUND_ROUND
- conv_params.round_0 = FILTER_BITS;
-#else
- conv_params.round_0 = 5;
-#endif
- conv_params.round_1 = 0;
+ assert(IMPLIES(do_average, is_compound));
+ conv_params.is_compound = is_compound;
+ conv_params.round_0 = ROUND0_BITS;
+ conv_params.round_1 = is_compound ? COMPOUND_ROUND1_BITS
+ : 2 * FILTER_BITS - conv_params.round_0;
+ const int intbufrange = bd + FILTER_BITS - conv_params.round_0 + 2;
+ assert(IMPLIES(bd < 12, intbufrange <= 16));
+ if (intbufrange > 16) {
+ conv_params.round_0 += intbufrange - 16;
+ if (!is_compound) conv_params.round_1 -= intbufrange - 16;
+ }
+ // TODO(yunqing): The following dst should only be valid while
+ // is_compound = 1;
conv_params.dst = dst;
conv_params.dst_stride = dst_stride;
conv_params.plane = plane;
- conv_params.do_post_rounding = 0;
return conv_params;
}
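
/* Aside (not part of the patch): to make the rounding setup above concrete,
   with ROUND0_BITS = 3 and COMPOUND_ROUND1_BITS = 7 an 8-bit compound call
   gets round_0 = 3, round_1 = 7 and an intermediate range of
   8 + 7 - 3 + 2 = 14 bits, which fits the 16-bit CONV_BUF_TYPE; at bd = 12
   the range reaches 18 bits, so round_0 is raised by 2 (and round_1 lowered
   by 2 in the single-reference case) to pull it back to 16.  A tiny helper
   expressing that budget; the function name is hypothetical. */

/* Bits needed to hold one horizontally filtered sample after the round_0
 * shift, matching the intbufrange expression in get_conv_params_no_round().
 * FILTER_BITS is 7 in libaom. */
static int intermediate_sample_bits(int bd, int round_0) {
  return bd + 7 /* FILTER_BITS */ - round_0 + 2;
}
/* intermediate_sample_bits(8, 3)  == 14  -> fits 16-bit storage as-is
 * intermediate_sample_bits(12, 3) == 18  -> round_0 becomes 5 (18 - 16 = 2) */
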
-#if CONFIG_HIGHBITDEPTH
+static INLINE ConvolveParams get_conv_params(int ref, int do_average, int plane,
+ int bd) {
+ return get_conv_params_no_round(ref, do_average, plane, NULL, 0, 0, bd);
+}
+
+static INLINE ConvolveParams get_conv_params_wiener(int bd) {
+ ConvolveParams conv_params;
+ (void)bd;
+ conv_params.ref = 0;
+ conv_params.do_average = 0;
+ conv_params.is_compound = 0;
+ conv_params.round_0 = WIENER_ROUND0_BITS;
+ conv_params.round_1 = 2 * FILTER_BITS - conv_params.round_0;
+ const int intbufrange = bd + FILTER_BITS - conv_params.round_0 + 2;
+ assert(IMPLIES(bd < 12, intbufrange <= 16));
+ if (intbufrange > 16) {
+ conv_params.round_0 += intbufrange - 16;
+ conv_params.round_1 -= intbufrange - 16;
+ }
+ conv_params.dst = NULL;
+ conv_params.dst_stride = 0;
+ conv_params.plane = 0;
+ return conv_params;
+}
+
void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
uint8_t *dst, int dst_stride, int w, int h,
InterpFilters interp_filters,
const int subpel_x_q4, int x_step_q4,
const int subpel_y_q4, int y_step_q4,
int scaled, ConvolveParams *conv_params,
- int bd);
-#endif
-#endif // CONFIG_CONVOLVE_ROUND
-
-void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h, InterpFilters interp_filters,
- const int subpel_x, int xstep, const int subpel_y, int ystep,
- ConvolveParams *conv_params);
-
-void av1_convolve_c(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h, InterpFilters interp_filters,
- const int subpel_x, int xstep, const int subpel_y,
- int ystep, ConvolveParams *conv_params);
-
-void av1_convolve_scale(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- InterpFilters interp_filters, const int subpel_x,
- int xstep, const int subpel_y, int ystep,
- ConvolveParams *conv_params);
-
-#if CONFIG_HIGHBITDEPTH
-void av1_highbd_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- InterpFilters interp_filters, const int subpel_x,
- int xstep, const int subpel_y, int ystep, int avg,
- int bd);
-
-void av1_highbd_convolve_scale(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- InterpFilters interp_filters, const int subpel_x,
- int xstep, const int subpel_y, int ystep,
- int avg, int bd);
-#endif // CONFIG_HIGHBITDEPTH
+ const struct scale_factors *sf, int bd);
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/aom/av1/common/daala_tx.c b/third_party/aom/av1/common/daala_tx.c
deleted file mode 100644
index e5b2372e3..000000000
--- a/third_party/aom/av1/common/daala_tx.c
+++ /dev/null
@@ -1,4331 +0,0 @@
-#include "av1/common/daala_tx.h"
-#include "av1/common/odintrin.h"
-
-/* clang-format off */
-
-# define OD_DCT_RSHIFT(_a, _b) OD_UNBIASED_RSHIFT32(_a, _b)
-
-/* TODO: Daala DCT overflow checks need to be ported as a later test */
-# if defined(OD_DCT_CHECK_OVERFLOW)
-# else
-# define OD_DCT_OVERFLOW_CHECK(val, scale, offset, idx)
-# endif
-
-#define OD_FDCT_2(p0, p1) \
- /* Embedded 2-point orthonormal Type-II fDCT. */ \
- do { \
- /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(p1, 13573, 16384, 100); \
- p0 -= (p1*13573 + 16384) >> 15; \
- /* 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \
- OD_DCT_OVERFLOW_CHECK(p0, 5793, 4096, 101); \
- p1 += (p0*5793 + 4096) >> 13; \
- /* 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(p1, 3393, 4096, 102); \
- p0 -= (p1*3393 + 4096) >> 13; \
- } \
- while (0)
-
-#define OD_IDCT_2(p0, p1) \
- /* Embedded 2-point orthonormal Type-II iDCT. */ \
- do { \
- /* 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \
- p0 += (p1*3393 + 4096) >> 13; \
- /* 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \
- p1 -= (p0*5793 + 4096) >> 13; \
- /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
- p0 += (p1*13573 + 16384) >> 15; \
- } \
- while (0)
-
-#define OD_FDCT_2_ASYM(p0, p1, p1h) \
- /* Embedded 2-point asymmetric Type-II fDCT. */ \
- do { \
- p0 += p1h; \
- p1 = p0 - p1; \
- } \
- while (0)
-
-#define OD_IDCT_2_ASYM(p0, p1, p1h) \
- /* Embedded 2-point asymmetric Type-II iDCT. */ \
- do { \
- p1 = p0 - p1; \
- p1h = OD_DCT_RSHIFT(p1, 1); \
- p0 -= p1h; \
- } \
- while (0)
-
-#define OD_FDST_2(p0, p1) \
- /* Embedded 2-point orthonormal Type-IV fDST. */ \
- do { \
- /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 103); \
- p0 -= (p1*10947 + 8192) >> 14; \
- /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- OD_DCT_OVERFLOW_CHECK(p0, 473, 256, 104); \
- p1 += (p0*473 + 256) >> 9; \
- /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 105); \
- p0 -= (p1*10947 + 8192) >> 14; \
- } \
- while (0)
-
-#define OD_IDST_2(p0, p1) \
- /* Embedded 2-point orthonormal Type-IV iDST. */ \
- do { \
-    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- p0 += (p1*10947 + 8192) >> 14; \
- /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- p1 -= (p0*473 + 256) >> 9; \
- /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- p0 += (p1*10947 + 8192) >> 14; \
- } \
- while (0)
-
-#define OD_FDST_2_ASYM(p0, p1) \
- /* Embedded 2-point asymmetric Type-IV fDST. */ \
- do { \
- /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
- OD_DCT_OVERFLOW_CHECK(p1, 11507, 8192, 187); \
- p0 -= (p1*11507 + 8192) >> 14; \
- /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
- OD_DCT_OVERFLOW_CHECK(p0, 669, 512, 188); \
- p1 += (p0*669 + 512) >> 10; \
- /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
- OD_DCT_OVERFLOW_CHECK(p1, 4573, 2048, 189); \
- p0 -= (p1*4573 + 2048) >> 12; \
- } \
- while (0)
-
-#define OD_IDST_2_ASYM(p0, p1) \
- /* Embedded 2-point asymmetric Type-IV iDST. */ \
- do { \
- /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
- p0 += (p1*4573 + 2048) >> 12; \
- /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
- p1 -= (p0*669 + 512) >> 10; \
- /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
- p0 += (p1*11507 + 8192) >> 14; \
- } \
- while (0)
-
-#define OD_FDCT_4(q0, q2, q1, q3) \
- /* Embedded 4-point orthonormal Type-II fDCT. */ \
- do { \
- int q2h; \
- int q3h; \
- q3 = q0 - q3; \
- q3h = OD_DCT_RSHIFT(q3, 1); \
- q0 -= q3h; \
- q2 += q1; \
- q2h = OD_DCT_RSHIFT(q2, 1); \
- q1 = q2h - q1; \
- OD_FDCT_2_ASYM(q0, q2, q2h); \
- OD_FDST_2_ASYM(q3, q1); \
- } \
- while (0)
-
-#define OD_IDCT_4(q0, q2, q1, q3) \
- /* Embedded 4-point orthonormal Type-II iDCT. */ \
- do { \
- int q1h; \
- int q3h; \
- OD_IDST_2_ASYM(q3, q2); \
- OD_IDCT_2_ASYM(q0, q1, q1h); \
- q3h = OD_DCT_RSHIFT(q3, 1); \
- q0 += q3h; \
- q3 = q0 - q3; \
- q2 = q1h - q2; \
- q1 -= q2; \
- } \
- while (0)
-
-#define OD_FDCT_4_ASYM(q0, q2, q2h, q1, q3, q3h) \
- /* Embedded 4-point asymmetric Type-II fDCT. */ \
- do { \
- q0 += q3h; \
- q3 = q0 - q3; \
- q1 = q2h - q1; \
- q2 = q1 - q2; \
- OD_FDCT_2(q0, q2); \
- OD_FDST_2(q3, q1); \
- } \
- while (0)
-
-#define OD_IDCT_4_ASYM(q0, q2, q1, q1h, q3, q3h) \
- /* Embedded 4-point asymmetric Type-II iDCT. */ \
- do { \
- OD_IDST_2(q3, q2); \
- OD_IDCT_2(q0, q1); \
- q1 = q2 - q1; \
- q1h = OD_DCT_RSHIFT(q1, 1); \
- q2 = q1h - q2; \
- q3 = q0 - q3; \
- q3h = OD_DCT_RSHIFT(q3, 1); \
- q0 -= q3h; \
- } \
- while (0)
-
-#define OD_FDST_4(q0, q2, q1, q3) \
- /* Embedded 4-point orthonormal Type-IV fDST. */ \
- do { \
- int q0h; \
- int q1h; \
- /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(q1, 13573, 16384, 190); \
- q2 += (q1*13573 + 16384) >> 15; \
- /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
- OD_DCT_OVERFLOW_CHECK(q2, 5793, 4096, 191); \
- q1 -= (q2*5793 + 4096) >> 13; \
- /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(q1, 3393, 4096, 192); \
- q2 += (q1*3393 + 4096) >> 13; \
- q0 += q2; \
- q0h = OD_DCT_RSHIFT(q0, 1); \
- q2 = q0h - q2; \
- q1 += q3; \
- q1h = OD_DCT_RSHIFT(q1, 1); \
- q3 -= q1h; \
- /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~=
- 0.524455699240090 */ \
- OD_DCT_OVERFLOW_CHECK(q1, 537, 512, 193); \
- q2 -= (q1*537 + 512) >> 10; \
- /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \
- OD_DCT_OVERFLOW_CHECK(q2, 1609, 1024, 194); \
- q1 += (q2*1609 + 1024) >> 11; \
- /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~=
- 0.223847182092655 */ \
- OD_DCT_OVERFLOW_CHECK(q1, 7335, 16384, 195); \
- q2 += (q1*7335 + 16384) >> 15; \
- /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~=
- 0.6215036383171189 */ \
- OD_DCT_OVERFLOW_CHECK(q0, 5091, 4096, 196); \
- q3 += (q0*5091 + 4096) >> 13; \
- /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \
- OD_DCT_OVERFLOW_CHECK(q3, 5681, 2048, 197); \
- q0 -= (q3*5681 + 2048) >> 12; \
- /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~=
- 0.52204745462729 */ \
- OD_DCT_OVERFLOW_CHECK(q0, 4277, 4096, 198); \
- q3 += (q0*4277 + 4096) >> 13; \
- } \
- while (0)
-
-#define OD_IDST_4(q0, q2, q1, q3) \
- /* Embedded 4-point orthonormal Type-IV iDST. */ \
- do { \
- int q0h; \
- int q2h; \
- /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~=
- 0.52204745462729 */ \
- q3 -= (q0*4277 + 4096) >> 13; \
- /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \
- q0 += (q3*5681 + 2048) >> 12; \
- /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~=
- 0.6215036383171189 */ \
- q3 -= (q0*5091 + 4096) >> 13; \
- /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~=
- 0.223847182092655 */ \
- q1 -= (q2*7335 + 16384) >> 15; \
- /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \
- q2 -= (q1*1609 + 1024) >> 11; \
- /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~=
- 0.524455699240090 */ \
- q1 += (q2*537 + 512) >> 10; \
- q2h = OD_DCT_RSHIFT(q2, 1); \
- q3 += q2h; \
- q2 -= q3; \
- q0h = OD_DCT_RSHIFT(q0, 1); \
- q1 = q0h - q1; \
- q0 -= q1; \
- /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- q1 -= (q2*3393 + 4096) >> 13; \
- /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
- q2 += (q1*5793 + 4096) >> 13; \
- /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- q1 -= (q2*13573 + 16384) >> 15; \
- } \
- while (0)
-
-#define OD_FDST_4_ASYM(t0, t0h, t2, t1, t3) \
- /* Embedded 4-point asymmetric Type-IV fDST. */ \
- do { \
- /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(t1, 7489, 4096, 106); \
- t2 -= (t1*7489 + 4096) >> 13; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
- OD_DCT_OVERFLOW_CHECK(t1, 11585, 8192, 107); \
- t1 += (t2*11585 + 8192) >> 14; \
- /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
- OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 108); \
- t2 += (t1*19195 + 16384) >> 15; \
- t3 += OD_DCT_RSHIFT(t2, 1); \
- t2 -= t3; \
- t1 = t0h - t1; \
- t0 -= t1; \
- /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 109); \
- t3 += (t0*6723 + 4096) >> 13; \
- /* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
- OD_DCT_OVERFLOW_CHECK(t3, 8035, 4096, 110); \
- t0 -= (t3*8035 + 4096) >> 13; \
- /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 111); \
- t3 += (t0*6723 + 4096) >> 13; \
- /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
- OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 112); \
- t2 += (t1*8757 + 8192) >> 14; \
- /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
- OD_DCT_OVERFLOW_CHECK(t2, 6811, 4096, 113); \
- t1 -= (t2*6811 + 4096) >> 13; \
- /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
- OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 114); \
- t2 += (t1*8757 + 8192) >> 14; \
- } \
- while (0)
-
-#define OD_IDST_4_ASYM(t0, t0h, t2, t1, t3) \
- /* Embedded 4-point asymmetric Type-IV iDST. */ \
- do { \
- /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
- t1 -= (t2*8757 + 8192) >> 14; \
- /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
- t2 += (t1*6811 + 4096) >> 13; \
- /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
- t1 -= (t2*8757 + 8192) >> 14; \
- /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- t3 -= (t0*6723 + 4096) >> 13; \
- /* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
- t0 += (t3*8035 + 4096) >> 13; \
- /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- t3 -= (t0*6723 + 4096) >> 13; \
- t0 += t2; \
- t0h = OD_DCT_RSHIFT(t0, 1); \
- t2 = t0h - t2; \
- t1 += t3; \
- t3 -= OD_DCT_RSHIFT(t1, 1); \
- /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
- t1 -= (t2*19195 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
- t2 -= (t1*11585 + 8192) >> 14; \
- /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
- t1 += (t2*7489 + 4096) >> 13; \
- } \
- while (0)
-
-#define OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \
- /* Embedded 8-point orthonormal Type-II fDCT. */ \
- do { \
- int r4h; \
- int r5h; \
- int r6h; \
- int r7h; \
- r7 = r0 - r7; \
- r7h = OD_DCT_RSHIFT(r7, 1); \
- r0 -= r7h; \
- r6 += r1; \
- r6h = OD_DCT_RSHIFT(r6, 1); \
- r1 = r6h - r1; \
- r5 = r2 - r5; \
- r5h = OD_DCT_RSHIFT(r5, 1); \
- r2 -= r5h; \
- r4 += r3; \
- r4h = OD_DCT_RSHIFT(r4, 1); \
- r3 = r4h - r3; \
- OD_FDCT_4_ASYM(r0, r4, r4h, r2, r6, r6h); \
- OD_FDST_4_ASYM(r7, r7h, r3, r5, r1); \
- } \
- while (0)
-
-#define OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \
- /* Embedded 8-point orthonormal Type-II iDCT. */ \
- do { \
- int r1h; \
- int r3h; \
- int r5h; \
- int r7h; \
- OD_IDST_4_ASYM(r7, r7h, r5, r6, r4); \
- OD_IDCT_4_ASYM(r0, r2, r1, r1h, r3, r3h); \
- r0 += r7h; \
- r7 = r0 - r7; \
- r6 = r1h - r6; \
- r1 -= r6; \
- r5h = OD_DCT_RSHIFT(r5, 1); \
- r2 += r5h; \
- r5 = r2 - r5; \
- r4 = r3h - r4; \
- r3 -= r4; \
- } \
- while (0)
-
-#define OD_FDCT_8_ASYM(r0, r4, r4h, r2, r6, r6h, r1, r5, r5h, r3, r7, r7h) \
- /* Embedded 8-point asymmetric Type-II fDCT. */ \
- do { \
- r0 += r7h; \
- r7 = r0 - r7; \
- r1 = r6h - r1; \
- r6 -= r1; \
- r2 += r5h; \
- r5 = r2 - r5; \
- r3 = r4h - r3; \
- r4 -= r3; \
- OD_FDCT_4(r0, r4, r2, r6); \
- OD_FDST_4(r7, r3, r5, r1); \
- } \
- while (0)
-
-#define OD_IDCT_8_ASYM(r0, r4, r2, r6, r1, r1h, r5, r5h, r3, r3h, r7, r7h) \
- /* Embedded 8-point asymmetric Type-II iDCT. */ \
- do { \
- OD_IDST_4(r7, r5, r6, r4); \
- OD_IDCT_4(r0, r2, r1, r3); \
- r7 = r0 - r7; \
- r7h = OD_DCT_RSHIFT(r7, 1); \
- r0 -= r7h; \
- r1 += r6; \
- r1h = OD_DCT_RSHIFT(r1, 1); \
- r6 = r1h - r6; \
- r5 = r2 - r5; \
- r5h = OD_DCT_RSHIFT(r5, 1); \
- r2 -= r5h; \
- r3 += r4; \
- r3h = OD_DCT_RSHIFT(r3, 1); \
- r4 = r3h - r4; \
- } \
- while (0)
-
-#define OD_FDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \
- /* Embedded 8-point orthonormal Type-IV fDST. */ \
- do { \
- int t0h; \
- int t2h; \
- int t5h; \
- int t7h; \
- /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 115); \
- t6 -= (t1*13573 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
- OD_DCT_OVERFLOW_CHECK(t6, 11585, 8192, 116); \
- t1 += (t6*11585 + 8192) >> 14; \
- /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 117); \
- t6 -= (t1*13573 + 16384) >> 15; \
- /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- OD_DCT_OVERFLOW_CHECK(t2, 21895, 16384, 118); \
- t5 -= (t2*21895 + 16384) >> 15; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- OD_DCT_OVERFLOW_CHECK(t5, 15137, 8192, 119); \
- t2 += (t5*15137 + 8192) >> 14; \
- /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- OD_DCT_OVERFLOW_CHECK(t2, 10947, 8192, 120); \
- t5 -= (t2*10947 + 8192) >> 14; \
- /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
- OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 121); \
- t4 -= (t3*3259 + 8192) >> 14; \
- /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
- OD_DCT_OVERFLOW_CHECK(t4, 3135, 4096, 122); \
- t3 += (t4*3135 + 4096) >> 13; \
- /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
- OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 123); \
- t4 -= (t3*3259 + 8192) >> 14; \
- t7 += t1; \
- t7h = OD_DCT_RSHIFT(t7, 1); \
- t1 -= t7h; \
- t2 = t3 - t2; \
- t2h = OD_DCT_RSHIFT(t2, 1); \
- t3 -= t2h; \
- t0 -= t6; \
- t0h = OD_DCT_RSHIFT(t0, 1); \
- t6 += t0h; \
- t5 = t4 - t5; \
- t5h = OD_DCT_RSHIFT(t5, 1); \
- t4 -= t5h; \
- t1 += t5h; \
- t5 = t1 - t5; \
- t4 += t0h; \
- t0 -= t4; \
- t6 -= t2h; \
- t2 += t6; \
- t3 -= t7h; \
- t7 += t3; \
- /* TODO: Can we move this into another operation */ \
- t7 = -t7; \
- /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
- OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 124); \
- t0 -= (t7*7425 + 4096) >> 13; \
- /* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \
- OD_DCT_OVERFLOW_CHECK(t0, 8153, 4096, 125); \
- t7 += (t0*8153 + 4096) >> 13; \
- /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
- OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 126); \
- t0 -= (t7*7425 + 4096) >> 13; \
- /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
- OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 127); \
- t6 -= (t1*4861 + 16384) >> 15; \
- /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \
- OD_DCT_OVERFLOW_CHECK(t6, 1189, 2048, 128); \
- t1 += (t6*1189 + 2048) >> 12; \
- /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
- OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 129); \
- t6 -= (t1*4861 + 16384) >> 15; \
- /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
- OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 130); \
- t2 -= (t5*2455 + 2048) >> 12; \
- /* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
- OD_DCT_OVERFLOW_CHECK(t2, 7225, 4096, 131); \
- t5 += (t2*7225 + 4096) >> 13; \
- /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
- OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 132); \
- t2 -= (t5*2455 + 2048) >> 12; \
- /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
- OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 133); \
- t4 -= (t3*11725 + 16384) >> 15; \
- /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \
- OD_DCT_OVERFLOW_CHECK(t4, 5197, 4096, 134); \
- t3 += (t4*5197 + 4096) >> 13; \
- /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
- OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 135); \
- t4 -= (t3*11725 + 16384) >> 15; \
- } \
- while (0)
-
-#define OD_IDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \
- /* Embedded 8-point orthonormal Type-IV iDST. */ \
- do { \
- int t0h; \
- int t2h; \
- int t5h_; \
- int t7h_; \
- /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
- t1 += (t6*11725 + 16384) >> 15; \
- /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \
- t6 -= (t1*5197 + 4096) >> 13; \
- /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
- t1 += (t6*11725 + 16384) >> 15; \
- /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
- t2 += (t5*2455 + 2048) >> 12; \
- /* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
- t5 -= (t2*7225 + 4096) >> 13; \
- /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
- t2 += (t5*2455 + 2048) >> 12; \
- /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
- t3 += (t4*4861 + 16384) >> 15; \
- /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \
- t4 -= (t3*1189 + 2048) >> 12; \
- /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
- t3 += (t4*4861 + 16384) >> 15; \
- /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
- t0 += (t7*7425 + 4096) >> 13; \
- /* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \
- t7 -= (t0*8153 + 4096) >> 13; \
- /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
- t0 += (t7*7425 + 4096) >> 13; \
- /* TODO: Can we move this into another operation */ \
- t7 = -t7; \
- t7 -= t6; \
- t7h_ = OD_DCT_RSHIFT(t7, 1); \
- t6 += t7h_; \
- t2 -= t3; \
- t2h = OD_DCT_RSHIFT(t2, 1); \
- t3 += t2h; \
- t0 += t1; \
- t0h = OD_DCT_RSHIFT(t0, 1); \
- t1 -= t0h; \
- t5 = t4 - t5; \
- t5h_ = OD_DCT_RSHIFT(t5, 1); \
- t4 -= t5h_; \
- t1 += t5h_; \
- t5 = t1 - t5; \
- t3 -= t0h; \
- t0 += t3; \
- t6 += t2h; \
- t2 = t6 - t2; \
- t4 += t7h_; \
- t7 -= t4; \
- /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
- t1 += (t6*3259 + 8192) >> 14; \
- /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
- t6 -= (t1*3135 + 4096) >> 13; \
- /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
- t1 += (t6*3259 + 8192) >> 14; \
- /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- t5 += (t2*10947 + 8192) >> 14; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- t2 -= (t5*15137 + 8192) >> 14; \
- /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- t5 += (t2*21895 + 16384) >> 15; \
- /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- t3 += (t4*13573 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
- t4 -= (t3*11585 + 8192) >> 14; \
- /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- t3 += (t4*13573 + 16384) >> 15; \
- } \
- while (0)
-
-/* Rewrite this so that t0h can be passed in. */
-#define OD_FDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \
- /* Embedded 8-point asymmetric Type-IV fDST. */ \
- do { \
- int t0h; \
- int t2h; \
- int t5h; \
- int t7h; \
- /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \
- OD_DCT_OVERFLOW_CHECK(t1, 1035, 1024, 199); \
- t6 += (t1*1035 + 1024) >> 11; \
- /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \
- OD_DCT_OVERFLOW_CHECK(t6, 3675, 2048, 200); \
- t1 -= (t6*3675 + 2048) >> 12; \
- /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \
- OD_DCT_OVERFLOW_CHECK(t1, 851, 4096, 201); \
- t6 -= (t1*851 + 4096) >> 13; \
- /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \
- OD_DCT_OVERFLOW_CHECK(t2, 4379, 4096, 202); \
- t5 += (t2*4379 + 4096) >> 13; \
- /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \
- OD_DCT_OVERFLOW_CHECK(t5, 10217, 4096, 203); \
- t2 -= (t5*10217 + 4096) >> 13; \
- /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \
- OD_DCT_OVERFLOW_CHECK(t2, 4379, 8192, 204); \
- t5 += (t2*4379 + 8192) >> 14; \
- /* 12905/16384 ~= (Sqrt[2] - Cos[3*Pi/32])/(2*Sin[3*Pi/32]) */ \
- OD_DCT_OVERFLOW_CHECK(t3, 12905, 8192, 205); \
- t4 += (t3*12905 + 8192) >> 14; \
- /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \
- OD_DCT_OVERFLOW_CHECK(t4, 3363, 4096, 206); \
- t3 -= (t4*3363 + 4096) >> 13; \
- /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \
- OD_DCT_OVERFLOW_CHECK(t3, 3525, 2048, 207); \
- t4 -= (t3*3525 + 2048) >> 12; \
- /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \
- OD_DCT_OVERFLOW_CHECK(t0, 5417, 4096, 208); \
- t7 += (t0*5417 + 4096) >> 13; \
- /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \
- OD_DCT_OVERFLOW_CHECK(t7, 5765, 2048, 209); \
- t0 -= (t7*5765 + 2048) >> 12; \
- /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \
- OD_DCT_OVERFLOW_CHECK(t0, 2507, 2048, 210); \
- t7 += (t0*2507 + 2048) >> 12; \
- t0 += t1; \
- t0h = OD_DCT_RSHIFT(t0, 1); \
- t1 -= t0h; \
- t2 -= t3; \
- t2h = OD_DCT_RSHIFT(t2, 1); \
- t3 += t2h; \
- t5 -= t4; \
- t5h = OD_DCT_RSHIFT(t5, 1); \
- t4 += t5h; \
- t7 += t6; \
- t7h = OD_DCT_RSHIFT(t7, 1); \
- t6 = t7h - t6; \
- t4 = t7h - t4; \
- t7 -= t4; \
- t1 += t5h; \
- t5 = t1 - t5; \
- t6 += t2h; \
- t2 = t6 - t2; \
- t3 -= t0h; \
- t0 += t3; \
- /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
- OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 211); \
- t1 += (t6*3259 + 8192) >> 14; \
- /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
- OD_DCT_OVERFLOW_CHECK(t1, 3135, 4096, 212); \
- t6 -= (t1*3135 + 4096) >> 13; \
- /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
- OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 213); \
- t1 += (t6*3259 + 8192) >> 14; \
- /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 214); \
- t5 += (t2*2737 + 2048) >> 12; \
- /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- OD_DCT_OVERFLOW_CHECK(t5, 473, 256, 215); \
- t2 -= (t5*473 + 256) >> 9; \
- /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 216); \
- t5 += (t2*2737 + 2048) >> 12; \
- /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 217); \
- t3 += (t4*3393 + 4096) >> 13; \
- /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
- OD_DCT_OVERFLOW_CHECK(t3, 5793, 4096, 218); \
- t4 -= (t3*5793 + 4096) >> 13; \
- /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 219); \
- t3 += (t4*3393 + 4096) >> 13; \
- } \
- while (0)
-
-#define OD_IDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \
- /* Embedded 8-point asymmetric Type-IV iDST. */ \
- do { \
- int t0h; \
- int t2h; \
- int t5h__; \
- int t7h__; \
- /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- t6 -= (t1*3393 + 4096) >> 13; \
- /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
- t1 += (t6*5793 + 4096) >> 13; \
- /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- t6 -= (t1*3393 + 4096) >> 13; \
- /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- t5 -= (t2*2737 + 2048) >> 12; \
- /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- t2 += (t5*473 + 256) >> 9; \
- /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- t5 -= (t2*2737 + 2048) >> 12; \
- /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
- t4 -= (t3*3259 + 8192) >> 14; \
- /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
- t3 += (t4*3135 + 4096) >> 13; \
- /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
- t4 -= (t3*3259 + 8192) >> 14; \
- t0 -= t6; \
- t0h = OD_DCT_RSHIFT(t0, 1); \
- t6 += t0h; \
- t2 = t3 - t2; \
- t2h = OD_DCT_RSHIFT(t2, 1); \
- t3 -= t2h; \
- t5 = t4 - t5; \
- t5h__ = OD_DCT_RSHIFT(t5, 1); \
- t4 -= t5h__; \
- t7 += t1; \
- t7h__ = OD_DCT_RSHIFT(t7, 1); \
- t1 = t7h__ - t1; \
- t3 = t7h__ - t3; \
- t7 -= t3; \
- t1 -= t5h__; \
- t5 += t1; \
- t6 -= t2h; \
- t2 += t6; \
- t4 += t0h; \
- t0 -= t4; \
- /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \
- t7 -= (t0*2507 + 2048) >> 12; \
- /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \
- t0 += (t7*5765 + 2048) >> 12; \
- /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \
- t7 -= (t0*5417 + 4096) >> 13; \
- /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \
- t1 += (t6*3525 + 2048) >> 12; \
- /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \
- t6 += (t1*3363 + 4096) >> 13; \
-    /* 12905/16384 ~= (1/Sqrt[2] - Cos[3*Pi/32]/2)/Sin[3*Pi/32] */ \
- t1 -= (t6*12905 + 8192) >> 14; \
- /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \
- t5 -= (t2*4379 + 8192) >> 14; \
- /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \
- t2 += (t5*10217 + 4096) >> 13; \
- /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \
- t5 -= (t2*4379 + 4096) >> 13; \
- /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \
- t3 += (t4*851 + 4096) >> 13; \
- /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \
- t4 += (t3*3675 + 2048) >> 12; \
- /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \
- t3 -= (t4*1035 + 1024) >> 11; \
- } \
- while (0)
-
-#define OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
- s1, s9, s5, sd, s3, sb, s7, sf) \
- /* Embedded 16-point orthonormal Type-II fDCT. */ \
- do { \
- int s8h; \
- int sah; \
- int sch; \
- int seh; \
- int sfh; \
- sf = s0 - sf; \
- sfh = OD_DCT_RSHIFT(sf, 1); \
- s0 -= sfh; \
- se += s1; \
- seh = OD_DCT_RSHIFT(se, 1); \
- s1 = seh - s1; \
- sd = s2 - sd; \
- s2 -= OD_DCT_RSHIFT(sd, 1); \
- sc += s3; \
- sch = OD_DCT_RSHIFT(sc, 1); \
- s3 = sch - s3; \
- sb = s4 - sb; \
- s4 -= OD_DCT_RSHIFT(sb, 1); \
- sa += s5; \
- sah = OD_DCT_RSHIFT(sa, 1); \
- s5 = sah - s5; \
- s9 = s6 - s9; \
- s6 -= OD_DCT_RSHIFT(s9, 1); \
- s8 += s7; \
- s8h = OD_DCT_RSHIFT(s8, 1); \
- s7 = s8h - s7; \
- OD_FDCT_8_ASYM(s0, s8, s8h, s4, sc, sch, s2, sa, sah, s6, se, seh); \
- OD_FDST_8_ASYM(sf, s7, sb, s3, sd, s5, s9, s1); \
- } \
- while (0)
-
-#define OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
- s1, s9, s5, sd, s3, sb, s7, sf) \
- /* Embedded 16-point orthonormal Type-II iDCT. */ \
- do { \
- int s1h; \
- int s3h; \
- int s5h; \
- int s7h; \
- int sfh; \
- OD_IDST_8_ASYM(sf, sb, sd, s9, se, sa, sc, s8); \
- OD_IDCT_8_ASYM(s0, s4, s2, s6, s1, s1h, s5, s5h, s3, s3h, s7, s7h); \
- sfh = OD_DCT_RSHIFT(sf, 1); \
- s0 += sfh; \
- sf = s0 - sf; \
- se = s1h - se; \
- s1 -= se; \
- s2 += OD_DCT_RSHIFT(sd, 1); \
- sd = s2 - sd; \
- sc = s3h - sc; \
- s3 -= sc; \
- s4 += OD_DCT_RSHIFT(sb, 1); \
- sb = s4 - sb; \
- sa = s5h - sa; \
- s5 -= sa; \
- s6 += OD_DCT_RSHIFT(s9, 1); \
- s9 = s6 - s9; \
- s8 = s7h - s8; \
- s7 -= s8; \
- } \
- while (0)
-
-#define OD_FDCT_16_ASYM(t0, t8, t8h, t4, tc, tch, t2, ta, tah, t6, te, teh, \
- t1, t9, t9h, t5, td, tdh, t3, tb, tbh, t7, tf, tfh) \
- /* Embedded 16-point asymmetric Type-II fDCT. */ \
- do { \
- t0 += tfh; \
- tf = t0 - tf; \
- t1 -= teh; \
- te += t1; \
- t2 += tdh; \
- td = t2 - td; \
- t3 -= tch; \
- tc += t3; \
- t4 += tbh; \
- tb = t4 - tb; \
- t5 -= tah; \
- ta += t5; \
- t6 += t9h; \
- t9 = t6 - t9; \
- t7 -= t8h; \
- t8 += t7; \
- OD_FDCT_8(t0, t8, t4, tc, t2, ta, t6, te); \
- OD_FDST_8(tf, t7, tb, t3, td, t5, t9, t1); \
- } \
- while (0)
-
-#define OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \
- t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh) \
- /* Embedded 16-point asymmetric Type-II iDCT. */ \
- do { \
- OD_IDST_8(tf, tb, td, t9, te, ta, tc, t8); \
- OD_IDCT_8(t0, t4, t2, t6, t1, t5, t3, t7); \
- t1 -= te; \
- t1h = OD_DCT_RSHIFT(t1, 1); \
- te += t1h; \
- t9 = t6 - t9; \
- t9h = OD_DCT_RSHIFT(t9, 1); \
- t6 -= t9h; \
- t5 -= ta; \
- t5h = OD_DCT_RSHIFT(t5, 1); \
- ta += t5h; \
- td = t2 - td; \
- tdh = OD_DCT_RSHIFT(td, 1); \
- t2 -= tdh; \
- t3 -= tc; \
- t3h = OD_DCT_RSHIFT(t3, 1); \
- tc += t3h; \
- tb = t4 - tb; \
- tbh = OD_DCT_RSHIFT(tb, 1); \
- t4 -= tbh; \
- t7 -= t8; \
- t7h = OD_DCT_RSHIFT(t7, 1); \
- t8 += t7h; \
- tf = t0 - tf; \
- tfh = OD_DCT_RSHIFT(tf, 1); \
- t0 -= tfh; \
- } \
- while (0)
-
-#define OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, \
- s1, s9, s5, sd, s3, sb, s7, sf) \
- /* Embedded 16-point orthonormal Type-IV fDST. */ \
- do { \
- int s0h; \
- int s2h; \
- int sdh; \
- int sfh; \
- /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(s3, 13573, 16384, 220); \
- s1 += (se*13573 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
- OD_DCT_OVERFLOW_CHECK(s1, 11585, 8192, 221); \
- se -= (s1*11585 + 8192) >> 14; \
- /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(s3, 13573, 16384, 222); \
- s1 += (se*13573 + 16384) >> 15; \
- /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 223); \
- sd += (s2*21895 + 16384) >> 15; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
-    OD_DCT_OVERFLOW_CHECK(sd, 15137, 8192, 224); \
- s2 -= (sd*15137 + 8192) >> 14; \
- /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 225); \
- sd += (s2*21895 + 16384) >> 15; \
- /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
- OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 226); \
- sc += (s3*3259 + 8192) >> 14; \
- /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
- OD_DCT_OVERFLOW_CHECK(sc, 3135, 4096, 227); \
- s3 -= (sc*3135 + 4096) >> 13; \
- /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
- OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 228); \
- sc += (s3*3259 + 8192) >> 14; \
- /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 229); \
- sa += (s5*13573 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
- OD_DCT_OVERFLOW_CHECK(sa, 11585, 8192, 230); \
- s5 -= (sa*11585 + 8192) >> 14; \
- /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 231); \
- sa += (s5*13573 + 16384) >> 15; \
- /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 232); \
- s6 += (s9*13573 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
- OD_DCT_OVERFLOW_CHECK(s6, 11585, 8192, 233); \
- s9 -= (s6*11585 + 8192) >> 14; \
- /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 234); \
- s6 += (s9*13573 + 16384) >> 15; \
- sf += se; \
- sfh = OD_DCT_RSHIFT(sf, 1); \
- se = sfh - se; \
- s0 += s1; \
- s0h = OD_DCT_RSHIFT(s0, 1); \
- s1 = s0h - s1; \
- s2 = s3 - s2; \
- s2h = OD_DCT_RSHIFT(s2, 1); \
- s3 -= s2h; \
- sd -= sc; \
- sdh = OD_DCT_RSHIFT(sd, 1); \
- sc += sdh; \
- sa = s4 - sa; \
- s4 -= OD_DCT_RSHIFT(sa, 1); \
- s5 += sb; \
- sb = OD_DCT_RSHIFT(s5, 1) - sb; \
- s8 += s6; \
- s6 -= OD_DCT_RSHIFT(s8, 1); \
- s7 = s9 - s7; \
- s9 -= OD_DCT_RSHIFT(s7, 1); \
- /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 235); \
- s4 += (sb*6723 + 4096) >> 13; \
- /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
- OD_DCT_OVERFLOW_CHECK(s4, 16069, 8192, 236); \
- sb -= (s4*16069 + 8192) >> 14; \
-    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 237); \
- s4 += (sb*6723 + 4096) >> 13; \
-    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
- OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 238); \
- sa += (s5*8757 + 8192) >> 14; \
- /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
- OD_DCT_OVERFLOW_CHECK(sa, 6811, 4096, 239); \
- s5 -= (sa*6811 + 4096) >> 13; \
- /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
- OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 240); \
- sa += (s5*8757 + 8192) >> 14; \
- /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
- OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 241); \
- s6 += (s9*2485 + 4096) >> 13; \
- /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
- OD_DCT_OVERFLOW_CHECK(s6, 4551, 4096, 242); \
- s9 -= (s6*4551 + 4096) >> 13; \
- /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
- OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 243); \
- s6 += (s9*2485 + 4096) >> 13; \
- /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
- OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 244); \
- s7 += (s8*3227 + 16384) >> 15; \
- /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
- OD_DCT_OVERFLOW_CHECK(s7, 6393, 16384, 245); \
- s8 -= (s7*6393 + 16384) >> 15; \
- /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
- OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 246); \
- s7 += (s8*3227 + 16384) >> 15; \
- s1 -= s2h; \
- s2 += s1; \
- se += sdh; \
- sd = se - sd; \
- s3 += sfh; \
- sf -= s3; \
- sc = s0h - sc; \
- s0 -= sc; \
- sb += OD_DCT_RSHIFT(s8, 1); \
- s8 = sb - s8; \
- s4 += OD_DCT_RSHIFT(s7, 1); \
- s7 -= s4; \
- s6 += OD_DCT_RSHIFT(s5, 1); \
- s5 = s6 - s5; \
- s9 -= OD_DCT_RSHIFT(sa, 1); \
- sa += s9; \
- s8 += s0; \
- s0 -= OD_DCT_RSHIFT(s8, 1); \
- sf += s7; \
- s7 = OD_DCT_RSHIFT(sf, 1) - s7; \
- s1 -= s6; \
- s6 += OD_DCT_RSHIFT(s1, 1); \
- s9 += se; \
- se = OD_DCT_RSHIFT(s9, 1) - se; \
- s2 += sa; \
- sa = OD_DCT_RSHIFT(s2, 1) - sa; \
- s5 += sd; \
- sd -= OD_DCT_RSHIFT(s5, 1); \
- s4 = sc - s4; \
- sc -= OD_DCT_RSHIFT(s4, 1); \
- s3 -= sb; \
- sb += OD_DCT_RSHIFT(s3, 1); \
- /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(sf, 2799, 2048, 247); \
- s0 -= (sf*2799 + 2048) >> 12; \
- /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s0, 2893, 1024, 248); \
- sf += (s0*2893 + 1024) >> 11; \
- /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(sf, 5397, 4096, 249); \
- s0 -= (sf*5397 + 4096) >> 13; \
- /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s1, 41, 32, 250); \
- se += (s1*41 + 32) >> 6; \
- /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(se, 2865, 1024, 251); \
- s1 -= (se*2865 + 1024) >> 11; \
- /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s1, 4641, 4096, 252); \
- se += (s1*4641 + 4096) >> 13; \
- /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s2, 2473, 2048, 253); \
- sd += (s2*2473 + 2048) >> 12; \
- /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(sd, 5619, 2048, 254); \
- s2 -= (sd*5619 + 2048) >> 12; \
- /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s2, 7839, 8192, 255); \
- sd += (s2*7839 + 8192) >> 14; \
- /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s3, 5747, 4096, 256); \
- sc -= (s3*5747 + 4096) >> 13; \
-    /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(sc, 3903, 4096, 257); \
- s3 += (sc*3903 + 4096) >> 13; \
- /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s3, 5701, 4096, 258); \
- sc += (s3*5701 + 4096) >> 13; \
- /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s4, 4471, 4096, 259); \
- sb += (s4*4471 + 4096) >> 13; \
- /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(sb, 1309, 512, 260); \
- s4 -= (sb*1309 + 512) >> 10; \
- /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s4, 5067, 8192, 261); \
- sb += (s4*5067 + 8192) >> 14; \
- /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s5, 2217, 2048, 262); \
- sa -= (s5*2217 + 2048) >> 12; \
- /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] ~= 0.72705107329128 */ \
- OD_DCT_OVERFLOW_CHECK(sa, 1489, 1024, 263); \
- s5 += (sa*1489 + 1024) >> 11; \
- /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s5, 75, 128, 264); \
- sa += (s5*75 + 128) >> 8; \
- /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s9, 2087, 2048, 265); \
- s6 -= (s9*2087 + 2048) >> 12; \
- /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s6, 4653, 2048, 266); \
- s9 += (s6*4653 + 2048) >> 12; \
- /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s9, 4545, 16384, 267); \
- s6 -= (s9*4545 + 16384) >> 15; \
- /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s8, 2053, 2048, 268); \
- s7 += (s8*2053 + 2048) >> 12; \
- /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s7, 1945, 1024, 269); \
- s8 -= (s7*1945 + 1024) >> 11; \
- /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \
- OD_DCT_OVERFLOW_CHECK(s8, 1651, 16384, 270); \
- s7 -= (s8*1651 + 16384) >> 15; \
- } \
- while (0)
-
-#define OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, \
- s1, s9, s5, sd, s3, sb, s7, sf) \
- /* Embedded 16-point orthonormal Type-IV iDST. */ \
- do { \
- int s0h; \
- int s4h; \
- int sbh; \
- int sfh; \
- /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \
- se += (s1*1651 + 16384) >> 15; \
- /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \
- s1 += (se*1945 + 1024) >> 11; \
- /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \
- se -= (s1*2053 + 2048) >> 12; \
- /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \
- s6 += (s9*4545 + 16384) >> 15; \
-    /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \
- s9 -= (s6*4653 + 2048) >> 12; \
- /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \
- s6 += (s9*2087 + 2048) >> 12; \
- /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \
- s5 -= (sa*75 + 128) >> 8; \
- /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] */ \
- sa -= (s5*1489 + 1024) >> 11; \
- /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \
- s5 += (sa*2217 + 2048) >> 12; \
- /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \
- sd -= (s2*5067 + 8192) >> 14; \
- /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \
- s2 += (sd*1309 + 512) >> 10; \
- /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \
- sd -= (s2*4471 + 4096) >> 13; \
- /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \
- s3 -= (sc*5701 + 4096) >> 13; \
- /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] */ \
- sc -= (s3*3903 + 4096) >> 13; \
- /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \
- s3 += (sc*5747 + 4096) >> 13; \
- /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \
- sb -= (s4*7839 + 8192) >> 14; \
- /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \
- s4 += (sb*5619 + 2048) >> 12; \
- /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \
- sb -= (s4*2473 + 2048) >> 12; \
- /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \
- s7 -= (s8*4641 + 4096) >> 13; \
- /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \
- s8 += (s7*2865 + 1024) >> 11; \
- /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \
- s7 -= (s8*41 + 32) >> 6; \
- /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \
- s0 += (sf*5397 + 4096) >> 13; \
- /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \
- sf -= (s0*2893 + 1024) >> 11; \
- /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \
- s0 += (sf*2799 + 2048) >> 12; \
- sd -= OD_DCT_RSHIFT(sc, 1); \
- sc += sd; \
- s3 += OD_DCT_RSHIFT(s2, 1); \
- s2 = s3 - s2; \
- sb += OD_DCT_RSHIFT(sa, 1); \
- sa -= sb; \
- s5 = OD_DCT_RSHIFT(s4, 1) - s5; \
- s4 -= s5; \
- s7 = OD_DCT_RSHIFT(s9, 1) - s7; \
- s9 -= s7; \
- s6 -= OD_DCT_RSHIFT(s8, 1); \
- s8 += s6; \
- se = OD_DCT_RSHIFT(sf, 1) - se; \
- sf -= se; \
- s0 += OD_DCT_RSHIFT(s1, 1); \
- s1 -= s0; \
- s5 -= s9; \
- s9 += OD_DCT_RSHIFT(s5, 1); \
- sa = s6 - sa; \
- s6 -= OD_DCT_RSHIFT(sa, 1); \
- se += s2; \
- s2 -= OD_DCT_RSHIFT(se, 1); \
- s1 = sd - s1; \
- sd -= OD_DCT_RSHIFT(s1, 1); \
- s0 += s3; \
- s0h = OD_DCT_RSHIFT(s0, 1); \
- s3 = s0h - s3; \
- sf += sc; \
- sfh = OD_DCT_RSHIFT(sf, 1); \
- sc -= sfh; \
- sb = s7 - sb; \
- sbh = OD_DCT_RSHIFT(sb, 1); \
- s7 -= sbh; \
- s4 -= s8; \
- s4h = OD_DCT_RSHIFT(s4, 1); \
- s8 += s4h; \
- /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
- se -= (s1*3227 + 16384) >> 15; \
- /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
- s1 += (se*6393 + 16384) >> 15; \
- /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
- se -= (s1*3227 + 16384) >> 15; \
- /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
- s6 -= (s9*2485 + 4096) >> 13; \
- /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
- s9 += (s6*4551 + 4096) >> 13; \
- /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
- s6 -= (s9*2485 + 4096) >> 13; \
- /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
- s5 -= (sa*8757 + 8192) >> 14; \
- /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
- sa += (s5*6811 + 4096) >> 13; \
-    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
- s5 -= (sa*8757 + 8192) >> 14; \
-    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- s2 -= (sd*6723 + 4096) >> 13; \
- /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
- sd += (s2*16069 + 8192) >> 14; \
- /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- s2 -= (sd*6723 + 4096) >> 13; \
- s9 += OD_DCT_RSHIFT(se, 1); \
- se = s9 - se; \
- s6 += OD_DCT_RSHIFT(s1, 1); \
- s1 -= s6; \
- sd = OD_DCT_RSHIFT(sa, 1) - sd; \
- sa -= sd; \
- s2 += OD_DCT_RSHIFT(s5, 1); \
- s5 = s2 - s5; \
- s3 -= sbh; \
- sb += s3; \
- sc += s4h; \
- s4 = sc - s4; \
- s8 = s0h - s8; \
- s0 -= s8; \
- s7 = sfh - s7; \
- sf -= s7; \
- /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
- s6 -= (s9*13573 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
- s9 += (s6*11585 + 8192) >> 14; \
- /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
- s6 -= (s9*13573 + 16384) >> 15; \
- /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
- s5 -= (sa*13573 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
- sa += (s5*11585 + 8192) >> 14; \
- /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
- s5 -= (sa*13573 + 16384) >> 15; \
- /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
- s3 -= (sc*3259 + 8192) >> 14; \
- /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
- sc += (s3*3135 + 4096) >> 13; \
- /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
- s3 -= (sc*3259 + 8192) >> 14; \
- /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- sb -= (s4*21895 + 16384) >> 15; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- s4 += (sb*15137 + 8192) >> 14; \
- /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
- sb -= (s4*21895 + 16384) >> 15; \
- /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
- s8 -= (s7*13573 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
- s7 += (s8*11585 + 8192) >> 14; \
- /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
- s8 -= (s7*13573 + 16384) >> 15; \
- } \
- while (0)
-
-/* TODO: rewrite this to match OD_FDST_16. */
-#define OD_FDST_16_ASYM(t0, t0h, t8, t4, t4h, tc, t2, ta, t6, te, \
- t1, t9, t5, td, t3, tb, t7, t7h, tf) \
- /* Embedded 16-point asymmetric Type-IV fDST. */ \
- do { \
- int t2h; \
- int t3h; \
- int t6h; \
- int t8h; \
- int t9h; \
- int tch; \
- int tdh; \
- /* TODO: Can we move these into another operation */ \
- t8 = -t8; \
- t9 = -t9; \
- ta = -ta; \
- tb = -tb; \
- td = -td; \
- /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
- OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 136); \
- t1 -= (te*13573 + 8192) >> 14; \
- /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
- OD_DCT_OVERFLOW_CHECK(t1, 11585, 16384, 137); \
- te += (t1*11585 + 16384) >> 15; \
- /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
- OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 138); \
- t1 -= (te*13573 + 8192) >> 14; \
- /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
- OD_DCT_OVERFLOW_CHECK(td, 4161, 8192, 139); \
- t2 += (td*4161 + 8192) >> 14; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- OD_DCT_OVERFLOW_CHECK(t2, 15137, 8192, 140); \
- td -= (t2*15137 + 8192) >> 14; \
- /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
- OD_DCT_OVERFLOW_CHECK(td, 14341, 8192, 141); \
- t2 += (td*14341 + 8192) >> 14; \
- /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
- OD_DCT_OVERFLOW_CHECK(t3, 14341, 8192, 142); \
- tc -= (t3*14341 + 8192) >> 14; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- OD_DCT_OVERFLOW_CHECK(tc, 15137, 8192, 143); \
- t3 += (tc*15137 + 8192) >> 14; \
- /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
- OD_DCT_OVERFLOW_CHECK(t3, 4161, 8192, 144); \
- tc -= (t3*4161 + 8192) >> 14; \
- te = t0h - te; \
- t0 -= te; \
- tf = OD_DCT_RSHIFT(t1, 1) - tf; \
- t1 -= tf; \
- /* TODO: Can we move this into another operation */ \
- tc = -tc; \
- t2 = OD_DCT_RSHIFT(tc, 1) - t2; \
- tc -= t2; \
- t3 = OD_DCT_RSHIFT(td, 1) - t3; \
- td = t3 - td; \
- /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(t6, 7489, 4096, 145); \
- t9 -= (t6*7489 + 4096) >> 13; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
- OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 146); \
- t6 += (t9*11585 + 8192) >> 14; \
- /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
- OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 147); \
- t9 += (t6*19195 + 16384) >> 15; \
- t8 += OD_DCT_RSHIFT(t9, 1); \
- t9 -= t8; \
- t6 = t7h - t6; \
- t7 -= t6; \
- /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 148); \
- t8 += (t7*6723 + 4096) >> 13; \
- /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
- OD_DCT_OVERFLOW_CHECK(t8, 16069, 8192, 149); \
- t7 -= (t8*16069 + 8192) >> 14; \
-    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 150); \
- t8 += (t7*6723 + 4096) >> 13; \
-    /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
- OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 151); \
- t9 += (t6*17515 + 16384) >> 15; \
- /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
- OD_DCT_OVERFLOW_CHECK(t9, 13623, 8192, 152); \
- t6 -= (t9*13623 + 8192) >> 14; \
- /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
- OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 153); \
- t9 += (t6*17515 + 16384) >> 15; \
- /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
- OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 154); \
- t5 += (ta*13573 + 8192) >> 14; \
- /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
- OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 155); \
- ta -= (t5*11585 + 16384) >> 15; \
- /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
- OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 156); \
- t5 += (ta*13573 + 8192) >> 14; \
- tb += OD_DCT_RSHIFT(t5, 1); \
- t5 = tb - t5; \
- ta += t4h; \
- t4 -= ta; \
- /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
- OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 157); \
- ta += (t5*2485 + 4096) >> 13; \
- /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
- OD_DCT_OVERFLOW_CHECK(ta, 18205, 16384, 158); \
- t5 -= (ta*18205 + 16384) >> 15; \
- /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
- OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 159); \
- ta += (t5*2485 + 4096) >> 13; \
- /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 160); \
- tb -= (t4*6723 + 4096) >> 13; \
- /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
- OD_DCT_OVERFLOW_CHECK(tb, 16069, 8192, 161); \
- t4 += (tb*16069 + 8192) >> 14; \
- /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 162); \
- tb -= (t4*6723 + 4096) >> 13; \
- /* TODO: Can we move this into another operation */ \
- t5 = -t5; \
- tc -= tf; \
- tch = OD_DCT_RSHIFT(tc, 1); \
- tf += tch; \
- t3 += t0; \
- t3h = OD_DCT_RSHIFT(t3, 1); \
- t0 -= t3h; \
- td -= t1; \
- tdh = OD_DCT_RSHIFT(td, 1); \
- t1 += tdh; \
- t2 += te; \
- t2h = OD_DCT_RSHIFT(t2, 1); \
- te -= t2h; \
- t8 += t4; \
- t8h = OD_DCT_RSHIFT(t8, 1); \
- t4 = t8h - t4; \
- t7 = tb - t7; \
- t7h = OD_DCT_RSHIFT(t7, 1); \
- tb = t7h - tb; \
- t6 -= ta; \
- t6h = OD_DCT_RSHIFT(t6, 1); \
- ta += t6h; \
- t9 = t5 - t9; \
- t9h = OD_DCT_RSHIFT(t9, 1); \
- t5 -= t9h; \
- t0 -= t7h; \
- t7 += t0; \
- tf += t8h; \
- t8 -= tf; \
- te -= t6h; \
- t6 += te; \
- t1 += t9h; \
- t9 -= t1; \
- tb -= tch; \
- tc += tb; \
- t4 += t3h; \
- t3 -= t4; \
- ta -= tdh; \
- td += ta; \
- t5 = t2h - t5; \
- t2 -= t5; \
- /* TODO: Can we move these into another operation */ \
- t8 = -t8; \
- t9 = -t9; \
- ta = -ta; \
- tb = -tb; \
- tc = -tc; \
- td = -td; \
- tf = -tf; \
- /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
- OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 163); \
- t0 -= (tf*7799 + 4096) >> 13; \
- /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \
- OD_DCT_OVERFLOW_CHECK(t0, 4091, 2048, 164); \
- tf += (t0*4091 + 2048) >> 12; \
- /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
- OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 165); \
- t0 -= (tf*7799 + 4096) >> 13; \
- /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
- OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 166); \
- t1 += (te*2417 + 16384) >> 15; \
- /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \
- OD_DCT_OVERFLOW_CHECK(t1, 601, 2048, 167); \
- te -= (t1*601 + 2048) >> 12; \
- /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
- OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 168); \
- t1 += (te*2417 + 16384) >> 15; \
- /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
- OD_DCT_OVERFLOW_CHECK(t8, 14525, 16384, 169); \
- t7 -= (t8*14525 + 16384) >> 15; \
- /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \
- OD_DCT_OVERFLOW_CHECK(t7, 3035, 2048, 170); \
- t8 += (t7*3035 + 2048) >> 12; \
- /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
- OD_DCT_OVERFLOW_CHECK(t8, 7263, 8192, 171); \
- t7 -= (t8*7263 + 8192) >> 14; \
- /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
- OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 172); \
- t2 -= (td*6393 + 4096) >> 13; \
- /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \
- OD_DCT_OVERFLOW_CHECK(t2, 3973, 2048, 173); \
- td += (t2*3973 + 2048) >> 12; \
- /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
- OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 174); \
- t2 -= (td*6393 + 4096) >> 13; \
- /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
- OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 175); \
- t5 -= (ta*9281 + 8192) >> 14; \
- /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \
- OD_DCT_OVERFLOW_CHECK(t5, 7027, 4096, 176); \
- ta += (t5*7027 + 4096) >> 13; \
- /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
- OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 177); \
- t5 -= (ta*9281 + 8192) >> 14; \
- /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
- OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 178); \
- t3 -= (tc*11539 + 8192) >> 14; \
- /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \
- OD_DCT_OVERFLOW_CHECK(t3, 7713, 4096, 179); \
- tc += (t3*7713 + 4096) >> 13; \
- /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
- OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 180); \
- t3 -= (tc*11539 + 8192) >> 14; \
- /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
- OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 181); \
- t4 -= (tb*10375 + 8192) >> 14; \
- /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \
- OD_DCT_OVERFLOW_CHECK(t4, 7405, 4096, 182); \
- tb += (t4*7405 + 4096) >> 13; \
- /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
- OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 183); \
- t4 -= (tb*10375 + 8192) >> 14; \
- /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
- OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 184); \
- t6 -= (t9*8247 + 8192) >> 14; \
- /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \
- OD_DCT_OVERFLOW_CHECK(t6, 1645, 1024, 185); \
- t9 += (t6*1645 + 1024) >> 11; \
- /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
- OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 186); \
- t6 -= (t9*8247 + 8192) >> 14; \
- } \
- while (0)
-
-#define OD_IDST_16_ASYM(t0, t0h, t8, t4, tc, t2, t2h, ta, t6, te, teh, \
- t1, t9, t5, td, t3, tb, t7, tf) \
- /* Embedded 16-point asymmetric Type-IV iDST. */ \
- do { \
- int t1h_; \
- int t3h_; \
- int t4h; \
- int t6h; \
- int t9h_; \
- int tbh_; \
- int tch; \
- /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
- t6 += (t9*8247 + 8192) >> 14; \
- /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \
- t9 -= (t6*1645 + 1024) >> 11; \
- /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
- t6 += (t9*8247 + 8192) >> 14; \
- /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
- t2 += (td*10375 + 8192) >> 14; \
- /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \
- td -= (t2*7405 + 4096) >> 13; \
- /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
- t2 += (td*10375 + 8192) >> 14; \
- /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
- tc += (t3*11539 + 8192) >> 14; \
- /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \
- t3 -= (tc*7713 + 4096) >> 13; \
- /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
- tc += (t3*11539 + 8192) >> 14; \
- /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
- ta += (t5*9281 + 8192) >> 14; \
- /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \
- t5 -= (ta*7027 + 4096) >> 13; \
- /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
- ta += (t5*9281 + 8192) >> 14; \
- /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
- t4 += (tb*6393 + 4096) >> 13; \
- /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \
- tb -= (t4*3973 + 2048) >> 12; \
- /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
- t4 += (tb*6393 + 4096) >> 13; \
- /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
- te += (t1*7263 + 8192) >> 14; \
- /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \
- t1 -= (te*3035 + 2048) >> 12; \
- /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
- te += (t1*14525 + 16384) >> 15; \
- /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
- t8 -= (t7*2417 + 16384) >> 15; \
- /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \
- t7 += (t8*601 + 2048) >> 12; \
- /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
- t8 -= (t7*2417 + 16384) >> 15; \
- /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
- t0 += (tf*7799 + 4096) >> 13; \
- /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \
- tf -= (t0*4091 + 2048) >> 12; \
- /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
- t0 += (tf*7799 + 4096) >> 13; \
- /* TODO: Can we move these into another operation */ \
- t1 = -t1; \
- t3 = -t3; \
- t5 = -t5; \
- t9 = -t9; \
- tb = -tb; \
- td = -td; \
- tf = -tf; \
- t4 += ta; \
- t4h = OD_DCT_RSHIFT(t4, 1); \
- ta = t4h - ta; \
- tb -= t5; \
- tbh_ = OD_DCT_RSHIFT(tb, 1); \
- t5 += tbh_; \
- tc += t2; \
- tch = OD_DCT_RSHIFT(tc, 1); \
- t2 -= tch; \
- t3 -= td; \
- t3h_ = OD_DCT_RSHIFT(t3, 1); \
- td += t3h_; \
- t9 += t8; \
- t9h_ = OD_DCT_RSHIFT(t9, 1); \
- t8 -= t9h_; \
- t6 -= t7; \
- t6h = OD_DCT_RSHIFT(t6, 1); \
- t7 += t6h; \
- t1 += tf; \
- t1h_ = OD_DCT_RSHIFT(t1, 1); \
- tf -= t1h_; \
- te -= t0; \
- teh = OD_DCT_RSHIFT(te, 1); \
- t0 += teh; \
- ta += t9h_; \
- t9 = ta - t9; \
- t5 -= t6h; \
- t6 += t5; \
- td = teh - td; \
- te = td - te; \
- t2 = t1h_ - t2; \
- t1 -= t2; \
- t7 += t4h; \
- t4 -= t7; \
- t8 -= tbh_; \
- tb += t8; \
- t0 += tch; \
- tc -= t0; \
- tf -= t3h_; \
- t3 += tf; \
- /* TODO: Can we move this into another operation */ \
- ta = -ta; \
- /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- td += (t2*6723 + 4096) >> 13; \
- /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
- t2 -= (td*16069 + 8192) >> 14; \
- /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- td += (t2*6723 + 4096) >> 13; \
- /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
- t5 -= (ta*2485 + 4096) >> 13; \
- /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
- ta += (t5*18205 + 16384) >> 15; \
- /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
- t5 -= (ta*2485 + 4096) >> 13; \
- t2 += t5; \
- t2h = OD_DCT_RSHIFT(t2, 1); \
- t5 -= t2h; \
- ta = td - ta; \
- td -= OD_DCT_RSHIFT(ta, 1); \
- /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
- ta -= (t5*13573 + 8192) >> 14; \
- /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
- t5 += (ta*11585 + 16384) >> 15; \
- /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
- ta -= (t5*13573 + 8192) >> 14; \
- /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
- t9 -= (t6*17515 + 16384) >> 15; \
- /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
- t6 += (t9*13623 + 8192) >> 14; \
-    /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
- t9 -= (t6*17515 + 16384) >> 15; \
-    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- t1 -= (te*6723 + 4096) >> 13; \
- /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
- te += (t1*16069 + 8192) >> 14; \
-    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
- t1 -= (te*6723 + 4096) >> 13; \
- te += t6; \
- teh = OD_DCT_RSHIFT(te, 1); \
- t6 = teh - t6; \
- t9 += t1; \
- t1 -= OD_DCT_RSHIFT(t9, 1); \
- /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
- t9 -= (t6*19195 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
- t6 -= (t9*11585 + 8192) >> 14; \
- /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
- t9 += (t6*7489 + 4096) >> 13; \
- tb = tc - tb; \
- tc = OD_DCT_RSHIFT(tb, 1) - tc; \
- t3 += t4; \
- t4 = OD_DCT_RSHIFT(t3, 1) - t4; \
- /* TODO: Can we move this into another operation */ \
- t3 = -t3; \
- t8 += tf; \
- tf = OD_DCT_RSHIFT(t8, 1) - tf; \
- t0 += t7; \
- t0h = OD_DCT_RSHIFT(t0, 1); \
- t7 = t0h - t7; \
- /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
- t3 += (tc*4161 + 8192) >> 14; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- tc -= (t3*15137 + 8192) >> 14; \
- /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
- t3 += (tc*14341 + 8192) >> 14; \
- /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
- t4 -= (tb*14341 + 8192) >> 14; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- tb += (t4*15137 + 8192) >> 14; \
- /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
- t4 -= (tb*4161 + 8192) >> 14; \
- /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
- t8 += (t7*13573 + 8192) >> 14; \
- /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
- t7 -= (t8*11585 + 16384) >> 15; \
- /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
- t8 += (t7*13573 + 8192) >> 14; \
- /* TODO: Can we move these into another operation */ \
- t1 = -t1; \
- t5 = -t5; \
- t9 = -t9; \
- tb = -tb; \
- td = -td; \
- } \
- while (0)
-
-#define OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
- te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
- /* Embedded 32-point orthonormal Type-II fDCT. */ \
- do { \
- int tgh; \
- int thh; \
- int tih; \
- int tkh; \
- int tmh; \
- int tnh; \
- int toh; \
- int tqh; \
- int tsh; \
- int tuh; \
- int tvh; \
- tv = t0 - tv; \
- tvh = OD_DCT_RSHIFT(tv, 1); \
- t0 -= tvh; \
- tu += t1; \
- tuh = OD_DCT_RSHIFT(tu, 1); \
- t1 = tuh - t1; \
- tt = t2 - tt; \
- t2 -= OD_DCT_RSHIFT(tt, 1); \
- ts += t3; \
- tsh = OD_DCT_RSHIFT(ts, 1); \
- t3 = tsh - t3; \
- tr = t4 - tr; \
- t4 -= OD_DCT_RSHIFT(tr, 1); \
- tq += t5; \
- tqh = OD_DCT_RSHIFT(tq, 1); \
- t5 = tqh - t5; \
- tp = t6 - tp; \
- t6 -= OD_DCT_RSHIFT(tp, 1); \
- to += t7; \
- toh = OD_DCT_RSHIFT(to, 1); \
- t7 = toh - t7; \
- tn = t8 - tn; \
- tnh = OD_DCT_RSHIFT(tn, 1); \
- t8 -= tnh; \
- tm += t9; \
- tmh = OD_DCT_RSHIFT(tm, 1); \
- t9 = tmh - t9; \
- tl = ta - tl; \
- ta -= OD_DCT_RSHIFT(tl, 1); \
- tk += tb; \
- tkh = OD_DCT_RSHIFT(tk, 1); \
- tb = tkh - tb; \
- tj = tc - tj; \
- tc -= OD_DCT_RSHIFT(tj, 1); \
- ti += td; \
- tih = OD_DCT_RSHIFT(ti, 1); \
- td = tih - td; \
- th = te - th; \
- thh = OD_DCT_RSHIFT(th, 1); \
- te -= thh; \
- tg += tf; \
- tgh = OD_DCT_RSHIFT(tg, 1); \
- tf = tgh - tf; \
- OD_FDCT_16_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \
- t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh); \
- OD_FDST_16_ASYM(tv, tvh, tf, tn, tnh, t7, tr, tb, tj, t3, \
- tt, td, tl, t5, tp, t9, th, thh, t1); \
- } \
- while (0)
-
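OD_FDCT_32 itself is only the split stage of the even/odd recursion: each mirrored pair of inputs is replaced by a difference plus a compensated average (or, on alternating pairs, a sum plus a halved difference), after which one half is handed to OD_FDCT_16_ASYM and the other to OD_FDST_16_ASYM. A small sketch of one pair and of its exact inverse follows, with a plain rounding shift standing in for OD_DCT_RSHIFT; because the inverse re-applies the same two steps with opposite signs, the round trip is bit-exact.

#include <assert.h>

/* Forward split of one mirrored pair, as in "tv = t0 - tv;
   tvh = OD_DCT_RSHIFT(tv, 1); t0 -= tvh;" above: *diff ends up holding
   the full difference and *avg (roughly) the average of the pair. */
static void od_fdct_split_pair_sketch(int *avg, int *diff) {
  *diff = *avg - *diff;
  *avg -= (*diff + 1) >> 1;
}

/* Exact inverse, matching "t0 += tvh; tv = t0 - tv;" in OD_IDCT_32. */
static void od_idct_split_pair_sketch(int *avg, int *diff) {
  *avg += (*diff + 1) >> 1;
  *diff = *avg - *diff;
}

int main(void) {
  int a = 37;
  int d = 11;
  od_fdct_split_pair_sketch(&a, &d);
  od_idct_split_pair_sketch(&a, &d);
  assert(a == 37 && d == 11);
  return 0;
}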
-#define OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
- te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
- /* Embedded 32-point orthonormal Type-II iDCT. */ \
- do { \
- int t1h; \
- int t3h; \
- int t5h; \
- int t7h; \
- int t9h; \
- int tbh; \
- int tdh; \
- int tfh; \
- int thh; \
- int tth; \
- int tvh; \
- OD_IDST_16_ASYM(tv, tvh, tn, tr, tj, tt, tth, tl, tp, th, thh, \
- tu, tm, tq, ti, ts, tk, to, tg); \
- OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \
- t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh); \
- tu = t1h - tu; \
- t1 -= tu; \
- te += thh; \
- th = te - th; \
- tm = t9h - tm; \
- t9 -= tm; \
- t6 += OD_DCT_RSHIFT(tp, 1); \
- tp = t6 - tp; \
- tq = t5h - tq; \
- t5 -= tq; \
- ta += OD_DCT_RSHIFT(tl, 1); \
- tl = ta - tl; \
- ti = tdh - ti; \
- td -= ti; \
- t2 += tth; \
- tt = t2 - tt; \
- ts = t3h - ts; \
- t3 -= ts; \
- tc += OD_DCT_RSHIFT(tj, 1); \
- tj = tc - tj; \
- tk = tbh - tk; \
- tb -= tk; \
- t4 += OD_DCT_RSHIFT(tr, 1); \
- tr = t4 - tr; \
- to = t7h - to; \
- t7 -= to; \
- t8 += OD_DCT_RSHIFT(tn, 1); \
- tn = t8 - tn; \
- tg = tfh - tg; \
- tf -= tg; \
- t0 += tvh; \
- tv = t0 - tv; \
- } \
- while (0)
-
-#if CONFIG_TX64X64
-#define OD_FDCT_32_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \
- t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh, t1, th, thh, \
- t9, tp, tph, t5, tl, tlh, td, tt, tth, t3, tj, tjh, tb, tr, trh, \
- t7, tn, tnh, tf, tv, tvh) \
- /* Embedded 32-point asymmetric Type-II fDCT. */ \
- do { \
- t0 += tvh; \
- tv = t0 - tv; \
- t1 = tuh - t1; \
- tu -= t1; \
- t2 += tth; \
- tt = t2 - tt; \
- t3 = tsh - t3; \
- ts -= t3; \
- t4 += trh; \
- tr = t4 - tr; \
- t5 = tqh - t5; \
- tq -= t5; \
- t6 += tph; \
- tp = t6 - tp; \
- t7 = toh - t7; \
- to -= t7; \
- t8 += tnh; \
- tn = t8 - tn; \
- t9 = tmh - t9; \
- tm -= t9; \
- ta += tlh; \
- tl = ta - tl; \
- tb = tkh - tb; \
- tk -= tb; \
- tc += tjh; \
- tj = tc - tj; \
- td = tih - td; \
- ti -= td; \
- te += thh; \
- th = te - th; \
- tf = tgh - tf; \
- tg -= tf; \
- OD_FDCT_16(t0, tg, t8, to, t4, tk, tc, ts, \
- t2, ti, ta, tq, t6, tm, te, tu); \
- OD_FDST_16(tv, tf, tn, t7, tr, tb, tj, t3, \
- tt, td, tl, t5, tp, t9, th, t1); \
- } \
- while (0)
-
-#define OD_IDCT_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, \
- t6, tm, te, tu, t1, t1h, th, thh, t9, t9h, tp, tph, t5, t5h, tl, tlh, \
- td, tdh, tt, tth, t3, t3h, tj, tjh, tb, tbh, tr, trh, t7, t7h, tn, tnh, \
- tf, tfh, tv, tvh) \
- /* Embedded 32-point asymmetric Type-II iDCT. */ \
- do { \
- OD_IDST_16(tv, tn, tr, tj, tt, tl, tp, th, \
- tu, tm, tq, ti, ts, tk, to, tg); \
- OD_IDCT_16(t0, t8, t4, tc, t2, ta, t6, te, \
- t1, t9, t5, td, t3, tb, t7, tf); \
- tv = t0 - tv; \
- tvh = OD_DCT_RSHIFT(tv, 1); \
- t0 -= tvh; \
- t1 += tu; \
- t1h = OD_DCT_RSHIFT(t1, 1); \
- tu = t1h - tu; \
- tt = t2 - tt; \
- tth = OD_DCT_RSHIFT(tt, 1); \
- t2 -= tth; \
- t3 += ts; \
- t3h = OD_DCT_RSHIFT(t3, 1); \
- ts = t3h - ts; \
- tr = t4 - tr; \
- trh = OD_DCT_RSHIFT(tr, 1); \
- t4 -= trh; \
- t5 += tq; \
- t5h = OD_DCT_RSHIFT(t5, 1); \
- tq = t5h - tq; \
- tp = t6 - tp; \
- tph = OD_DCT_RSHIFT(tp, 1); \
- t6 -= tph; \
- t7 += to; \
- t7h = OD_DCT_RSHIFT(t7, 1); \
- to = t7h - to; \
- tn = t8 - tn; \
- tnh = OD_DCT_RSHIFT(tn, 1); \
- t8 -= tnh; \
- t9 += tm; \
- t9h = OD_DCT_RSHIFT(t9, 1); \
- tm = t9h - tm; \
- tl = ta - tl; \
- tlh = OD_DCT_RSHIFT(tl, 1); \
- ta -= tlh; \
- tb += tk; \
- tbh = OD_DCT_RSHIFT(tb, 1); \
- tk = tbh - tk; \
- tj = tc - tj; \
- tjh = OD_DCT_RSHIFT(tj, 1); \
- tc -= tjh; \
- td += ti; \
- tdh = OD_DCT_RSHIFT(td, 1); \
- ti = tdh - ti; \
- th = te - th; \
- thh = OD_DCT_RSHIFT(th, 1); \
- te -= thh; \
- tf += tg; \
- tfh = OD_DCT_RSHIFT(tf, 1); \
- tg = tfh - tg; \
- } \
- while (0)
-
-#define OD_FDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \
- tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
- /* Embedded 32-point asymmetric Type-IV fDST. */ \
- do { \
- int t0h; \
- int t1h; \
- int t4h; \
- int t5h; \
- int tqh; \
- int trh; \
- int tuh; \
- int tvh; \
- \
- tu = -tu; \
- \
- /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
- OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 271); \
- t5 -= (tq*13573 + 8192) >> 14; \
- /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
- OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 272); \
- tq += (t5*11585 + 16384) >> 15; \
- /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
- OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 273); \
- t5 -= (tq*13573 + 8192) >> 14; \
- /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(t6, 29957, 16384, 274); \
- tp += (t6*29957 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
- OD_DCT_OVERFLOW_CHECK(tp, 11585, 8192, 275); \
- t6 -= (tp*11585 + 8192) >> 14; \
- /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
- OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 276); \
- tp -= (t6*19195 + 16384) >> 15; \
- /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(t1, 29957, 16384, 277); \
- tu += (t1*29957 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
- OD_DCT_OVERFLOW_CHECK(tu, 11585, 8192, 278); \
- t1 -= (tu*11585 + 8192) >> 14; \
- /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
- OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 279); \
- tu -= (t1*19195 + 16384) >> 15; \
- /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
- OD_DCT_OVERFLOW_CHECK(t2, 28681, 16384, 280); \
- tt += (t2*28681 + 16384) >> 15; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- OD_DCT_OVERFLOW_CHECK(tt, 15137, 8192, 281); \
- t2 -= (tt*15137 + 8192) >> 14; \
- /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
- OD_DCT_OVERFLOW_CHECK(t2, 4161, 8192, 282); \
- tt += (t2*4161 + 8192) >> 14; \
- /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
- OD_DCT_OVERFLOW_CHECK(ts, 4161, 8192, 283); \
- t3 += (ts*4161 + 8192) >> 14; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- OD_DCT_OVERFLOW_CHECK(t3, 15137, 8192, 284); \
- ts -= (t3*15137 + 8192) >> 14; \
- /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
- OD_DCT_OVERFLOW_CHECK(ts, 14341, 8192, 285); \
- t3 += (ts*14341 + 8192) >> 14; \
- /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
- OD_DCT_OVERFLOW_CHECK(tm, 19195, 16384, 286); \
- t9 -= (tm*19195 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
- OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 287); \
- tm -= (t9*11585 + 8192) >> 14; \
- /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(tm, 7489, 4096, 288); \
- t9 += (tm*7489 + 4096) >> 13; \
- /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
- OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 289); \
- ta += (tl*3259 + 4096) >> 13; \
- /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \
- OD_DCT_OVERFLOW_CHECK(ta, 3135, 8192, 290); \
- tl -= (ta*3135 + 8192) >> 14; \
- /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
- OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 291); \
- ta += (tl*3259 + 4096) >> 13; \
- /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
- OD_DCT_OVERFLOW_CHECK(tk, 4161, 8192, 292); \
- tb += (tk*4161 + 8192) >> 14; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- OD_DCT_OVERFLOW_CHECK(tb, 15137, 8192, 293); \
- tk -= (tb*15137 + 8192) >> 14; \
- /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
- OD_DCT_OVERFLOW_CHECK(tk, 14341, 8192, 294); \
- tb += (tk*14341 + 8192) >> 14; \
- /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
- OD_DCT_OVERFLOW_CHECK(te, 29957, 16384, 295); \
- th += (te*29957 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
- OD_DCT_OVERFLOW_CHECK(th, 11585, 8192, 296); \
- te -= (th*11585 + 8192) >> 14; \
- /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
- OD_DCT_OVERFLOW_CHECK(te, 19195, 16384, 297); \
- th -= (te*19195 + 16384) >> 15; \
- /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
- OD_DCT_OVERFLOW_CHECK(tc, 28681, 16384, 298); \
- tj += (tc*28681 + 16384) >> 15; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- OD_DCT_OVERFLOW_CHECK(tj, 15137, 8192, 299); \
- tc -= (tj*15137 + 8192) >> 14; \
- /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
- OD_DCT_OVERFLOW_CHECK(tc, 4161, 8192, 300); \
- tj += (tc*4161 + 8192) >> 14; \
- /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
- OD_DCT_OVERFLOW_CHECK(ti, 4161, 8192, 301); \
- td += (ti*4161 + 8192) >> 14; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- OD_DCT_OVERFLOW_CHECK(td, 15137, 8192, 302); \
- ti -= (td*15137 + 8192) >> 14; \
- /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
- OD_DCT_OVERFLOW_CHECK(ti, 14341, 8192, 303); \
- td += (ti*14341 + 8192) >> 14; \
- \
- t1 = -t1; \
- t2 = -t2; \
- t3 = -t3; \
- td = -td; \
- tg = -tg; \
- to = -to; \
- ts = -ts; \
- \
- tr -= OD_DCT_RSHIFT(t5, 1); \
- t5 += tr; \
- tq -= OD_DCT_RSHIFT(t4, 1); /* pass */ \
- t4 += tq; \
- t6 -= OD_DCT_RSHIFT(t7, 1); \
- t7 += t6; \
- to -= OD_DCT_RSHIFT(tp, 1); /* pass */ \
- tp += to; \
- t1 += OD_DCT_RSHIFT(t0, 1); /* pass */ \
- t0 -= t1; \
- tv -= OD_DCT_RSHIFT(tu, 1); \
- tu += tv; \
- t3 -= OD_DCT_RSHIFT(tt, 1); \
- tt += t3; \
- t2 += OD_DCT_RSHIFT(ts, 1); \
- ts -= t2; \
- t9 -= OD_DCT_RSHIFT(t8, 1); /* pass */ \
- t8 += t9; \
- tn += OD_DCT_RSHIFT(tm, 1); \
- tm -= tn; \
- tb += OD_DCT_RSHIFT(ta, 1); \
- ta -= tb; \
- tl -= OD_DCT_RSHIFT(tk, 1); \
- tk += tl; \
- te -= OD_DCT_RSHIFT(tf, 1); /* pass */ \
- tf += te; \
- tg -= OD_DCT_RSHIFT(th, 1); \
- th += tg; \
- tc -= OD_DCT_RSHIFT(ti, 1); \
- ti += tc; \
- td += OD_DCT_RSHIFT(tj, 1); \
- tj -= td; \
- \
- t4 = -t4; \
- \
- /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
- OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 304); \
- t4 += (tr*6723 + 4096) >> 13; \
- /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \
- OD_DCT_OVERFLOW_CHECK(t4, 16069, 8192, 305); \
- tr -= (t4*16069 + 8192) >> 14; \
- /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
- OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 306); \
- t4 += (tr*6723 + 4096) >> 13; \
- /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
- OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 307); \
- t5 += (tq*17515 + 16384) >> 15; \
- /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \
- OD_DCT_OVERFLOW_CHECK(t5, 13623, 8192, 308); \
- tq -= (t5*13623 + 8192) >> 14; \
- /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
- OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 309); \
- t5 += (tq*17515 + 16384) >> 15; \
- /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
- OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 310); \
- t7 += (to*3227 + 16384) >> 15; \
- /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
- OD_DCT_OVERFLOW_CHECK(t7, 6393, 16384, 311); \
- to -= (t7*6393 + 16384) >> 15; \
- /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
- OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 312); \
- t7 += (to*3227 + 16384) >> 15; \
- /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
- OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 313); \
- t6 += (tp*2485 + 4096) >> 13; \
- /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
- OD_DCT_OVERFLOW_CHECK(t6, 18205, 16384, 314); \
- tp -= (t6*18205 + 16384) >> 15; \
- /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
- OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 315); \
- t6 += (tp*2485 + 4096) >> 13; \
- \
- t5 = -t5; \
- \
- tr += to; \
- trh = OD_DCT_RSHIFT(tr, 1); \
- to -= trh; \
- t4 += t7; \
- t4h = OD_DCT_RSHIFT(t4, 1); \
- t7 -= t4h; \
- t5 += tp; \
- t5h = OD_DCT_RSHIFT(t5, 1); \
- tp -= t5h; \
- tq += t6; \
- tqh = OD_DCT_RSHIFT(tq, 1); \
- t6 -= tqh; \
- t0 -= t3; \
- t0h = OD_DCT_RSHIFT(t0, 1); \
- t3 += t0h; \
- tv -= ts; \
- tvh = OD_DCT_RSHIFT(tv, 1); \
- ts += tvh; \
- tu += tt; \
- tuh = OD_DCT_RSHIFT(tu, 1); \
- tt -= tuh; \
- t1 -= t2; \
- t1h = OD_DCT_RSHIFT(t1, 1); \
- t2 += t1h; \
- t8 += tb; \
- tb -= OD_DCT_RSHIFT(t8, 1); \
- tn += tk; \
- tk -= OD_DCT_RSHIFT(tn, 1); \
- t9 += tl; \
- tl -= OD_DCT_RSHIFT(t9, 1); \
- tm -= ta; \
- ta += OD_DCT_RSHIFT(tm, 1); \
- tc -= tf; \
- tf += OD_DCT_RSHIFT(tc, 1); \
- tj += tg; \
- tg -= OD_DCT_RSHIFT(tj, 1); \
- td -= te; \
- te += OD_DCT_RSHIFT(td, 1); \
- ti += th; \
- th -= OD_DCT_RSHIFT(ti, 1); \
- \
- t9 = -t9; \
- tl = -tl; \
- \
- /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
- OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 316); \
- t8 += (tn*805 + 8192) >> 14; \
- /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
- OD_DCT_OVERFLOW_CHECK(t8, 803, 4096, 317); \
- tn -= (t8*803 + 4096) >> 13; \
- /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
- OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 318); \
- t8 += (tn*805 + 8192) >> 14; \
- /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
- OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 319); \
- tk += (tb*11725 + 16384) >> 15; \
- /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
- OD_DCT_OVERFLOW_CHECK(tk, 5197, 4096, 320); \
- tb -= (tk*5197 + 4096) >> 13; \
- /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
- OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 321); \
- tk += (tb*11725 + 16384) >> 15; \
- /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
- OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 322); \
- ta += (tl*2455 + 2048) >> 12; \
- /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
- OD_DCT_OVERFLOW_CHECK(ta, 14449, 8192, 323); \
- tl -= (ta*14449 + 8192) >> 14; \
- /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
- OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 324); \
- ta += (tl*2455 + 2048) >> 12; \
- /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
- OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 325); \
- t9 += (tm*4861 + 16384) >> 15; \
- /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
- OD_DCT_OVERFLOW_CHECK(t9, 1189, 2048, 326); \
- tm -= (t9*1189 + 2048) >> 12; \
- /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
- OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 327); \
- t9 += (tm*4861 + 16384) >> 15; \
- /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
- OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 328); \
- tf += (tg*805 + 8192) >> 14; \
- /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
- OD_DCT_OVERFLOW_CHECK(tf, 803, 4096, 329); \
- tg -= (tf*803 + 4096) >> 13; \
- /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
- OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 330); \
- tf += (tg*805 + 8192) >> 14; \
- /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
- OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 331); \
- tc += (tj*2931 + 4096) >> 13; \
- /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
- OD_DCT_OVERFLOW_CHECK(tc, 5197, 4096, 332); \
- tj -= (tc*5197 + 4096) >> 13; \
- /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
- OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 333); \
- tc += (tj*2931 + 4096) >> 13; \
- /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
- OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 334); \
- td += (ti*513 + 1024) >> 11; \
- /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \
- OD_DCT_OVERFLOW_CHECK(td, 7723, 8192, 335); \
- ti -= (td*7723 + 8192) >> 14; \
- /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
- OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 336); \
- td += (ti*513 + 1024) >> 11; \
- /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
- OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 337); \
- te += (th*4861 + 16384) >> 15; \
- /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
- OD_DCT_OVERFLOW_CHECK(te, 1189, 2048, 338); \
- th -= (te*1189 + 2048) >> 12; \
- /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
- OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 339); \
- te += (th*4861 + 16384) >> 15; \
- \
- ta = -ta; \
- tb = -tb; \
- \
- tt += t5h; \
- t5 -= tt; \
- t2 -= tqh; \
- tq += t2; \
- tp += t1h; \
- t1 -= tp; \
- t6 -= tuh; \
- tu += t6; \
- t7 += tvh; \
- tv -= t7; \
- to += t0h; \
- t0 -= to; \
- t3 -= t4h; \
- t4 += t3; \
- ts += trh; \
- tr -= ts; \
- tf -= OD_DCT_RSHIFT(tn, 1); \
- tn += tf; \
- tg -= OD_DCT_RSHIFT(t8, 1); \
- t8 += tg; \
- tk += OD_DCT_RSHIFT(tc, 1); \
- tc -= tk; \
- tb += OD_DCT_RSHIFT(tj, 1); \
- tj -= tb; \
- ta += OD_DCT_RSHIFT(ti, 1); \
- ti -= ta; \
- tl += OD_DCT_RSHIFT(td, 1); \
- td -= tl; \
- te -= OD_DCT_RSHIFT(tm, 1); \
- tm += te; \
- th -= OD_DCT_RSHIFT(t9, 1); \
- t9 += th; \
- ta -= t5; \
- t5 += OD_DCT_RSHIFT(ta, 1); \
- tq -= tl; \
- tl += OD_DCT_RSHIFT(tq, 1); \
- t2 -= ti; \
- ti += OD_DCT_RSHIFT(t2, 1); \
- td -= tt; \
- tt += OD_DCT_RSHIFT(td, 1); \
- tm += tp; \
- tp -= OD_DCT_RSHIFT(tm, 1); \
- t6 += t9; \
- t9 -= OD_DCT_RSHIFT(t6, 1); \
- te -= tu; \
- tu += OD_DCT_RSHIFT(te, 1); \
- t1 -= th; \
- th += OD_DCT_RSHIFT(t1, 1); \
- t0 -= tg; \
- tg += OD_DCT_RSHIFT(t0, 1); \
- tf += tv; \
- tv -= OD_DCT_RSHIFT(tf, 1); \
- t8 -= t7; \
- t7 += OD_DCT_RSHIFT(t8, 1); \
- to -= tn; \
- tn += OD_DCT_RSHIFT(to, 1); \
- t4 -= tk; \
- tk += OD_DCT_RSHIFT(t4, 1); \
- tb -= tr; \
- tr += OD_DCT_RSHIFT(tb, 1); \
- t3 -= tj; \
- tj += OD_DCT_RSHIFT(t3, 1); \
- tc -= ts; \
- ts += OD_DCT_RSHIFT(tc, 1); \
- \
- tr = -tr; \
- ts = -ts; \
- tt = -tt; \
- tu = -tu; \
- \
- /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t0, 2847, 2048, 340); \
- tv += (t0*2847 + 2048) >> 12; \
- /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tv, 5791, 2048, 341); \
- t0 -= (tv*5791 + 2048) >> 12; \
- /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t0, 5593, 4096, 342); \
- tv += (t0*5593 + 4096) >> 13; \
- /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tf, 4099, 4096, 343); \
- tg -= (tf*4099 + 4096) >> 13; \
- /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tg, 1997, 1024, 344); \
- tf += (tg*1997 + 1024) >> 11; \
- /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tf, 815, 16384, 345); \
- tg += (tf*815 + 16384) >> 15; \
- /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t8, 2527, 2048, 346); \
- tn -= (t8*2527 + 2048) >> 12; \
- /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tn, 4695, 4096, 347); \
- t8 += (tn*4695 + 4096) >> 13; \
- /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t8, 4187, 4096, 348); \
- tn += (t8*4187 + 4096) >> 13; \
- /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(to, 5477, 4096, 349); \
- t7 += (to*5477 + 4096) >> 13; \
- /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t7, 4169, 4096, 350); \
- to -= (t7*4169 + 4096) >> 13; \
- /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(to, 2571, 2048, 351); \
- t7 -= (to*2571 + 2048) >> 12; \
- /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t2, 5331, 4096, 352); \
- tt += (t2*5331 + 4096) >> 13; \
- /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tt, 5749, 2048, 353); \
- t2 -= (tt*5749 + 2048) >> 12; \
- /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t2, 2413, 2048, 354); \
- tt += (t2*2413 + 2048) >> 12; \
- /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(td, 4167, 4096, 355); \
- ti -= (td*4167 + 4096) >> 13; \
- /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(ti, 891, 512, 356); \
- td += (ti*891 + 512) >> 10; \
- /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(td, 4327, 16384, 357); \
- ti += (td*4327 + 16384) >> 15; \
- /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(ta, 2261, 2048, 358); \
- tl -= (ta*2261 + 2048) >> 12; \
- /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tl, 2855, 2048, 359); \
- ta += (tl*2855 + 2048) >> 12; \
- /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(ta, 5417, 8192, 360); \
- tl += (ta*5417 + 8192) >> 14; \
- /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tq, 3459, 2048, 361); \
- t5 += (tq*3459 + 2048) >> 12; \
- /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t5, 1545, 2048, 362); \
- tq -= (t5*1545 + 2048) >> 12; \
- /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tq, 1971, 1024, 363); \
- t5 -= (tq*1971 + 1024) >> 11; \
- /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t3, 323, 256, 364); \
- ts += (t3*323 + 256) >> 9; \
- /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(ts, 5707, 2048, 365); \
- t3 -= (ts*5707 + 2048) >> 12; \
- /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t3, 2229, 2048, 366); \
- ts += (t3*2229 + 2048) >> 12; \
- /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tc, 1061, 1024, 367); \
- tj -= (tc*1061 + 1024) >> 11; \
- /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tj, 6671, 4096, 368); \
- tc += (tj*6671 + 4096) >> 13; \
- /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tc, 6287, 16384, 369); \
- tj += (tc*6287 + 16384) >> 15; \
- /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tb, 4359, 4096, 370); \
- tk -= (tb*4359 + 4096) >> 13; \
- /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tk, 3099, 2048, 371); \
- tb += (tk*3099 + 2048) >> 12; \
- /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tb, 2109, 4096, 372); \
- tk += (tb*2109 + 4096) >> 13; \
- /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t4, 5017, 4096, 373); \
- tr += (t4*5017 + 4096) >> 13; \
- /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tr, 1413, 512, 374); \
- t4 -= (tr*1413 + 512) >> 10; \
- /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t4, 8195, 8192, 375); \
- tr += (t4*8195 + 8192) >> 14; \
- /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tm, 2373, 2048, 376); \
- t9 += (tm*2373 + 2048) >> 12; \
- /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t9, 5209, 4096, 377); \
- tm -= (t9*5209 + 4096) >> 13; \
- /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tm, 3391, 4096, 378); \
- t9 -= (tm*3391 + 4096) >> 13; \
- /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t6, 1517, 1024, 379); \
- tp -= (t6*1517 + 1024) >> 11; \
- /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tp, 1817, 2048, 380); \
- t6 += (tp*1817 + 2048) >> 12; \
- /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t6, 6331, 4096, 381); \
- tp += (t6*6331 + 4096) >> 13; \
- /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(te, 515, 512, 382); \
- th -= (te*515 + 512) >> 10; \
- /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(th, 7567, 4096, 383); \
- te += (th*7567 + 4096) >> 13; \
- /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(te, 2513, 16384, 384); \
- th += (te*2513 + 16384) >> 15; \
- /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t1, 2753, 2048, 385); \
- tu += (t1*2753 + 2048) >> 12; \
- /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(tu, 5777, 2048, 386); \
- t1 -= (tu*5777 + 2048) >> 12; \
- /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \
- OD_DCT_OVERFLOW_CHECK(t1, 1301, 1024, 387); \
- tu += (t1*1301 + 1024) >> 11; \
- } \
- while (0)
-
-#define OD_IDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \
- tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
- /* Embedded 32-point asymmetric Type-IV iDST. */ \
- do { \
- int t0h; \
- int t4h; \
- int tbh; \
- int tfh; \
- int tgh; \
- int tkh; \
- int trh; \
- int tvh; \
- /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \
- tf -= (tg*1301 + 1024) >> 11; \
- /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \
- tg += (tf*5777 + 2048) >> 12; \
- /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \
- tf -= (tg*2753 + 2048) >> 12; \
- /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \
- th -= (te*2513 + 16384) >> 15; \
- /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \
- te -= (th*7567 + 4096) >> 13; \
- /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \
- th += (te*515 + 512) >> 10; \
- /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \
- tj -= (tc*6331 + 4096) >> 13; \
- /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \
- tc -= (tj*1817 + 2048) >> 12; \
- /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \
- tj += (tc*1517 + 1024) >> 11; \
- /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \
- ti += (td*3391 + 4096) >> 13; \
- /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \
- td += (ti*5209 + 4096) >> 13; \
- /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \
- ti -= (td*2373 + 2048) >> 12; \
- /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \
- tr -= (t4*8195 + 8192) >> 14; \
- /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \
- t4 += (tr*1413 + 512) >> 10; \
- /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \
- tr -= (t4*5017 + 4096) >> 13; \
- /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \
- t5 -= (tq*2109 + 4096) >> 13; \
- /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \
- tq -= (t5*3099 + 2048) >> 12; \
- /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \
- t5 += (tq*4359 + 4096) >> 13; \
- /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \
- tp -= (t6*6287 + 16384) >> 15; \
- /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \
- t6 -= (tp*6671 + 4096) >> 13; \
- /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \
- tp += (t6*1061 + 1024) >> 11; \
- /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \
- t7 -= (to*2229 + 2048) >> 12; \
- /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \
- to += (t7*5707 + 2048) >> 12; \
- /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \
- t7 -= (to*323 + 256) >> 9; \
- /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \
- tk += (tb*1971 + 1024) >> 11; \
- /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \
- tb += (tk*1545 + 2048) >> 12; \
- /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \
- tk -= (tb*3459 + 2048) >> 12; \
- /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \
- tl -= (ta*5417 + 8192) >> 14; \
- /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \
- ta -= (tl*2855 + 2048) >> 12; \
- /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \
- tl += (ta*2261 + 2048) >> 12; \
- /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \
- t9 -= (tm*4327 + 16384) >> 15; \
- /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \
- tm -= (t9*891 + 512) >> 10; \
- /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \
- t9 += (tm*4167 + 4096) >> 13; \
- /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \
- tn -= (t8*2413 + 2048) >> 12; \
- /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \
- t8 += (tn*5749 + 2048) >> 12; \
- /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \
- tn -= (t8*5331 + 4096) >> 13; \
- /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \
- ts += (t3*2571 + 2048) >> 12; \
- /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \
- t3 += (ts*4169 + 4096) >> 13; \
- /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \
- ts -= (t3*5477 + 4096) >> 13; \
- /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \
- tt -= (t2*4187 + 4096) >> 13; \
- /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \
- t2 -= (tt*4695 + 4096) >> 13; \
- /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \
- tt += (t2*2527 + 2048) >> 12; \
- /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \
- t1 -= (tu*815 + 16384) >> 15; \
- /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \
- tu -= (t1*1997 + 1024) >> 11; \
- /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \
- t1 += (tu*4099 + 4096) >> 13; \
- /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \
- tv -= (t0*5593 + 4096) >> 13; \
- /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \
- t0 += (tv*5791 + 2048) >> 12; \
- /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \
- tv -= (t0*2847 + 2048) >> 12; \
- \
- t7 = -t7; \
- tf = -tf; \
- tn = -tn; \
- tr = -tr; \
- \
- t7 -= OD_DCT_RSHIFT(t6, 1); \
- t6 += t7; \
- tp -= OD_DCT_RSHIFT(to, 1); \
- to += tp; \
- tr -= OD_DCT_RSHIFT(tq, 1); \
- tq += tr; \
- t5 -= OD_DCT_RSHIFT(t4, 1); \
- t4 += t5; \
- tt -= OD_DCT_RSHIFT(t3, 1); \
- t3 += tt; \
- ts -= OD_DCT_RSHIFT(t2, 1); \
- t2 += ts; \
- tv += OD_DCT_RSHIFT(tu, 1); \
- tu -= tv; \
- t1 -= OD_DCT_RSHIFT(t0, 1); \
- t0 += t1; \
- th -= OD_DCT_RSHIFT(tg, 1); \
- tg += th; \
- tf -= OD_DCT_RSHIFT(te, 1); \
- te += tf; \
- ti += OD_DCT_RSHIFT(tc, 1); \
- tc -= ti; \
- tj += OD_DCT_RSHIFT(td, 1); \
- td -= tj; \
- tn -= OD_DCT_RSHIFT(tm, 1); \
- tm += tn; \
- t9 -= OD_DCT_RSHIFT(t8, 1); \
- t8 += t9; \
- tl -= OD_DCT_RSHIFT(tb, 1); \
- tb += tl; \
- tk -= OD_DCT_RSHIFT(ta, 1); \
- ta += tk; \
- \
- ti -= th; \
- th += OD_DCT_RSHIFT(ti, 1); \
- td -= te; \
- te += OD_DCT_RSHIFT(td, 1); \
- tm += tl; \
- tl -= OD_DCT_RSHIFT(tm, 1); \
- t9 += ta; \
- ta -= OD_DCT_RSHIFT(t9, 1); \
- tp += tq; \
- tq -= OD_DCT_RSHIFT(tp, 1); \
- t6 += t5; \
- t5 -= OD_DCT_RSHIFT(t6, 1); \
- t2 -= t1; \
- t1 += OD_DCT_RSHIFT(t2, 1); \
- tt -= tu; \
- tu += OD_DCT_RSHIFT(tt, 1); \
- tr += t7; \
- trh = OD_DCT_RSHIFT(tr, 1); \
- t7 -= trh; \
- t4 -= to; \
- t4h = OD_DCT_RSHIFT(t4, 1); \
- to += t4h; \
- t0 += t3; \
- t0h = OD_DCT_RSHIFT(t0, 1); \
- t3 -= t0h; \
- tv += ts; \
- tvh = OD_DCT_RSHIFT(tv, 1); \
- ts -= tvh; \
- tf -= tc; \
- tfh = OD_DCT_RSHIFT(tf, 1); \
- tc += tfh; \
- tg += tj; \
- tgh = OD_DCT_RSHIFT(tg, 1); \
- tj -= tgh; \
- tb -= t8; \
- tbh = OD_DCT_RSHIFT(tb, 1); \
- t8 += tbh; \
- tk += tn; \
- tkh = OD_DCT_RSHIFT(tk, 1); \
- tn -= tkh; \
- \
- ta = -ta; \
- tq = -tq; \
- \
- /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
- te -= (th*4861 + 16384) >> 15; \
- /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
- th += (te*1189 + 2048) >> 12; \
- /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
- te -= (th*4861 + 16384) >> 15; \
- /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
- tm -= (t9*513 + 1024) >> 11; \
- /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \
- t9 += (tm*7723 + 8192) >> 14; \
- /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
- tm -= (t9*513 + 1024) >> 11; \
- /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
- t6 -= (tp*2931 + 4096) >> 13; \
- /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
- tp += (t6*5197 + 4096) >> 13; \
- /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
- t6 -= (tp*2931 + 4096) >> 13; \
- /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
- tu -= (t1*805 + 8192) >> 14; \
- /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
- t1 += (tu*803 + 4096) >> 13; \
- /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
- tu -= (t1*805 + 8192) >> 14; \
- /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
- ti -= (td*4861 + 16384) >> 15; \
- /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
- td += (ti*1189 + 2048) >> 12; \
- /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
- ti -= (td*4861 + 16384) >> 15; \
- /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
- ta -= (tl*2455 + 2048) >> 12; \
- /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
- tl += (ta*14449 + 8192) >> 14; \
- /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
- ta -= (tl*2455 + 2048) >> 12; \
- /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
- t5 -= (tq*11725 + 16384) >> 15; \
- /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
- tq += (t5*5197 + 4096) >> 13; \
- /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
- t5 -= (tq*11725 + 16384) >> 15; \
- /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
- t2 -= (tt*805 + 8192) >> 14; \
- /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
- tt += (t2*803 + 4096) >> 13; \
- /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
- t2 -= (tt*805 + 8192) >> 14; \
- \
- tl = -tl; \
- ti = -ti; \
- \
- th += OD_DCT_RSHIFT(t9, 1); \
- t9 -= th; \
- te -= OD_DCT_RSHIFT(tm, 1); \
- tm += te; \
- t1 += OD_DCT_RSHIFT(tp, 1); \
- tp -= t1; \
- tu -= OD_DCT_RSHIFT(t6, 1); \
- t6 += tu; \
- ta -= OD_DCT_RSHIFT(td, 1); \
- td += ta; \
- tl += OD_DCT_RSHIFT(ti, 1); \
- ti -= tl; \
- t5 += OD_DCT_RSHIFT(tt, 1); \
- tt -= t5; \
- tq += OD_DCT_RSHIFT(t2, 1); \
- t2 -= tq; \
- \
- t8 -= tgh; \
- tg += t8; \
- tn += tfh; \
- tf -= tn; \
- t7 -= tvh; \
- tv += t7; \
- to -= t0h; \
- t0 += to; \
- tc += tbh; \
- tb -= tc; \
- tj += tkh; \
- tk -= tj; \
- ts += t4h; \
- t4 -= ts; \
- t3 += trh; \
- tr -= t3; \
- \
- tk = -tk; \
- \
- /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
- tc -= (tj*2485 + 4096) >> 13; \
- /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
- tj += (tc*18205 + 16384) >> 15; \
- /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
- tc -= (tj*2485 + 4096) >> 13; \
- /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
- ts -= (t3*3227 + 16384) >> 15; \
- /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
- t3 += (ts*6393 + 16384) >> 15; \
- /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
- ts -= (t3*3227 + 16384) >> 15; \
- /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
- tk -= (tb*17515 + 16384) >> 15; \
- /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \
- tb += (tk*13623 + 8192) >> 14; \
- /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
- tk -= (tb*17515 + 16384) >> 15; \
- /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
- t4 -= (tr*6723 + 4096) >> 13; \
- /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \
- tr += (t4*16069 + 8192) >> 14; \
- /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
- t4 -= (tr*6723 + 4096) >> 13; \
- \
- t4 = -t4; \
- \
- tp += tm; \
- tm -= OD_DCT_RSHIFT(tp, 1); \
- t9 -= t6; \
- t6 += OD_DCT_RSHIFT(t9, 1); \
- th -= t1; \
- t1 += OD_DCT_RSHIFT(th, 1); \
- tu -= te; \
- te += OD_DCT_RSHIFT(tu, 1); /* pass */ \
- t5 -= tl; \
- tl += OD_DCT_RSHIFT(t5, 1); \
- ta += tq; \
- tq -= OD_DCT_RSHIFT(ta, 1); \
- td += tt; \
- tt -= OD_DCT_RSHIFT(td, 1); \
- t2 -= ti; \
- ti += OD_DCT_RSHIFT(t2, 1); /* pass */ \
- t7 += t8; \
- t8 -= OD_DCT_RSHIFT(t7, 1); \
- tn -= to; \
- to += OD_DCT_RSHIFT(tn, 1); \
- tf -= tv; \
- tv += OD_DCT_RSHIFT(tf, 1); \
- t0 += tg; \
- tg -= OD_DCT_RSHIFT(t0, 1); /* pass */ \
- tj -= t3; \
- t3 += OD_DCT_RSHIFT(tj, 1); /* pass */ \
- ts -= tc; \
- tc += OD_DCT_RSHIFT(ts, 1); \
- t4 -= tb; \
- tb += OD_DCT_RSHIFT(t4, 1); /* pass */ \
- tk -= tr; \
- tr += OD_DCT_RSHIFT(tk, 1); \
- \
- t1 = -t1; \
- t3 = -t3; \
- t7 = -t7; \
- t8 = -t8; \
- tg = -tg; \
- tm = -tm; \
- to = -to; \
- \
- /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
- tm -= (t9*14341 + 8192) >> 14; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- t9 += (tm*15137 + 8192) >> 14; \
- /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
- tm -= (t9*4161 + 8192) >> 14; \
- /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
- tp -= (t6*4161 + 8192) >> 14; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- t6 += (tp*15137 + 8192) >> 14; \
- /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
- tp -= (t6*28681 + 16384) >> 15; \
- /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
- th += (te*19195 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
- te += (th*11585 + 8192) >> 14; \
- /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
- th -= (te*29957 + 16384) >> 15; \
- /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
- tq -= (t5*14341 + 8192) >> 14; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- t5 += (tq*15137 + 8192) >> 14; \
- /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
- tq -= (t5*4161 + 8192) >> 14; \
- /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
- ta -= (tl*3259 + 4096) >> 13; \
- /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \
- tl += (ta*3135 + 8192) >> 14; \
- /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
- ta -= (tl*3259 + 4096) >> 13; \
- /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
- ti -= (td*7489 + 4096) >> 13; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
- td += (ti*11585 + 8192) >> 14; \
- /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
- ti += (td*19195 + 16384) >> 15; \
- /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
- to -= (t7*14341 + 8192) >> 14; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- t7 += (to*15137 + 8192) >> 14; \
- /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
- to -= (t7*4161 + 8192) >> 14; \
- /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
- tn -= (t8*4161 + 8192) >> 14; \
- /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
- t8 += (tn*15137 + 8192) >> 14; \
- /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
- tn -= (t8*28681 + 16384) >> 15; \
- /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
- tf += (tg*19195 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
- tg += (tf*11585 + 8192) >> 14; \
- /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
- tf -= (tg*29957 + 16384) >> 15; \
- /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
- tj += (tc*19195 + 16384) >> 15; \
- /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
- tc += (tj*11585 + 8192) >> 14; \
- /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
- tj -= (tc*29957 + 16384) >> 15; \
- /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
- tk += (tb*13573 + 8192) >> 14; \
- /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
- tb -= (tk*11585 + 16384) >> 15; \
- /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
- tk += (tb*13573 + 8192) >> 14; \
- \
- tf = -tf; \
- \
- } \
- while (0)
-
-#define OD_FDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \
- us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \
- ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \
- ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \
- /* Embedded 64-point orthonormal Type-II fDCT. */ \
- do { \
- int uwh; \
- int uxh; \
- int uyh; \
- int uzh; \
- int uAh; \
- int uBh; \
- int uCh; \
- int uDh; \
- int uEh; \
- int uFh; \
- int uGh; \
- int uHh; \
- int uIh; \
- int uJh; \
- int uKh; \
- int uLh; \
- int uMh; \
- int uNh; \
- int uOh; \
- int uPh; \
- int uQh; \
- int uRh; \
- int uSh; \
- int uTh; \
- int uUh; \
- int uVh; \
- int uWh; \
- int uXh; \
- int uYh; \
- int uZh; \
- int u_h; \
- int uh_; \
- u = u0 - u; \
- uh_ = OD_DCT_RSHIFT(u, 1); \
- u0 -= uh_; \
- u_ += u1; \
- u_h = OD_DCT_RSHIFT(u_, 1); \
- u1 = u_h - u1; \
- uZ = u2 - uZ; \
- uZh = OD_DCT_RSHIFT(uZ, 1); \
- u2 -= uZh; \
- uY += u3; \
- uYh = OD_DCT_RSHIFT(uY, 1); \
- u3 = uYh - u3; \
- uX = u4 - uX; \
- uXh = OD_DCT_RSHIFT(uX, 1); \
- u4 -= uXh; \
- uW += u5; \
- uWh = OD_DCT_RSHIFT(uW, 1); \
- u5 = uWh - u5; \
- uV = u6 - uV; \
- uVh = OD_DCT_RSHIFT(uV, 1); \
- u6 -= uVh; \
- uU += u7; \
- uUh = OD_DCT_RSHIFT(uU, 1); \
- u7 = uUh - u7; \
- uT = u8 - uT; \
- uTh = OD_DCT_RSHIFT(uT, 1); \
- u8 -= uTh; \
- uS += u9; \
- uSh = OD_DCT_RSHIFT(uS, 1); \
- u9 = uSh - u9; \
- uR = ua - uR; \
- uRh = OD_DCT_RSHIFT(uR, 1); \
- ua -= uRh; \
- uQ += ub; \
- uQh = OD_DCT_RSHIFT(uQ, 1); \
- ub = uQh - ub; \
- uP = uc - uP; \
- uPh = OD_DCT_RSHIFT(uP, 1); \
- uc -= uPh; \
- uO += ud; \
- uOh = OD_DCT_RSHIFT(uO, 1); \
- ud = uOh - ud; \
- uN = ue - uN; \
- uNh = OD_DCT_RSHIFT(uN, 1); \
- ue -= uNh; \
- uM += uf; \
- uMh = OD_DCT_RSHIFT(uM, 1); \
- uf = uMh - uf; \
- uL = ug - uL; \
- uLh = OD_DCT_RSHIFT(uL, 1); \
- ug -= uLh; \
- uK += uh; \
- uKh = OD_DCT_RSHIFT(uK, 1); \
- uh = uKh - uh; \
- uJ = ui - uJ; \
- uJh = OD_DCT_RSHIFT(uJ, 1); \
- ui -= uJh; \
- uI += uj; \
- uIh = OD_DCT_RSHIFT(uI, 1); \
- uj = uIh - uj; \
- uH = uk - uH; \
- uHh = OD_DCT_RSHIFT(uH, 1); \
- uk -= uHh; \
- uG += ul; \
- uGh = OD_DCT_RSHIFT(uG, 1); \
- ul = uGh - ul; \
- uF = um - uF; \
- uFh = OD_DCT_RSHIFT(uF, 1); \
- um -= uFh; \
- uE += un; \
- uEh = OD_DCT_RSHIFT(uE, 1); \
- un = uEh - un; \
- uD = uo - uD; \
- uDh = OD_DCT_RSHIFT(uD, 1); \
- uo -= uDh; \
- uC += up; \
- uCh = OD_DCT_RSHIFT(uC, 1); \
- up = uCh - up; \
- uB = uq - uB; \
- uBh = OD_DCT_RSHIFT(uB, 1); \
- uq -= uBh; \
- uA += ur; \
- uAh = OD_DCT_RSHIFT(uA, 1); \
- ur = uAh - ur; \
- uz = us - uz; \
- uzh = OD_DCT_RSHIFT(uz, 1); \
- us -= uzh; \
- uy += ut; \
- uyh = OD_DCT_RSHIFT(uy, 1); \
- ut = uyh - ut; \
- ux = uu - ux; \
- uxh = OD_DCT_RSHIFT(ux, 1); \
- uu -= uxh; \
- uw += uv; \
- uwh = OD_DCT_RSHIFT(uw, 1); \
- uv = uwh - uv; \
- OD_FDCT_32_ASYM(u0, uw, uwh, ug, uM, uMh, u8, uE, uEh, uo, uU, uUh, \
- u4, uA, uAh, uk, uQ, uQh, uc, uI, uIh, us, uY, uYh, u2, uy, uyh, \
- ui, uO, uOh, ua, uG, uGh, uq, uW, uWh, u6, uC, uCh, um, uS, uSh, \
- ue, uK, uKh, uu, u_, u_h); \
- OD_FDST_32_ASYM(u, uv, uL, uf, uT, un, uD, u7, uX, ur, uH, ub, uP, uj, \
- uz, u3, uZ, ut, uJ, ud, uR, ul, uB, u5, uV, up, uF, u9, uN, uh, ux, u1); \
- } \
- while (0)
-
-#define OD_IDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \
- us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \
- ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \
- ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \
- /* Embedded 64-point orthonormal Type-II iDCT. */ \
- do { \
- int u1h; \
- int u3h; \
- int u5h; \
- int u7h; \
- int u9h; \
- int ubh; \
- int udh; \
- int ufh; \
- int uhh; \
- int ujh; \
- int ulh; \
- int unh; \
- int uph; \
- int urh; \
- int uth; \
- int uvh; \
- int uxh; \
- int uzh; \
- int uBh; \
- int uDh; \
- int uFh; \
- int uHh; \
- int uJh; \
- int uLh; \
- int uNh; \
- int uPh; \
- int uRh; \
- int uTh; \
- int uVh; \
- int uXh; \
- int uZh; \
- int uh_; \
- OD_IDST_32_ASYM(u, uL, uT, uD, uX, uH, uP, uz, uZ, uJ, uR, uB, uV, uF, \
- uN, ux, u_, uK, uS, uC, uW, uG, uO, uy, uY, uI, uQ, uA, uU, uE, uM, uw); \
- OD_IDCT_32_ASYM(u0, ug, u8, uo, u4, uk, uc, us, u2, ui, ua, uq, u6, um, \
- ue, uu, u1, u1h, uh, uhh, u9, u9h, up, uph, u5, u5h, ul, ulh, ud, udh, \
- ut, uth, u3, u3h, uj, ujh, ub, ubh, ur, urh, u7, u7h, un, unh, uf, ufh, \
- uv, uvh); \
- uh_ = OD_DCT_RSHIFT(u, 1); \
- u0 += uh_; \
- u = u0 - u; \
- u_ = u1h - u_; \
- u1 -= u_; \
- uZh = OD_DCT_RSHIFT(uZ, 1); \
- u2 += uZh; \
- uZ = u2 - uZ; \
- uY = u3h - uY; \
- u3 -= uY; \
- uXh = OD_DCT_RSHIFT(uX, 1); \
- u4 += uXh; \
- uX = u4 - uX; \
- uW = u5h - uW; \
- u5 -= uW; \
- uVh = OD_DCT_RSHIFT(uV, 1); \
- u6 += uVh; \
- uV = u6 - uV; \
- uU = u7h - uU; \
- u7 -= uU; \
- uTh = OD_DCT_RSHIFT(uT, 1); \
- u8 += uTh; \
- uT = u8 - uT; \
- uS = u9h - uS; \
- u9 -= uS; \
- uRh = OD_DCT_RSHIFT(uR, 1); \
- ua += uRh; \
- uR = ua - uR; \
- uQ = ubh - uQ; \
- ub -= uQ; \
- uPh = OD_DCT_RSHIFT(uP, 1); \
- uc += uPh; \
- uP = uc - uP; \
- uO = udh - uO; \
- ud -= uO; \
- uNh = OD_DCT_RSHIFT(uN, 1); \
- ue += uNh; \
- uN = ue - uN; \
- uM = ufh - uM; \
- uf -= uM; \
- uLh = OD_DCT_RSHIFT(uL, 1); \
- ug += uLh; \
- uL = ug - uL; \
- uK = uhh - uK; \
- uh -= uK; \
- uJh = OD_DCT_RSHIFT(uJ, 1); \
- ui += uJh; \
- uJ = ui - uJ; \
- uI = ujh - uI; \
- uj -= uI; \
- uHh = OD_DCT_RSHIFT(uH, 1); \
- uk += uHh; \
- uH = uk - uH; \
- uG = ulh - uG; \
- ul -= uG; \
- uFh = OD_DCT_RSHIFT(uF, 1); \
- um += uFh; \
- uF = um - uF; \
- uE = unh - uE; \
- un -= uE; \
- uDh = OD_DCT_RSHIFT(uD, 1); \
- uo += uDh; \
- uD = uo - uD; \
- uC = uph - uC; \
- up -= uC; \
- uBh = OD_DCT_RSHIFT(uB, 1); \
- uq += uBh; \
- uB = uq - uB; \
- uA = urh - uA; \
- ur -= uA; \
- uzh = OD_DCT_RSHIFT(uz, 1); \
- us += uzh; \
- uz = us - uz; \
- uy = uth - uy; \
- ut -= uy; \
- uxh = OD_DCT_RSHIFT(ux, 1); \
- uu += uxh; \
- ux = uu - ux; \
- uw = uvh - uw; \
- uv -= uw; \
- } while (0)
-#endif
-
-void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride) {
- int q0;
- int q1;
- int q2;
- int q3;
- q0 = x[0*xstride];
- q2 = x[1*xstride];
- q1 = x[2*xstride];
- q3 = x[3*xstride];
- OD_FDCT_4(q0, q2, q1, q3);
- y[0] = (od_coeff)q0;
- y[1] = (od_coeff)q1;
- y[2] = (od_coeff)q2;
- y[3] = (od_coeff)q3;
-}
-
-void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]) {
- int q0;
- int q1;
- int q2;
- int q3;
- q0 = y[0];
- q2 = y[1];
- q1 = y[2];
- q3 = y[3];
- OD_IDCT_4(q0, q2, q1, q3);
- x[0*xstride] = q0;
- x[1*xstride] = q1;
- x[2*xstride] = q2;
- x[3*xstride] = q3;
-}
-
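These wrappers only gather the inputs into the ordering the embedded macro expects and scatter the results back, so callers work with natural-order arrays. The sketch below shows a round trip over a contiguous column (xstride = 1); it assumes only the od_coeff typedef and the two prototypes above, and because the inverse retraces the forward lifting steps the output should reproduce the input exactly.

/* Forward then inverse 4-point transform on a small test vector. */
static void od_tx4_roundtrip_sketch(void) {
  od_coeff in[4] = { 10, -3, 7, 0 };
  od_coeff freq[4];
  od_coeff out[4];
  od_bin_fdct4(freq, in, 1);   /* samples -> transform coefficients */
  od_bin_idct4(out, 1, freq);  /* coefficients -> samples again */
  (void)out;
}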
-void od_bin_fdst4(od_coeff y[4], const od_coeff *x, int xstride) {
- int q0;
- int q1;
- int q2;
- int q3;
- q0 = x[3*xstride];
- q2 = x[2*xstride];
- q1 = x[1*xstride];
- q3 = x[0*xstride];
- OD_FDST_4(q0, q2, q1, q3);
- y[0] = (od_coeff)q3;
- y[1] = (od_coeff)q2;
- y[2] = (od_coeff)q1;
- y[3] = (od_coeff)q0;
-}
-
-void od_bin_idst4(od_coeff *x, int xstride, const od_coeff y[4]) {
- int q0;
- int q1;
- int q2;
- int q3;
- q0 = y[3];
- q2 = y[2];
- q1 = y[1];
- q3 = y[0];
- OD_IDST_4(q0, q2, q1, q3);
- x[0*xstride] = q3;
- x[1*xstride] = q2;
- x[2*xstride] = q1;
- x[3*xstride] = q0;
-}
-
-void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride) {
- int r0;
- int r1;
- int r2;
- int r3;
- int r4;
- int r5;
- int r6;
- int r7;
- r0 = x[0*xstride];
- r4 = x[1*xstride];
- r2 = x[2*xstride];
- r6 = x[3*xstride];
- r1 = x[4*xstride];
- r5 = x[5*xstride];
- r3 = x[6*xstride];
- r7 = x[7*xstride];
- OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7);
- y[0] = (od_coeff)r0;
- y[1] = (od_coeff)r1;
- y[2] = (od_coeff)r2;
- y[3] = (od_coeff)r3;
- y[4] = (od_coeff)r4;
- y[5] = (od_coeff)r5;
- y[6] = (od_coeff)r6;
- y[7] = (od_coeff)r7;
-}
-
-void od_bin_idct8(od_coeff *x, int xstride, const od_coeff y[8]) {
- int r0;
- int r1;
- int r2;
- int r3;
- int r4;
- int r5;
- int r6;
- int r7;
- r0 = y[0];
- r4 = y[1];
- r2 = y[2];
- r6 = y[3];
- r1 = y[4];
- r5 = y[5];
- r3 = y[6];
- r7 = y[7];
- OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7);
- x[0*xstride] = (od_coeff)r0;
- x[1*xstride] = (od_coeff)r1;
- x[2*xstride] = (od_coeff)r2;
- x[3*xstride] = (od_coeff)r3;
- x[4*xstride] = (od_coeff)r4;
- x[5*xstride] = (od_coeff)r5;
- x[6*xstride] = (od_coeff)r6;
- x[7*xstride] = (od_coeff)r7;
-}
-
-void od_bin_fdst8(od_coeff y[8], const od_coeff *x, int xstride) {
- int r0;
- int r1;
- int r2;
- int r3;
- int r4;
- int r5;
- int r6;
- int r7;
- r0 = x[0*xstride];
- r4 = x[1*xstride];
- r2 = x[2*xstride];
- r6 = x[3*xstride];
- r1 = x[4*xstride];
- r5 = x[5*xstride];
- r3 = x[6*xstride];
- r7 = x[7*xstride];
- OD_FDST_8(r0, r4, r2, r6, r1, r5, r3, r7);
- y[0] = (od_coeff)r0;
- y[1] = (od_coeff)r1;
- y[2] = (od_coeff)r2;
- y[3] = (od_coeff)r3;
- y[4] = (od_coeff)r4;
- y[5] = (od_coeff)r5;
- y[6] = (od_coeff)r6;
- y[7] = (od_coeff)r7;
-}
-
-void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]) {
- int r0;
- int r1;
- int r2;
- int r3;
- int r4;
- int r5;
- int r6;
- int r7;
- r0 = y[0];
- r4 = y[1];
- r2 = y[2];
- r6 = y[3];
- r1 = y[4];
- r5 = y[5];
- r3 = y[6];
- r7 = y[7];
- OD_IDST_8(r0, r4, r2, r6, r1, r5, r3, r7);
- x[0*xstride] = (od_coeff)r0;
- x[1*xstride] = (od_coeff)r1;
- x[2*xstride] = (od_coeff)r2;
- x[3*xstride] = (od_coeff)r3;
- x[4*xstride] = (od_coeff)r4;
- x[5*xstride] = (od_coeff)r5;
- x[6*xstride] = (od_coeff)r6;
- x[7*xstride] = (od_coeff)r7;
-}
-
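Each of these 1-D kernels takes a stride, so the same function can run down either dimension of a block. One plausible way to assemble a separable 8x8 forward transform from od_bin_fdct8 is sketched below; how the library actually builds its 2-D transforms is not shown in this hunk, and the sketch assumes only the od_coeff type and the prototype above.

/* Column pass with xstride = 8 (taps are 8 coefficients apart in a
   row-major 8x8 block), then a row pass with xstride = 1 over the
   intermediate result. */
static void od_fdct8x8_separable_sketch(od_coeff out[8*8],
                                        const od_coeff in[8*8]) {
  od_coeff tmp[8*8];
  od_coeff col[8];
  int i;
  int j;
  for (j = 0; j < 8; j++) {
    od_bin_fdct8(col, in + j, 8);              /* transform column j */
    for (i = 0; i < 8; i++) tmp[i*8 + j] = col[i];
  }
  for (i = 0; i < 8; i++) {
    od_bin_fdct8(out + i*8, tmp + i*8, 1);     /* transform row i */
  }
}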
-void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride) {
- int s0;
- int s1;
- int s2;
- int s3;
- int s4;
- int s5;
- int s6;
- int s7;
- int s8;
- int s9;
- int sa;
- int sb;
- int sc;
- int sd;
- int se;
- int sf;
- s0 = x[0*xstride];
- s8 = x[1*xstride];
- s4 = x[2*xstride];
- sc = x[3*xstride];
- s2 = x[4*xstride];
- sa = x[5*xstride];
- s6 = x[6*xstride];
- se = x[7*xstride];
- s1 = x[8*xstride];
- s9 = x[9*xstride];
- s5 = x[10*xstride];
- sd = x[11*xstride];
- s3 = x[12*xstride];
- sb = x[13*xstride];
- s7 = x[14*xstride];
- sf = x[15*xstride];
- OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
- y[0] = (od_coeff)s0;
- y[1] = (od_coeff)s1;
- y[2] = (od_coeff)s2;
- y[3] = (od_coeff)s3;
- y[4] = (od_coeff)s4;
- y[5] = (od_coeff)s5;
- y[6] = (od_coeff)s6;
- y[7] = (od_coeff)s7;
- y[8] = (od_coeff)s8;
- y[9] = (od_coeff)s9;
- y[10] = (od_coeff)sa;
- y[11] = (od_coeff)sb;
- y[12] = (od_coeff)sc;
- y[13] = (od_coeff)sd;
- y[14] = (od_coeff)se;
- y[15] = (od_coeff)sf;
-}
-
-void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]) {
- int s0;
- int s1;
- int s2;
- int s3;
- int s4;
- int s5;
- int s6;
- int s7;
- int s8;
- int s9;
- int sa;
- int sb;
- int sc;
- int sd;
- int se;
- int sf;
- s0 = y[0];
- s8 = y[1];
- s4 = y[2];
- sc = y[3];
- s2 = y[4];
- sa = y[5];
- s6 = y[6];
- se = y[7];
- s1 = y[8];
- s9 = y[9];
- s5 = y[10];
- sd = y[11];
- s3 = y[12];
- sb = y[13];
- s7 = y[14];
- sf = y[15];
- OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
- x[0*xstride] = (od_coeff)s0;
- x[1*xstride] = (od_coeff)s1;
- x[2*xstride] = (od_coeff)s2;
- x[3*xstride] = (od_coeff)s3;
- x[4*xstride] = (od_coeff)s4;
- x[5*xstride] = (od_coeff)s5;
- x[6*xstride] = (od_coeff)s6;
- x[7*xstride] = (od_coeff)s7;
- x[8*xstride] = (od_coeff)s8;
- x[9*xstride] = (od_coeff)s9;
- x[10*xstride] = (od_coeff)sa;
- x[11*xstride] = (od_coeff)sb;
- x[12*xstride] = (od_coeff)sc;
- x[13*xstride] = (od_coeff)sd;
- x[14*xstride] = (od_coeff)se;
- x[15*xstride] = (od_coeff)sf;
-}
-
-void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride) {
- int s0;
- int s1;
- int s2;
- int s3;
- int s4;
- int s5;
- int s6;
- int s7;
- int s8;
- int s9;
- int sa;
- int sb;
- int sc;
- int sd;
- int se;
- int sf;
- s0 = x[15*xstride];
- s8 = x[14*xstride];
- s4 = x[13*xstride];
- sc = x[12*xstride];
- s2 = x[11*xstride];
- sa = x[10*xstride];
- s6 = x[9*xstride];
- se = x[8*xstride];
- s1 = x[7*xstride];
- s9 = x[6*xstride];
- s5 = x[5*xstride];
- sd = x[4*xstride];
- s3 = x[3*xstride];
- sb = x[2*xstride];
- s7 = x[1*xstride];
- sf = x[0*xstride];
- OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
- y[0] = (od_coeff)sf;
- y[1] = (od_coeff)se;
- y[2] = (od_coeff)sd;
- y[3] = (od_coeff)sc;
- y[4] = (od_coeff)sb;
- y[5] = (od_coeff)sa;
- y[6] = (od_coeff)s9;
- y[7] = (od_coeff)s8;
- y[8] = (od_coeff)s7;
- y[9] = (od_coeff)s6;
- y[10] = (od_coeff)s5;
- y[11] = (od_coeff)s4;
- y[12] = (od_coeff)s3;
- y[13] = (od_coeff)s2;
- y[14] = (od_coeff)s1;
- y[15] = (od_coeff)s0;
-}
-
-void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]) {
- int s0;
- int s1;
- int s2;
- int s3;
- int s4;
- int s5;
- int s6;
- int s7;
- int s8;
- int s9;
- int sa;
- int sb;
- int sc;
- int sd;
- int se;
- int sf;
- s0 = y[15];
- s8 = y[14];
- s4 = y[13];
- sc = y[12];
- s2 = y[11];
- sa = y[10];
- s6 = y[9];
- se = y[8];
- s1 = y[7];
- s9 = y[6];
- s5 = y[5];
- sd = y[4];
- s3 = y[3];
- sb = y[2];
- s7 = y[1];
- sf = y[0];
- OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
- x[0*xstride] = (od_coeff)sf;
- x[1*xstride] = (od_coeff)se;
- x[2*xstride] = (od_coeff)sd;
- x[3*xstride] = (od_coeff)sc;
- x[4*xstride] = (od_coeff)sb;
- x[5*xstride] = (od_coeff)sa;
- x[6*xstride] = (od_coeff)s9;
- x[7*xstride] = (od_coeff)s8;
- x[8*xstride] = (od_coeff)s7;
- x[9*xstride] = (od_coeff)s6;
- x[10*xstride] = (od_coeff)s5;
- x[11*xstride] = (od_coeff)s4;
- x[12*xstride] = (od_coeff)s3;
- x[13*xstride] = (od_coeff)s2;
- x[14*xstride] = (od_coeff)s1;
- x[15*xstride] = (od_coeff)s0;
-}
-
-void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride) {
- /* 215 adds, 38 shifts, 87 "muls". */
- int t0;
- int t1;
- int t2;
- int t3;
- int t4;
- int t5;
- int t6;
- int t7;
- int t8;
- int t9;
- int ta;
- int tb;
- int tc;
- int td;
- int te;
- int tf;
- int tg;
- int th;
- int ti;
- int tj;
- int tk;
- int tl;
- int tm;
- int tn;
- int to;
- int tp;
- int tq;
- int tr;
- int ts;
- int tt;
- int tu;
- int tv;
- t0 = x[0*xstride];
- tg = x[1*xstride];
- t8 = x[2*xstride];
- to = x[3*xstride];
- t4 = x[4*xstride];
- tk = x[5*xstride];
- tc = x[6*xstride];
- ts = x[7*xstride];
- t2 = x[8*xstride];
- ti = x[9*xstride];
- ta = x[10*xstride];
- tq = x[11*xstride];
- t6 = x[12*xstride];
- tm = x[13*xstride];
- te = x[14*xstride];
- tu = x[15*xstride];
- t1 = x[16*xstride];
- th = x[17*xstride];
- t9 = x[18*xstride];
- tp = x[19*xstride];
- t5 = x[20*xstride];
- tl = x[21*xstride];
- td = x[22*xstride];
- tt = x[23*xstride];
- t3 = x[24*xstride];
- tj = x[25*xstride];
- tb = x[26*xstride];
- tr = x[27*xstride];
- t7 = x[28*xstride];
- tn = x[29*xstride];
- tf = x[30*xstride];
- tv = x[31*xstride];
- OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu,
- t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv);
- y[0] = (od_coeff)t0;
- y[1] = (od_coeff)t1;
- y[2] = (od_coeff)t2;
- y[3] = (od_coeff)t3;
- y[4] = (od_coeff)t4;
- y[5] = (od_coeff)t5;
- y[6] = (od_coeff)t6;
- y[7] = (od_coeff)t7;
- y[8] = (od_coeff)t8;
- y[9] = (od_coeff)t9;
- y[10] = (od_coeff)ta;
- y[11] = (od_coeff)tb;
- y[12] = (od_coeff)tc;
- y[13] = (od_coeff)td;
- y[14] = (od_coeff)te;
- y[15] = (od_coeff)tf;
- y[16] = (od_coeff)tg;
- y[17] = (od_coeff)th;
- y[18] = (od_coeff)ti;
- y[19] = (od_coeff)tj;
- y[20] = (od_coeff)tk;
- y[21] = (od_coeff)tl;
- y[22] = (od_coeff)tm;
- y[23] = (od_coeff)tn;
- y[24] = (od_coeff)to;
- y[25] = (od_coeff)tp;
- y[26] = (od_coeff)tq;
- y[27] = (od_coeff)tr;
- y[28] = (od_coeff)ts;
- y[29] = (od_coeff)tt;
- y[30] = (od_coeff)tu;
- y[31] = (od_coeff)tv;
-}
-
-void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]) {
- int t0;
- int t1;
- int t2;
- int t3;
- int t4;
- int t5;
- int t6;
- int t7;
- int t8;
- int t9;
- int ta;
- int tb;
- int tc;
- int td;
- int te;
- int tf;
- int tg;
- int th;
- int ti;
- int tj;
- int tk;
- int tl;
- int tm;
- int tn;
- int to;
- int tp;
- int tq;
- int tr;
- int ts;
- int tt;
- int tu;
- int tv;
- t0 = y[0];
- tg = y[1];
- t8 = y[2];
- to = y[3];
- t4 = y[4];
- tk = y[5];
- tc = y[6];
- ts = y[7];
- t2 = y[8];
- ti = y[9];
- ta = y[10];
- tq = y[11];
- t6 = y[12];
- tm = y[13];
- te = y[14];
- tu = y[15];
- t1 = y[16];
- th = y[17];
- t9 = y[18];
- tp = y[19];
- t5 = y[20];
- tl = y[21];
- td = y[22];
- tt = y[23];
- t3 = y[24];
- tj = y[25];
- tb = y[26];
- tr = y[27];
- t7 = y[28];
- tn = y[29];
- tf = y[30];
- tv = y[31];
- OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu,
- t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv);
- x[0*xstride] = (od_coeff)t0;
- x[1*xstride] = (od_coeff)t1;
- x[2*xstride] = (od_coeff)t2;
- x[3*xstride] = (od_coeff)t3;
- x[4*xstride] = (od_coeff)t4;
- x[5*xstride] = (od_coeff)t5;
- x[6*xstride] = (od_coeff)t6;
- x[7*xstride] = (od_coeff)t7;
- x[8*xstride] = (od_coeff)t8;
- x[9*xstride] = (od_coeff)t9;
- x[10*xstride] = (od_coeff)ta;
- x[11*xstride] = (od_coeff)tb;
- x[12*xstride] = (od_coeff)tc;
- x[13*xstride] = (od_coeff)td;
- x[14*xstride] = (od_coeff)te;
- x[15*xstride] = (od_coeff)tf;
- x[16*xstride] = (od_coeff)tg;
- x[17*xstride] = (od_coeff)th;
- x[18*xstride] = (od_coeff)ti;
- x[19*xstride] = (od_coeff)tj;
- x[20*xstride] = (od_coeff)tk;
- x[21*xstride] = (od_coeff)tl;
- x[22*xstride] = (od_coeff)tm;
- x[23*xstride] = (od_coeff)tn;
- x[24*xstride] = (od_coeff)to;
- x[25*xstride] = (od_coeff)tp;
- x[26*xstride] = (od_coeff)tq;
- x[27*xstride] = (od_coeff)tr;
- x[28*xstride] = (od_coeff)ts;
- x[29*xstride] = (od_coeff)tt;
- x[30*xstride] = (od_coeff)tu;
- x[31*xstride] = (od_coeff)tv;
-}
-
-#if CONFIG_TX64X64
-void od_bin_fdct64(od_coeff y[64], const od_coeff *x, int xstride) {
- int t0;
- int t1;
- int t2;
- int t3;
- int t4;
- int t5;
- int t6;
- int t7;
- int t8;
- int t9;
- int ta;
- int tb;
- int tc;
- int td;
- int te;
- int tf;
- int tg;
- int th;
- int ti;
- int tj;
- int tk;
- int tl;
- int tm;
- int tn;
- int to;
- int tp;
- int tq;
- int tr;
- int ts;
- int tt;
- int tu;
- int tv;
- int tw;
- int tx;
- int ty;
- int tz;
- int tA;
- int tB;
- int tC;
- int tD;
- int tE;
- int tF;
- int tG;
- int tH;
- int tI;
- int tJ;
- int tK;
- int tL;
- int tM;
- int tN;
- int tO;
- int tP;
- int tQ;
- int tR;
- int tS;
- int tT;
- int tU;
- int tV;
- int tW;
- int tX;
- int tY;
- int tZ;
- int t_;
- int t;
- t0 = x[0*xstride];
- tw = x[1*xstride];
- tg = x[2*xstride];
- tM = x[3*xstride];
- t8 = x[4*xstride];
- tE = x[5*xstride];
- to = x[6*xstride];
- tU = x[7*xstride];
- t4 = x[8*xstride];
- tA = x[9*xstride];
- tk = x[10*xstride];
- tQ = x[11*xstride];
- tc = x[12*xstride];
- tI = x[13*xstride];
- ts = x[14*xstride];
- tY = x[15*xstride];
- t2 = x[16*xstride];
- ty = x[17*xstride];
- ti = x[18*xstride];
- tO = x[19*xstride];
- ta = x[20*xstride];
- tG = x[21*xstride];
- tq = x[22*xstride];
- tW = x[23*xstride];
- t6 = x[24*xstride];
- tC = x[25*xstride];
- tm = x[26*xstride];
- tS = x[27*xstride];
- te = x[28*xstride];
- tK = x[29*xstride];
- tu = x[30*xstride];
- t_ = x[31*xstride];
- t1 = x[32*xstride];
- tx = x[33*xstride];
- th = x[34*xstride];
- tN = x[35*xstride];
- t9 = x[36*xstride];
- tF = x[37*xstride];
- tp = x[38*xstride];
- tV = x[39*xstride];
- t5 = x[40*xstride];
- tB = x[41*xstride];
- tl = x[42*xstride];
- tR = x[43*xstride];
- td = x[44*xstride];
- tJ = x[45*xstride];
- tt = x[46*xstride];
- tZ = x[47*xstride];
- t3 = x[48*xstride];
- tz = x[49*xstride];
- tj = x[50*xstride];
- tP = x[51*xstride];
- tb = x[52*xstride];
- tH = x[53*xstride];
- tr = x[54*xstride];
- tX = x[55*xstride];
- t7 = x[56*xstride];
- tD = x[57*xstride];
- tn = x[58*xstride];
- tT = x[59*xstride];
- tf = x[60*xstride];
- tL = x[61*xstride];
- tv = x[62*xstride];
- t = x[63*xstride];
- OD_FDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY,
- t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx,
- th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP,
- tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t);
- y[0] = (od_coeff)t0;
- y[1] = (od_coeff)t1;
- y[2] = (od_coeff)t2;
- y[3] = (od_coeff)t3;
- y[4] = (od_coeff)t4;
- y[5] = (od_coeff)t5;
- y[6] = (od_coeff)t6;
- y[7] = (od_coeff)t7;
- y[8] = (od_coeff)t8;
- y[9] = (od_coeff)t9;
- y[10] = (od_coeff)ta;
- y[11] = (od_coeff)tb;
- y[12] = (od_coeff)tc;
- y[13] = (od_coeff)td;
- y[14] = (od_coeff)te;
- y[15] = (od_coeff)tf;
- y[16] = (od_coeff)tg;
- y[17] = (od_coeff)th;
- y[18] = (od_coeff)ti;
- y[19] = (od_coeff)tj;
- y[20] = (od_coeff)tk;
- y[21] = (od_coeff)tl;
- y[22] = (od_coeff)tm;
- y[23] = (od_coeff)tn;
- y[24] = (od_coeff)to;
- y[25] = (od_coeff)tp;
- y[26] = (od_coeff)tq;
- y[27] = (od_coeff)tr;
- y[28] = (od_coeff)ts;
- y[29] = (od_coeff)tt;
- y[30] = (od_coeff)tu;
- y[31] = (od_coeff)tv;
- y[32] = (od_coeff)tw;
- y[33] = (od_coeff)tx;
- y[34] = (od_coeff)ty;
- y[35] = (od_coeff)tz;
- y[36] = (od_coeff)tA;
- y[37] = (od_coeff)tB;
- y[38] = (od_coeff)tC;
- y[39] = (od_coeff)tD;
- y[40] = (od_coeff)tE;
- y[41] = (od_coeff)tF;
- y[42] = (od_coeff)tG;
- y[43] = (od_coeff)tH;
- y[44] = (od_coeff)tI;
- y[45] = (od_coeff)tJ;
- y[46] = (od_coeff)tK;
- y[47] = (od_coeff)tL;
- y[48] = (od_coeff)tM;
- y[49] = (od_coeff)tN;
- y[50] = (od_coeff)tO;
- y[51] = (od_coeff)tP;
- y[52] = (od_coeff)tQ;
- y[53] = (od_coeff)tR;
- y[54] = (od_coeff)tS;
- y[55] = (od_coeff)tT;
- y[56] = (od_coeff)tU;
- y[57] = (od_coeff)tV;
- y[58] = (od_coeff)tW;
- y[59] = (od_coeff)tX;
- y[60] = (od_coeff)tY;
- y[61] = (od_coeff)tZ;
- y[62] = (od_coeff)t_;
- y[63] = (od_coeff)t;
-}
-
-void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]) {
- int t0;
- int t1;
- int t2;
- int t3;
- int t4;
- int t5;
- int t6;
- int t7;
- int t8;
- int t9;
- int ta;
- int tb;
- int tc;
- int td;
- int te;
- int tf;
- int tg;
- int th;
- int ti;
- int tj;
- int tk;
- int tl;
- int tm;
- int tn;
- int to;
- int tp;
- int tq;
- int tr;
- int ts;
- int tt;
- int tu;
- int tv;
- int tw;
- int tx;
- int ty;
- int tz;
- int tA;
- int tB;
- int tC;
- int tD;
- int tE;
- int tF;
- int tG;
- int tH;
- int tI;
- int tJ;
- int tK;
- int tL;
- int tM;
- int tN;
- int tO;
- int tP;
- int tQ;
- int tR;
- int tS;
- int tT;
- int tU;
- int tV;
- int tW;
- int tX;
- int tY;
- int tZ;
- int t_;
- int t;
- t0 = y[0];
- tw = y[1];
- tg = y[2];
- tM = y[3];
- t8 = y[4];
- tE = y[5];
- to = y[6];
- tU = y[7];
- t4 = y[8];
- tA = y[9];
- tk = y[10];
- tQ = y[11];
- tc = y[12];
- tI = y[13];
- ts = y[14];
- tY = y[15];
- t2 = y[16];
- ty = y[17];
- ti = y[18];
- tO = y[19];
- ta = y[20];
- tG = y[21];
- tq = y[22];
- tW = y[23];
- t6 = y[24];
- tC = y[25];
- tm = y[26];
- tS = y[27];
- te = y[28];
- tK = y[29];
- tu = y[30];
- t_ = y[31];
- t1 = y[32];
- tx = y[33];
- th = y[34];
- tN = y[35];
- t9 = y[36];
- tF = y[37];
- tp = y[38];
- tV = y[39];
- t5 = y[40];
- tB = y[41];
- tl = y[42];
- tR = y[43];
- td = y[44];
- tJ = y[45];
- tt = y[46];
- tZ = y[47];
- t3 = y[48];
- tz = y[49];
- tj = y[50];
- tP = y[51];
- tb = y[52];
- tH = y[53];
- tr = y[54];
- tX = y[55];
- t7 = y[56];
- tD = y[57];
- tn = y[58];
- tT = y[59];
- tf = y[60];
- tL = y[61];
- tv = y[62];
- t = y[63];
- OD_IDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY,
- t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx,
- th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP,
- tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t);
- x[0*xstride] = (od_coeff)t0;
- x[1*xstride] = (od_coeff)t1;
- x[2*xstride] = (od_coeff)t2;
- x[3*xstride] = (od_coeff)t3;
- x[4*xstride] = (od_coeff)t4;
- x[5*xstride] = (od_coeff)t5;
- x[6*xstride] = (od_coeff)t6;
- x[7*xstride] = (od_coeff)t7;
- x[8*xstride] = (od_coeff)t8;
- x[9*xstride] = (od_coeff)t9;
- x[10*xstride] = (od_coeff)ta;
- x[11*xstride] = (od_coeff)tb;
- x[12*xstride] = (od_coeff)tc;
- x[13*xstride] = (od_coeff)td;
- x[14*xstride] = (od_coeff)te;
- x[15*xstride] = (od_coeff)tf;
- x[16*xstride] = (od_coeff)tg;
- x[17*xstride] = (od_coeff)th;
- x[18*xstride] = (od_coeff)ti;
- x[19*xstride] = (od_coeff)tj;
- x[20*xstride] = (od_coeff)tk;
- x[21*xstride] = (od_coeff)tl;
- x[22*xstride] = (od_coeff)tm;
- x[23*xstride] = (od_coeff)tn;
- x[24*xstride] = (od_coeff)to;
- x[25*xstride] = (od_coeff)tp;
- x[26*xstride] = (od_coeff)tq;
- x[27*xstride] = (od_coeff)tr;
- x[28*xstride] = (od_coeff)ts;
- x[29*xstride] = (od_coeff)tt;
- x[30*xstride] = (od_coeff)tu;
- x[31*xstride] = (od_coeff)tv;
- x[32*xstride] = (od_coeff)tw;
- x[33*xstride] = (od_coeff)tx;
- x[34*xstride] = (od_coeff)ty;
- x[35*xstride] = (od_coeff)tz;
- x[36*xstride] = (od_coeff)tA;
- x[37*xstride] = (od_coeff)tB;
- x[38*xstride] = (od_coeff)tC;
- x[39*xstride] = (od_coeff)tD;
- x[40*xstride] = (od_coeff)tE;
- x[41*xstride] = (od_coeff)tF;
- x[42*xstride] = (od_coeff)tG;
- x[43*xstride] = (od_coeff)tH;
- x[44*xstride] = (od_coeff)tI;
- x[45*xstride] = (od_coeff)tJ;
- x[46*xstride] = (od_coeff)tK;
- x[47*xstride] = (od_coeff)tL;
- x[48*xstride] = (od_coeff)tM;
- x[49*xstride] = (od_coeff)tN;
- x[50*xstride] = (od_coeff)tO;
- x[51*xstride] = (od_coeff)tP;
- x[52*xstride] = (od_coeff)tQ;
- x[53*xstride] = (od_coeff)tR;
- x[54*xstride] = (od_coeff)tS;
- x[55*xstride] = (od_coeff)tT;
- x[56*xstride] = (od_coeff)tU;
- x[57*xstride] = (od_coeff)tV;
- x[58*xstride] = (od_coeff)tW;
- x[59*xstride] = (od_coeff)tX;
- x[60*xstride] = (od_coeff)tY;
- x[61*xstride] = (od_coeff)tZ;
- x[62*xstride] = (od_coeff)t_;
- x[63*xstride] = (od_coeff)t;
-}
-#endif
-
-void daala_fdct4(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[4];
- od_coeff y[4];
- for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i];
- od_bin_fdct4(y, x, 1);
- for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i];
-}
-
-void daala_idct4(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[4];
- od_coeff y[4];
- for (i = 0; i < 4; i++) y[i] = input[i];
- od_bin_idct4(x, 1, y);
- for (i = 0; i < 4; i++) output[i] = (tran_low_t)x[i];
-}
-
-void daala_fdst4(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[4];
- od_coeff y[4];
- for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i];
- od_bin_fdst4(y, x, 1);
- for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i];
-}
-
-void daala_idst4(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[4];
- od_coeff y[4];
- for (i = 0; i < 4; i++) y[i] = input[i];
- od_bin_idst4(x, 1, y);
- for (i = 0; i < 4; i++) output[i] = (tran_low_t)x[i];
-}
-
-void daala_idtx4(const tran_low_t *input, tran_low_t *output) {
- int i;
- for (i = 0; i < 4; i++) output[i] = input[i];
-}
-
-void daala_fdct8(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[8];
- od_coeff y[8];
- for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i];
- od_bin_fdct8(y, x, 1);
- for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i];
-}
-
-void daala_idct8(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[8];
- od_coeff y[8];
- for (i = 0; i < 8; i++) y[i] = (od_coeff)input[i];
- od_bin_idct8(x, 1, y);
- for (i = 0; i < 8; i++) output[i] = (tran_low_t)x[i];
-}
-
-void daala_fdst8(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[8];
- od_coeff y[8];
- for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i];
- od_bin_fdst8(y, x, 1);
- for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i];
-}
-
-void daala_idst8(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[8];
- od_coeff y[8];
- for (i = 0; i < 8; i++) y[i] = (od_coeff)input[i];
- od_bin_idst8(x, 1, y);
- for (i = 0; i < 8; i++) output[i] = (tran_low_t)x[i];
-}
-
-void daala_idtx8(const tran_low_t *input, tran_low_t *output) {
- int i;
- for (i = 0; i < 8; i++) output[i] = input[i];
-}
-
-void daala_fdct16(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[16];
- od_coeff y[16];
- for (i = 0; i < 16; i++) x[i] = (od_coeff)input[i];
- od_bin_fdct16(y, x, 1);
- for (i = 0; i < 16; i++) output[i] = (tran_low_t)y[i];
-}
-
-void daala_idct16(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[16];
- od_coeff y[16];
- for (i = 0; i < 16; i++) y[i] = (od_coeff)input[i];
- od_bin_idct16(x, 1, y);
- for (i = 0; i < 16; i++) output[i] = (tran_low_t)x[i];
-}
-
-void daala_fdst16(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[16];
- od_coeff y[16];
- for (i = 0; i < 16; i++) x[i] = (od_coeff)input[i];
- od_bin_fdst16(y, x, 1);
- for (i = 0; i < 16; i++) output[i] = (tran_low_t)y[i];
-}
-
-void daala_idst16(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[16];
- od_coeff y[16];
- for (i = 0; i < 16; i++) y[i] = (od_coeff)input[i];
- od_bin_idst16(x, 1, y);
- for (i = 0; i < 16; i++) output[i] = (tran_low_t)x[i];
-}
-
-void daala_idtx16(const tran_low_t *input, tran_low_t *output) {
- int i;
- for (i = 0; i < 16; i++) output[i] = input[i];
-}
-
-void daala_fdct32(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[32];
- od_coeff y[32];
- for (i = 0; i < 32; i++) x[i] = (od_coeff)input[i];
- od_bin_fdct32(y, x, 1);
- for (i = 0; i < 32; i++) output[i] = (tran_low_t)y[i];
-}
-
-void daala_idct32(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[32];
- od_coeff y[32];
- for (i = 0; i < 32; i++) y[i] = (od_coeff)input[i];
- od_bin_idct32(x, 1, y);
- for (i = 0; i < 32; i++) output[i] = (tran_low_t)x[i];
-}
-
-/* Preserve the "half-right" transform behavior. */
-void daala_fdst32(const tran_low_t *input, tran_low_t *output) {
- int i;
- tran_low_t inputhalf[16];
- for (i = 0; i < 16; ++i) {
- output[16 + i] = input[i];
- }
- for (i = 0; i < 16; ++i) {
- inputhalf[i] = input[i + 16];
- }
- daala_fdct16(inputhalf, output);
-}
-
-/* Preserve the "half-right" transform behavior. */
-void daala_idst32(const tran_low_t *input, tran_low_t *output) {
- int i;
- tran_low_t inputhalf[16];
- for (i = 0; i < 16; ++i) {
- inputhalf[i] = input[i];
- }
- for (i = 0; i < 16; ++i) {
- output[i] = input[16 + i];
- }
- daala_idct16(inputhalf, output + 16);
-}
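Taken together, the pair above realizes the 32-point DST as a pass-through of the first 16 inputs plus a 16-point DCT of the remaining 16. A quick way to see what is preserved is to round-trip it; the sketch below is a hypothetical test harness (not part of this file) that assumes only the declarations from daala_tx.h, and the printed error should be zero if the underlying lifting transforms invert exactly.

#include <stdio.h>
#include <stdlib.h>
#include "av1/common/daala_tx.h"

int main(void) {
  tran_low_t in[32], coeff[32], out[32];
  int i, max_err = 0;
  for (i = 0; i < 32; i++) in[i] = (tran_low_t)(4 * i - 64);
  daala_fdst32(in, coeff);  /* copy first half up, fdct16 the second half */
  daala_idst32(coeff, out); /* undo the mapping */
  for (i = 0; i < 32; i++) {
    const int err = abs((int)(out[i] - in[i]));
    if (err > max_err) max_err = err;
  }
  printf("max |out - in| = %d\n", max_err);
  return 0;
}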
-
-void daala_idtx32(const tran_low_t *input, tran_low_t *output) {
- int i;
- for (i = 0; i < 32; i++) output[i] = input[i];
-}
-
-#if CONFIG_TX64X64
-void daala_fdct64(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[64];
- od_coeff y[64];
- for (i = 0; i < 64; i++) x[i] = (od_coeff)input[i];
- od_bin_fdct64(y, x, 1);
- for (i = 0; i < 64; i++) output[i] = (tran_low_t)y[i];
-}
-
-void daala_idct64(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[64];
- od_coeff y[64];
- for (i = 0; i < 64; i++) y[i] = (od_coeff)input[i];
- od_bin_idct64(x, 1, y);
- for (i = 0; i < 64; i++) output[i] = (tran_low_t)x[i];
-}
-
-/* Preserve the "half-right" transform behavior. */
-void daala_fdst64(const tran_low_t *input, tran_low_t *output) {
- int i;
- tran_low_t inputhalf[32];
- for (i = 0; i < 32; ++i) {
- output[32 + i] = input[i];
- }
- for (i = 0; i < 32; ++i) {
- inputhalf[i] = input[i + 32];
- }
- daala_fdct32(inputhalf, output);
-}
-
-/* Preserve the "half-right" transform behavior. */
-void daala_idst64(const tran_low_t *input, tran_low_t *output) {
- int i;
- tran_low_t inputhalf[32];
- for (i = 0; i < 32; ++i) {
- inputhalf[i] = input[i];
- }
- for (i = 0; i < 32; ++i) {
- output[i] = input[32 + i];
- }
- daala_idct32(inputhalf, output + 32);
-}
-
-void daala_idtx64(const tran_low_t *input, tran_low_t *output) {
- int i;
- for (i = 0; i < 64; i++) output[i] = input[i];
-}
-#endif
diff --git a/third_party/aom/av1/common/daala_tx.h b/third_party/aom/av1/common/daala_tx.h
deleted file mode 100644
index 7145b66a2..000000000
--- a/third_party/aom/av1/common/daala_tx.h
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef AOM_DSP_DAALA_TX_H_
-#define AOM_DSP_DAALA_TX_H_
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "av1/common/odintrin.h"
-
-void daala_fdct4(const tran_low_t *input, tran_low_t *output);
-void daala_idct4(const tran_low_t *input, tran_low_t *output);
-void daala_fdst4(const tran_low_t *input, tran_low_t *output);
-void daala_idst4(const tran_low_t *input, tran_low_t *output);
-void daala_idtx4(const tran_low_t *input, tran_low_t *output);
-void daala_fdct8(const tran_low_t *input, tran_low_t *output);
-void daala_idct8(const tran_low_t *input, tran_low_t *output);
-void daala_fdst8(const tran_low_t *input, tran_low_t *output);
-void daala_idst8(const tran_low_t *input, tran_low_t *output);
-void daala_idtx8(const tran_low_t *input, tran_low_t *output);
-void daala_fdct16(const tran_low_t *input, tran_low_t *output);
-void daala_idct16(const tran_low_t *input, tran_low_t *output);
-void daala_fdst16(const tran_low_t *input, tran_low_t *output);
-void daala_idst16(const tran_low_t *input, tran_low_t *output);
-void daala_idtx16(const tran_low_t *input, tran_low_t *output);
-void daala_fdct32(const tran_low_t *input, tran_low_t *output);
-void daala_idct32(const tran_low_t *input, tran_low_t *output);
-void daala_fdst32(const tran_low_t *input, tran_low_t *output);
-void daala_idst32(const tran_low_t *input, tran_low_t *output);
-void daala_idtx32(const tran_low_t *input, tran_low_t *output);
-#if CONFIG_TX64X64
-void daala_fdct64(const tran_low_t *input, tran_low_t *output);
-void daala_idct64(const tran_low_t *input, tran_low_t *output);
-void daala_fdst64(const tran_low_t *input, tran_low_t *output);
-void daala_idst64(const tran_low_t *input, tran_low_t *output);
-void daala_idtx64(const tran_low_t *input, tran_low_t *output);
-#endif
-
-void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride);
-void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]);
-void od_bin_fdst4(od_coeff y[4], const od_coeff *x, int xstride);
-void od_bin_idst4(od_coeff *x, int xstride, const od_coeff y[4]);
-void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride);
-void od_bin_idct8(od_coeff *x, int xstride, const od_coeff y[8]);
-void od_bin_fdst8(od_coeff y[8], const od_coeff *x, int xstride);
-void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]);
-void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride);
-void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]);
-void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride);
-void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]);
-void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride);
-void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]);
-#if CONFIG_TX64X64
-void od_bin_fdct64(od_coeff y[64], const od_coeff *x, int xstride);
-void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]);
-#endif
-#endif
diff --git a/third_party/aom/av1/common/debugmodes.c b/third_party/aom/av1/common/debugmodes.c
index 91f33d4e3..868f341b5 100644
--- a/third_party/aom/av1/common/debugmodes.c
+++ b/third_party/aom/av1/common/debugmodes.c
@@ -27,7 +27,7 @@ static void log_frame_info(AV1_COMMON *cm, const char *str, FILE *f) {
static void print_mi_data(AV1_COMMON *cm, FILE *file, const char *descriptor,
size_t member_offset) {
int mi_row, mi_col;
- MODE_INFO **mi = cm->mi_grid_visible;
+ MB_MODE_INFO **mi = cm->mi_grid_visible;
int rows = cm->mi_rows;
int cols = cm->mi_cols;
char prefix = descriptor[0];
@@ -36,8 +36,7 @@ static void print_mi_data(AV1_COMMON *cm, FILE *file, const char *descriptor,
for (mi_row = 0; mi_row < rows; mi_row++) {
fprintf(file, "%c ", prefix);
for (mi_col = 0; mi_col < cols; mi_col++) {
- fprintf(file, "%2d ",
- *((char *)((char *)(&mi[0]->mbmi) + member_offset)));
+ fprintf(file, "%2d ", *((char *)((char *)(mi[0]) + member_offset)));
mi++;
}
fprintf(file, "\n");
@@ -50,7 +49,7 @@ void av1_print_modes_and_motion_vectors(AV1_COMMON *cm, const char *file) {
int mi_row;
int mi_col;
FILE *mvs = fopen(file, "a");
- MODE_INFO **mi = cm->mi_grid_visible;
+ MB_MODE_INFO **mi = cm->mi_grid_visible;
int rows = cm->mi_rows;
int cols = cm->mi_cols;
@@ -65,7 +64,7 @@ void av1_print_modes_and_motion_vectors(AV1_COMMON *cm, const char *file) {
for (mi_row = 0; mi_row < rows; mi_row++) {
fprintf(mvs, "S ");
for (mi_col = 0; mi_col < cols; mi_col++) {
- fprintf(mvs, "%2d ", mi[0]->mbmi.skip);
+ fprintf(mvs, "%2d ", mi[0]->skip);
mi++;
}
fprintf(mvs, "\n");
@@ -79,8 +78,7 @@ void av1_print_modes_and_motion_vectors(AV1_COMMON *cm, const char *file) {
for (mi_row = 0; mi_row < rows; mi_row++) {
fprintf(mvs, "V ");
for (mi_col = 0; mi_col < cols; mi_col++) {
- fprintf(mvs, "%4d:%4d ", mi[0]->mbmi.mv[0].as_mv.row,
- mi[0]->mbmi.mv[0].as_mv.col);
+ fprintf(mvs, "%4d:%4d ", mi[0]->mv[0].as_mv.row, mi[0]->mv[0].as_mv.col);
mi++;
}
fprintf(mvs, "\n");
@@ -90,3 +88,20 @@ void av1_print_modes_and_motion_vectors(AV1_COMMON *cm, const char *file) {
fclose(mvs);
}
+
+void av1_print_uncompressed_frame_header(const uint8_t *data, int size,
+ const char *filename) {
+ FILE *hdrFile = fopen(filename, "w");
+ fwrite(data, size, sizeof(uint8_t), hdrFile);
+ fclose(hdrFile);
+}
+
+void av1_print_frame_contexts(const FRAME_CONTEXT *fc, const char *filename) {
+ FILE *fcFile = fopen(filename, "w");
+ const uint16_t *fcp = (uint16_t *)fc;
+ const unsigned int n_contexts = sizeof(FRAME_CONTEXT) / sizeof(uint16_t);
+ unsigned int i;
+
+ for (i = 0; i < n_contexts; ++i) fprintf(fcFile, "%d ", *fcp++);
+ fclose(fcFile);
+}
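Both helpers added above simply dump to whatever filename they are given. A minimal sketch of a call site follows (hypothetical, not part of this patch; it assumes an AV1_COMMON *cm plus the frame-header buffer data/size are already in scope, e.g. in the decoder once the header has been parsed):

  /* Hypothetical debug hook. */
  av1_print_uncompressed_frame_header(data, size, "frame_header.bin");
  av1_print_frame_contexts(cm->fc, "frame_contexts.txt");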
diff --git a/third_party/aom/av1/common/entropy.c b/third_party/aom/av1/common/entropy.c
index 17a8f1356..4f95ef69b 100644
--- a/third_party/aom/av1/common/entropy.c
+++ b/third_party/aom/av1/common/entropy.c
@@ -9,7 +9,8 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
#include "aom/aom_integer.h"
#include "aom_mem/aom_mem.h"
#include "av1/common/blockd.h"
@@ -17,2442 +18,161 @@
#include "av1/common/entropymode.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/scan.h"
-#if CONFIG_Q_ADAPT_PROBS
#include "av1/common/token_cdfs.h"
-#endif // CONFIG_Q_ADAPT_PROBS
-#if CONFIG_LV_MAP
#include "av1/common/txb_common.h"
-#endif
-
-// Unconstrained Node Tree
-/* clang-format off */
-const aom_tree_index av1_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
- 2, 6, // 0 = LOW_VAL
- -TWO_TOKEN, 4, // 1 = TWO
- -THREE_TOKEN, -FOUR_TOKEN, // 2 = THREE
- 8, 10, // 3 = HIGH_LOW
- -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 4 = CAT_ONE
- 12, 14, // 5 = CAT_THREEFOUR
- -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 6 = CAT_THREE
- -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 7 = CAT_FIVE
-};
-/* clang-format on */
-
-#if CONFIG_NEW_MULTISYMBOL
-/* Extra bits coded from LSB to MSB */
-const aom_cdf_prob av1_cat1_cdf0[CDF_SIZE(2)] = { AOM_ICDF(20352),
- AOM_ICDF(32768), 0 };
-const aom_cdf_prob *av1_cat1_cdf[] = { av1_cat1_cdf0 };
-
-const aom_cdf_prob av1_cat2_cdf0[CDF_SIZE(4)] = {
- AOM_ICDF(11963), AOM_ICDF(21121), AOM_ICDF(27719), AOM_ICDF(32768), 0
-};
-const aom_cdf_prob *av1_cat2_cdf[] = { av1_cat2_cdf0 };
-const aom_cdf_prob av1_cat3_cdf0[CDF_SIZE(8)] = {
- AOM_ICDF(7001), AOM_ICDF(12802), AOM_ICDF(17911),
- AOM_ICDF(22144), AOM_ICDF(25503), AOM_ICDF(28286),
- AOM_ICDF(30737), AOM_ICDF(32768), 0
-};
-const aom_cdf_prob *av1_cat3_cdf[] = { av1_cat3_cdf0 };
-
-const aom_cdf_prob av1_cat4_cdf0[CDF_SIZE(16)] = { AOM_ICDF(3934),
- AOM_ICDF(7460),
- AOM_ICDF(10719),
- AOM_ICDF(13640),
- AOM_ICDF(16203),
- AOM_ICDF(18500),
- AOM_ICDF(20624),
- AOM_ICDF(22528),
- AOM_ICDF(24316),
- AOM_ICDF(25919),
- AOM_ICDF(27401),
- AOM_ICDF(28729),
- AOM_ICDF(29894),
- AOM_ICDF(30938),
- AOM_ICDF(31903),
- AOM_ICDF(32768),
- 0 };
-const aom_cdf_prob *av1_cat4_cdf[] = { av1_cat4_cdf0 };
-
-const aom_cdf_prob av1_cat5_cdf0[CDF_SIZE(16)] = { AOM_ICDF(2942),
- AOM_ICDF(5794),
- AOM_ICDF(8473),
- AOM_ICDF(11069),
- AOM_ICDF(13469),
- AOM_ICDF(15795),
- AOM_ICDF(17980),
- AOM_ICDF(20097),
- AOM_ICDF(21952),
- AOM_ICDF(23750),
- AOM_ICDF(25439),
- AOM_ICDF(27076),
- AOM_ICDF(28589),
- AOM_ICDF(30056),
- AOM_ICDF(31434),
- AOM_ICDF(32768),
- 0 };
-const aom_cdf_prob av1_cat5_cdf1[CDF_SIZE(2)] = { AOM_ICDF(23040),
- AOM_ICDF(32768), 0 };
-const aom_cdf_prob *av1_cat5_cdf[] = { av1_cat5_cdf0, av1_cat5_cdf1 };
-
-const aom_cdf_prob av1_cat6_cdf0[CDF_SIZE(16)] = {
- AOM_ICDF(2382), AOM_ICDF(4727), AOM_ICDF(7036), AOM_ICDF(9309),
- AOM_ICDF(11512), AOM_ICDF(13681), AOM_ICDF(15816), AOM_ICDF(17918),
- AOM_ICDF(19892), AOM_ICDF(21835), AOM_ICDF(23748), AOM_ICDF(25632),
- AOM_ICDF(27458), AOM_ICDF(29255), AOM_ICDF(31024), AOM_ICDF(32768)
-};
-const aom_cdf_prob av1_cat6_cdf1[CDF_SIZE(16)] = {
- AOM_ICDF(9314), AOM_ICDF(15584), AOM_ICDF(19741), AOM_ICDF(22540),
- AOM_ICDF(25391), AOM_ICDF(27310), AOM_ICDF(28583), AOM_ICDF(29440),
- AOM_ICDF(30493), AOM_ICDF(31202), AOM_ICDF(31672), AOM_ICDF(31988),
- AOM_ICDF(32310), AOM_ICDF(32527), AOM_ICDF(32671), AOM_ICDF(32768)
-};
-const aom_cdf_prob av1_cat6_cdf2[CDF_SIZE(16)] = {
- AOM_ICDF(29548), AOM_ICDF(31129), AOM_ICDF(31960), AOM_ICDF(32004),
- AOM_ICDF(32473), AOM_ICDF(32498), AOM_ICDF(32511), AOM_ICDF(32512),
- AOM_ICDF(32745), AOM_ICDF(32757), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)
-};
-const aom_cdf_prob av1_cat6_cdf3[CDF_SIZE(16)] = {
- AOM_ICDF(32006), AOM_ICDF(32258), AOM_ICDF(32510), AOM_ICDF(32512),
- AOM_ICDF(32638), AOM_ICDF(32639), AOM_ICDF(32640), AOM_ICDF(32641),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)
-};
-const aom_cdf_prob av1_cat6_cdf4[CDF_SIZE(4)] = {
- AOM_ICDF(32513), AOM_ICDF(32641), AOM_ICDF(32767), AOM_ICDF(32768)
-};
-const aom_cdf_prob *av1_cat6_cdf[] = {
- av1_cat6_cdf0, av1_cat6_cdf1, av1_cat6_cdf2, av1_cat6_cdf3, av1_cat6_cdf4
-};
-#endif
-/* Extra bits coded from MSB to LSB */
-const aom_prob av1_cat1_prob[] = { 159 };
-const aom_prob av1_cat2_prob[] = { 165, 145 };
-const aom_prob av1_cat3_prob[] = { 173, 148, 140 };
-const aom_prob av1_cat4_prob[] = { 176, 155, 140, 135 };
-const aom_prob av1_cat5_prob[] = { 180, 157, 141, 134, 130 };
-const aom_prob av1_cat6_prob[] = {
- 255, 255, 255, 255, 254, 254, 254, 252, 249,
- 243, 230, 196, 177, 153, 140, 133, 130, 129
-};
-
-const uint16_t band_count_table[TX_SIZES_ALL][8] = {
-#if CONFIG_CHROMA_2X2
- { 1, 2, 2, 3, 0, 0, 0 },
-#endif
- { 1, 2, 3, 4, 3, 16 - 13, 0 }, { 1, 2, 3, 4, 11, 64 - 21, 0 },
- { 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 1024 - 21, 0 },
-#if CONFIG_TX64X64
- { 1, 2, 3, 4, 11, 4096 - 21, 0 },
-#endif // CONFIG_TX64X64
- { 1, 2, 3, 4, 8, 32 - 18, 0 }, { 1, 2, 3, 4, 8, 32 - 18, 0 },
- { 1, 2, 3, 4, 11, 128 - 21, 0 }, { 1, 2, 3, 4, 11, 128 - 21, 0 },
- { 1, 2, 3, 4, 11, 512 - 21, 0 }, { 1, 2, 3, 4, 11, 512 - 21, 0 },
-#if CONFIG_TX64X64
- { 1, 2, 3, 4, 11, 2048 - 21, 0 }, { 1, 2, 3, 4, 11, 2048 - 21, 0 },
-#endif // CONFIG_TX64X64
- { 1, 2, 3, 4, 11, 64 - 21, 0 }, { 1, 2, 3, 4, 11, 64 - 21, 0 },
- { 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 256 - 21, 0 },
-};
-
-const uint16_t band_cum_count_table[TX_SIZES_ALL][8] = {
-#if CONFIG_CHROMA_2X2
- { 0, 1, 3, 6, 10, 13, 16, 0 },
-#endif
- { 0, 1, 3, 6, 10, 13, 16, 0 }, { 0, 1, 3, 6, 10, 21, 64, 0 },
- { 0, 1, 3, 6, 10, 21, 256, 0 }, { 0, 1, 3, 6, 10, 21, 1024, 0 },
-#if CONFIG_TX64X64
- { 0, 1, 3, 6, 10, 21, 4096, 0 },
-#endif // CONFIG_TX64X64
- { 0, 1, 3, 6, 10, 18, 32, 0 }, { 0, 1, 3, 6, 10, 18, 32, 0 },
- { 0, 1, 3, 6, 10, 21, 128, 0 }, { 0, 1, 3, 6, 10, 21, 128, 0 },
- { 0, 1, 3, 6, 10, 21, 512, 0 }, { 0, 1, 3, 6, 10, 21, 512, 0 },
-#if CONFIG_TX64X64
- { 0, 1, 3, 6, 10, 21, 2048, 0 }, { 0, 1, 3, 6, 10, 21, 2048, 0 },
-#endif // CONFIG_TX64X64
- { 0, 1, 3, 6, 10, 21, 64, 0 }, { 0, 1, 3, 6, 10, 21, 64, 0 },
- { 0, 1, 3, 6, 10, 21, 256, 0 }, { 0, 1, 3, 6, 10, 21, 256, 0 },
-};
-const uint8_t av1_coefband_trans_8x8plus[MAX_TX_SQUARE] = {
- 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
- // beyond MAXBAND_INDEX+1 all values are filled as 5
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-#if CONFIG_TX64X64
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5
-#endif // CONFIG_TX64X64
-};
-
-const uint8_t av1_coefband_trans_4x8_8x4[32] = {
- 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
- 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-};
-
-const uint8_t av1_coefband_trans_4x4[16] = {
- 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5,
-};
-
-const uint8_t av1_pt_energy_class[ENTROPY_TOKENS] = { 0, 1, 2, 3, 3, 4,
- 4, 5, 5, 5, 5, 5 };
-
-// Model obtained from a 2-sided zero-centered distribution derived
-// from a Pareto distribution. The cdf of the distribution is:
-// cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta]
-//
-// For a given beta and a given probability of the 1-node, the alpha
-// is first solved, and then the {alpha, beta} pair is used to generate
-// the probabilities for the rest of the nodes.
-
-// beta = 8
-
-// Every odd line in this table can be generated from the even lines
-// by averaging :
-// av1_pareto8_full[l][node] = (av1_pareto8_full[l-1][node] +
-// av1_pareto8_full[l+1][node] ) >> 1;
-// Values for tokens ONE_TOKEN through CATEGORY6_TOKEN included here.
-const aom_prob av1_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = {
- { 3, 86, 128, 6, 86, 23, 88, 29 },
- { 6, 86, 128, 11, 87, 42, 91, 52 },
- { 9, 86, 129, 17, 88, 61, 94, 76 },
- { 12, 86, 129, 22, 88, 77, 97, 93 },
- { 15, 87, 129, 28, 89, 93, 100, 110 },
- { 17, 87, 129, 33, 90, 105, 103, 123 },
- { 20, 88, 130, 38, 91, 118, 106, 136 },
- { 23, 88, 130, 43, 91, 128, 108, 146 },
- { 26, 89, 131, 48, 92, 139, 111, 156 },
- { 28, 89, 131, 53, 93, 147, 114, 163 },
- { 31, 90, 131, 58, 94, 156, 117, 171 },
- { 34, 90, 131, 62, 94, 163, 119, 177 },
- { 37, 90, 132, 66, 95, 171, 122, 184 },
- { 39, 90, 132, 70, 96, 177, 124, 189 },
- { 42, 91, 132, 75, 97, 183, 127, 194 },
- { 44, 91, 132, 79, 97, 188, 129, 198 },
- { 47, 92, 133, 83, 98, 193, 132, 202 },
- { 49, 92, 133, 86, 99, 197, 134, 205 },
- { 52, 93, 133, 90, 100, 201, 137, 208 },
- { 54, 93, 133, 94, 100, 204, 139, 211 },
- { 57, 94, 134, 98, 101, 208, 142, 214 },
- { 59, 94, 134, 101, 102, 211, 144, 216 },
- { 62, 94, 135, 105, 103, 214, 146, 218 },
- { 64, 94, 135, 108, 103, 216, 148, 220 },
- { 66, 95, 135, 111, 104, 219, 151, 222 },
- { 68, 95, 135, 114, 105, 221, 153, 223 },
- { 71, 96, 136, 117, 106, 224, 155, 225 },
- { 73, 96, 136, 120, 106, 225, 157, 226 },
- { 76, 97, 136, 123, 107, 227, 159, 228 },
- { 78, 97, 136, 126, 108, 229, 160, 229 },
- { 80, 98, 137, 129, 109, 231, 162, 231 },
- { 82, 98, 137, 131, 109, 232, 164, 232 },
- { 84, 98, 138, 134, 110, 234, 166, 233 },
- { 86, 98, 138, 137, 111, 235, 168, 234 },
- { 89, 99, 138, 140, 112, 236, 170, 235 },
- { 91, 99, 138, 142, 112, 237, 171, 235 },
- { 93, 100, 139, 145, 113, 238, 173, 236 },
- { 95, 100, 139, 147, 114, 239, 174, 237 },
- { 97, 101, 140, 149, 115, 240, 176, 238 },
- { 99, 101, 140, 151, 115, 241, 177, 238 },
- { 101, 102, 140, 154, 116, 242, 179, 239 },
- { 103, 102, 140, 156, 117, 242, 180, 239 },
- { 105, 103, 141, 158, 118, 243, 182, 240 },
- { 107, 103, 141, 160, 118, 243, 183, 240 },
- { 109, 104, 141, 162, 119, 244, 185, 241 },
- { 111, 104, 141, 164, 119, 244, 186, 241 },
- { 113, 104, 142, 166, 120, 245, 187, 242 },
- { 114, 104, 142, 168, 121, 245, 188, 242 },
- { 116, 105, 143, 170, 122, 246, 190, 243 },
- { 118, 105, 143, 171, 122, 246, 191, 243 },
- { 120, 106, 143, 173, 123, 247, 192, 244 },
- { 121, 106, 143, 175, 124, 247, 193, 244 },
- { 123, 107, 144, 177, 125, 248, 195, 244 },
- { 125, 107, 144, 178, 125, 248, 196, 244 },
- { 127, 108, 145, 180, 126, 249, 197, 245 },
- { 128, 108, 145, 181, 127, 249, 198, 245 },
- { 130, 109, 145, 183, 128, 249, 199, 245 },
- { 132, 109, 145, 184, 128, 249, 200, 245 },
- { 134, 110, 146, 186, 129, 250, 201, 246 },
- { 135, 110, 146, 187, 130, 250, 202, 246 },
- { 137, 111, 147, 189, 131, 251, 203, 246 },
- { 138, 111, 147, 190, 131, 251, 204, 246 },
- { 140, 112, 147, 192, 132, 251, 205, 247 },
- { 141, 112, 147, 193, 132, 251, 206, 247 },
- { 143, 113, 148, 194, 133, 251, 207, 247 },
- { 144, 113, 148, 195, 134, 251, 207, 247 },
- { 146, 114, 149, 197, 135, 252, 208, 248 },
- { 147, 114, 149, 198, 135, 252, 209, 248 },
- { 149, 115, 149, 199, 136, 252, 210, 248 },
- { 150, 115, 149, 200, 137, 252, 210, 248 },
- { 152, 115, 150, 201, 138, 252, 211, 248 },
- { 153, 115, 150, 202, 138, 252, 212, 248 },
- { 155, 116, 151, 204, 139, 253, 213, 249 },
- { 156, 116, 151, 205, 139, 253, 213, 249 },
- { 158, 117, 151, 206, 140, 253, 214, 249 },
- { 159, 117, 151, 207, 141, 253, 215, 249 },
- { 161, 118, 152, 208, 142, 253, 216, 249 },
- { 162, 118, 152, 209, 142, 253, 216, 249 },
- { 163, 119, 153, 210, 143, 253, 217, 249 },
- { 164, 119, 153, 211, 143, 253, 217, 249 },
- { 166, 120, 153, 212, 144, 254, 218, 250 },
- { 167, 120, 153, 212, 145, 254, 219, 250 },
- { 168, 121, 154, 213, 146, 254, 220, 250 },
- { 169, 121, 154, 214, 146, 254, 220, 250 },
- { 171, 122, 155, 215, 147, 254, 221, 250 },
- { 172, 122, 155, 216, 147, 254, 221, 250 },
- { 173, 123, 155, 217, 148, 254, 222, 250 },
- { 174, 123, 155, 217, 149, 254, 222, 250 },
- { 176, 124, 156, 218, 150, 254, 223, 250 },
- { 177, 124, 156, 219, 150, 254, 223, 250 },
- { 178, 125, 157, 220, 151, 254, 224, 251 },
- { 179, 125, 157, 220, 151, 254, 224, 251 },
- { 180, 126, 157, 221, 152, 254, 225, 251 },
- { 181, 126, 157, 221, 152, 254, 225, 251 },
- { 183, 127, 158, 222, 153, 254, 226, 251 },
- { 184, 127, 158, 223, 154, 254, 226, 251 },
- { 185, 128, 159, 224, 155, 255, 227, 251 },
- { 186, 128, 159, 224, 155, 255, 227, 251 },
- { 187, 129, 160, 225, 156, 255, 228, 251 },
- { 188, 130, 160, 225, 156, 255, 228, 251 },
- { 189, 131, 160, 226, 157, 255, 228, 251 },
- { 190, 131, 160, 226, 158, 255, 228, 251 },
- { 191, 132, 161, 227, 159, 255, 229, 251 },
- { 192, 132, 161, 227, 159, 255, 229, 251 },
- { 193, 133, 162, 228, 160, 255, 230, 252 },
- { 194, 133, 162, 229, 160, 255, 230, 252 },
- { 195, 134, 163, 230, 161, 255, 231, 252 },
- { 196, 134, 163, 230, 161, 255, 231, 252 },
- { 197, 135, 163, 231, 162, 255, 231, 252 },
- { 198, 135, 163, 231, 162, 255, 231, 252 },
- { 199, 136, 164, 232, 163, 255, 232, 252 },
- { 200, 136, 164, 232, 164, 255, 232, 252 },
- { 201, 137, 165, 233, 165, 255, 233, 252 },
- { 201, 137, 165, 233, 165, 255, 233, 252 },
- { 202, 138, 166, 233, 166, 255, 233, 252 },
- { 203, 138, 166, 233, 166, 255, 233, 252 },
- { 204, 139, 166, 234, 167, 255, 234, 252 },
- { 205, 139, 166, 234, 167, 255, 234, 252 },
- { 206, 140, 167, 235, 168, 255, 235, 252 },
- { 206, 140, 167, 235, 168, 255, 235, 252 },
- { 207, 141, 168, 236, 169, 255, 235, 252 },
- { 208, 141, 168, 236, 170, 255, 235, 252 },
- { 209, 142, 169, 237, 171, 255, 236, 252 },
- { 209, 143, 169, 237, 171, 255, 236, 252 },
- { 210, 144, 169, 237, 172, 255, 236, 252 },
- { 211, 144, 169, 237, 172, 255, 236, 252 },
- { 212, 145, 170, 238, 173, 255, 237, 252 },
- { 213, 145, 170, 238, 173, 255, 237, 252 },
- { 214, 146, 171, 239, 174, 255, 237, 253 },
- { 214, 146, 171, 239, 174, 255, 237, 253 },
- { 215, 147, 172, 240, 175, 255, 238, 253 },
- { 215, 147, 172, 240, 175, 255, 238, 253 },
- { 216, 148, 173, 240, 176, 255, 238, 253 },
- { 217, 148, 173, 240, 176, 255, 238, 253 },
- { 218, 149, 173, 241, 177, 255, 239, 253 },
- { 218, 149, 173, 241, 178, 255, 239, 253 },
- { 219, 150, 174, 241, 179, 255, 239, 253 },
- { 219, 151, 174, 241, 179, 255, 239, 253 },
- { 220, 152, 175, 242, 180, 255, 240, 253 },
- { 221, 152, 175, 242, 180, 255, 240, 253 },
- { 222, 153, 176, 242, 181, 255, 240, 253 },
- { 222, 153, 176, 242, 181, 255, 240, 253 },
- { 223, 154, 177, 243, 182, 255, 240, 253 },
- { 223, 154, 177, 243, 182, 255, 240, 253 },
- { 224, 155, 178, 244, 183, 255, 241, 253 },
- { 224, 155, 178, 244, 183, 255, 241, 253 },
- { 225, 156, 178, 244, 184, 255, 241, 253 },
- { 225, 157, 178, 244, 184, 255, 241, 253 },
- { 226, 158, 179, 244, 185, 255, 242, 253 },
- { 227, 158, 179, 244, 185, 255, 242, 253 },
- { 228, 159, 180, 245, 186, 255, 242, 253 },
- { 228, 159, 180, 245, 186, 255, 242, 253 },
- { 229, 160, 181, 245, 187, 255, 242, 253 },
- { 229, 160, 181, 245, 187, 255, 242, 253 },
- { 230, 161, 182, 246, 188, 255, 243, 253 },
- { 230, 162, 182, 246, 188, 255, 243, 253 },
- { 231, 163, 183, 246, 189, 255, 243, 253 },
- { 231, 163, 183, 246, 189, 255, 243, 253 },
- { 232, 164, 184, 247, 190, 255, 243, 253 },
- { 232, 164, 184, 247, 190, 255, 243, 253 },
- { 233, 165, 185, 247, 191, 255, 244, 253 },
- { 233, 165, 185, 247, 191, 255, 244, 253 },
- { 234, 166, 185, 247, 192, 255, 244, 253 },
- { 234, 167, 185, 247, 192, 255, 244, 253 },
- { 235, 168, 186, 248, 193, 255, 244, 253 },
- { 235, 168, 186, 248, 193, 255, 244, 253 },
- { 236, 169, 187, 248, 194, 255, 244, 253 },
- { 236, 169, 187, 248, 194, 255, 244, 253 },
- { 236, 170, 188, 248, 195, 255, 245, 253 },
- { 236, 170, 188, 248, 195, 255, 245, 253 },
- { 237, 171, 189, 249, 196, 255, 245, 254 },
- { 237, 172, 189, 249, 196, 255, 245, 254 },
- { 238, 173, 190, 249, 197, 255, 245, 254 },
- { 238, 173, 190, 249, 197, 255, 245, 254 },
- { 239, 174, 191, 249, 198, 255, 245, 254 },
- { 239, 174, 191, 249, 198, 255, 245, 254 },
- { 240, 175, 192, 249, 199, 255, 246, 254 },
- { 240, 176, 192, 249, 199, 255, 246, 254 },
- { 240, 177, 193, 250, 200, 255, 246, 254 },
- { 240, 177, 193, 250, 200, 255, 246, 254 },
- { 241, 178, 194, 250, 201, 255, 246, 254 },
- { 241, 178, 194, 250, 201, 255, 246, 254 },
- { 242, 179, 195, 250, 202, 255, 246, 254 },
- { 242, 180, 195, 250, 202, 255, 246, 254 },
- { 242, 181, 196, 250, 203, 255, 247, 254 },
- { 242, 181, 196, 250, 203, 255, 247, 254 },
- { 243, 182, 197, 251, 204, 255, 247, 254 },
- { 243, 183, 197, 251, 204, 255, 247, 254 },
- { 244, 184, 198, 251, 205, 255, 247, 254 },
- { 244, 184, 198, 251, 205, 255, 247, 254 },
- { 244, 185, 199, 251, 206, 255, 247, 254 },
- { 244, 185, 199, 251, 206, 255, 247, 254 },
- { 245, 186, 200, 251, 207, 255, 247, 254 },
- { 245, 187, 200, 251, 207, 255, 247, 254 },
- { 246, 188, 201, 252, 207, 255, 248, 254 },
- { 246, 188, 201, 252, 207, 255, 248, 254 },
- { 246, 189, 202, 252, 208, 255, 248, 254 },
- { 246, 190, 202, 252, 208, 255, 248, 254 },
- { 247, 191, 203, 252, 209, 255, 248, 254 },
- { 247, 191, 203, 252, 209, 255, 248, 254 },
- { 247, 192, 204, 252, 210, 255, 248, 254 },
- { 247, 193, 204, 252, 210, 255, 248, 254 },
- { 248, 194, 205, 252, 211, 255, 248, 254 },
- { 248, 194, 205, 252, 211, 255, 248, 254 },
- { 248, 195, 206, 252, 212, 255, 249, 254 },
- { 248, 196, 206, 252, 212, 255, 249, 254 },
- { 249, 197, 207, 253, 213, 255, 249, 254 },
- { 249, 197, 207, 253, 213, 255, 249, 254 },
- { 249, 198, 208, 253, 214, 255, 249, 254 },
- { 249, 199, 209, 253, 214, 255, 249, 254 },
- { 250, 200, 210, 253, 215, 255, 249, 254 },
- { 250, 200, 210, 253, 215, 255, 249, 254 },
- { 250, 201, 211, 253, 215, 255, 249, 254 },
- { 250, 202, 211, 253, 215, 255, 249, 254 },
- { 250, 203, 212, 253, 216, 255, 249, 254 },
- { 250, 203, 212, 253, 216, 255, 249, 254 },
- { 251, 204, 213, 253, 217, 255, 250, 254 },
- { 251, 205, 213, 253, 217, 255, 250, 254 },
- { 251, 206, 214, 254, 218, 255, 250, 254 },
- { 251, 206, 215, 254, 218, 255, 250, 254 },
- { 252, 207, 216, 254, 219, 255, 250, 254 },
- { 252, 208, 216, 254, 219, 255, 250, 254 },
- { 252, 209, 217, 254, 220, 255, 250, 254 },
- { 252, 210, 217, 254, 220, 255, 250, 254 },
- { 252, 211, 218, 254, 221, 255, 250, 254 },
- { 252, 212, 218, 254, 221, 255, 250, 254 },
- { 253, 213, 219, 254, 222, 255, 250, 254 },
- { 253, 213, 220, 254, 222, 255, 250, 254 },
- { 253, 214, 221, 254, 223, 255, 250, 254 },
- { 253, 215, 221, 254, 223, 255, 250, 254 },
- { 253, 216, 222, 254, 224, 255, 251, 254 },
- { 253, 217, 223, 254, 224, 255, 251, 254 },
- { 253, 218, 224, 254, 225, 255, 251, 254 },
- { 253, 219, 224, 254, 225, 255, 251, 254 },
- { 254, 220, 225, 254, 225, 255, 251, 254 },
- { 254, 221, 226, 254, 225, 255, 251, 254 },
- { 254, 222, 227, 255, 226, 255, 251, 254 },
- { 254, 223, 227, 255, 226, 255, 251, 254 },
- { 254, 224, 228, 255, 227, 255, 251, 254 },
- { 254, 225, 229, 255, 227, 255, 251, 254 },
- { 254, 226, 230, 255, 228, 255, 251, 254 },
- { 254, 227, 230, 255, 229, 255, 251, 254 },
- { 255, 228, 231, 255, 230, 255, 251, 254 },
- { 255, 229, 232, 255, 230, 255, 251, 254 },
- { 255, 230, 233, 255, 231, 255, 252, 254 },
- { 255, 231, 234, 255, 231, 255, 252, 254 },
- { 255, 232, 235, 255, 232, 255, 252, 254 },
- { 255, 233, 236, 255, 232, 255, 252, 254 },
- { 255, 235, 237, 255, 233, 255, 252, 254 },
- { 255, 236, 238, 255, 234, 255, 252, 254 },
- { 255, 238, 240, 255, 235, 255, 252, 255 },
- { 255, 239, 241, 255, 235, 255, 252, 254 },
- { 255, 241, 243, 255, 236, 255, 252, 254 },
- { 255, 243, 245, 255, 237, 255, 252, 254 },
- { 255, 246, 247, 255, 239, 255, 253, 255 },
-};
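As a minimal illustration of the model documented above (a sketch only: beta is fixed at 8 as stated, but alpha below is a made-up value, whereas the real tables first solve alpha from the coded probability of the 1-node before filling in the remaining nodes):

#include <math.h>
#include <stdio.h>

/* cdf(x) = 0.5 + 0.5 * sgn(x) * (1 - (alpha / (alpha + |x|))^beta) */
static double pareto8_cdf(double x, double alpha, double beta) {
  const double sgn = (x > 0) - (x < 0);
  return 0.5 + 0.5 * sgn * (1.0 - pow(alpha / (alpha + fabs(x)), beta));
}

int main(void) {
  const double beta = 8.0;   /* fixed for these tables */
  const double alpha = 10.0; /* hypothetical; normally solved per model */
  int k;
  /* Probability that a coefficient magnitude exceeds k; by symmetry,
   * P(|x| > k) = 2 * (1 - cdf(k)). */
  for (k = 1; k <= 4; k++) {
    printf("P(|x| > %d) = %.4f\n", k, 2.0 * (1.0 - pareto8_cdf(k, alpha, beta)));
  }
  return 0;
}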
-
-// Model obtained from a 2-sided zero-centered distribution derived
-// from a Pareto distribution. The cdf of the distribution is:
-// cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta]
-//
-// For a given beta and a given probability of the 1-node, the alpha
-// is first solved, and then the {alpha, beta} pair is used to generate
-// the probabilities for the rest of the nodes.
-//
-// The full source code of the generating program is available in:
-// tools/gen_constrained_tokenset.py
-//
-// Values for tokens TWO_TOKEN through CATEGORY6_TOKEN included
-// in the table here : the ONE_TOKEN probability is
-// removed and the probabilities rescaled.
-//
-// ZERO_TOKEN and ONE_TOKEN are coded as one CDF,
-// and EOB_TOKEN is coded as flags outside this coder.
-const aom_cdf_prob av1_pareto8_tail_probs[COEFF_PROB_MODELS][TAIL_NODES] = {
- { 128, 127, 127, 252, 497, 969, 1839, 3318, 25511 },
- { 256, 254, 251, 496, 966, 1834, 3308, 5408, 19995 },
- { 383, 378, 373, 732, 1408, 2605, 4470, 6646, 15773 },
- { 511, 502, 493, 961, 1824, 3289, 5373, 7298, 12517 },
- { 638, 625, 611, 1182, 2215, 3894, 6064, 7548, 9991 },
- { 766, 746, 726, 1396, 2582, 4428, 6578, 7529, 8017 },
- { 893, 866, 839, 1603, 2927, 4896, 6945, 7332, 6467 },
- { 1020, 984, 950, 1803, 3250, 5305, 7191, 7022, 5243 },
- { 1147, 1102, 1059, 1996, 3552, 5659, 7338, 6646, 4269 },
- { 1274, 1218, 1166, 2183, 3835, 5963, 7403, 6234, 3492 },
- { 1400, 1334, 1270, 2363, 4099, 6223, 7401, 5809, 2869 },
- { 1527, 1447, 1372, 2537, 4345, 6442, 7346, 5386, 2366 },
- { 1654, 1560, 1473, 2704, 4574, 6624, 7247, 4973, 1959 },
- { 1780, 1672, 1571, 2866, 4787, 6771, 7114, 4579, 1628 },
- { 1906, 1782, 1667, 3022, 4984, 6889, 6954, 4206, 1358 },
- { 2032, 1891, 1762, 3172, 5167, 6979, 6773, 3856, 1136 },
- { 2158, 2000, 1854, 3316, 5335, 7044, 6577, 3530, 954 },
- { 2284, 2106, 1944, 3455, 5490, 7087, 6370, 3229, 803 },
- { 2410, 2212, 2032, 3588, 5632, 7109, 6155, 2951, 679 },
- { 2535, 2317, 2119, 3717, 5761, 7113, 5936, 2695, 575 },
- { 2661, 2420, 2203, 3840, 5880, 7101, 5714, 2461, 488 },
- { 2786, 2522, 2286, 3958, 5987, 7074, 5493, 2246, 416 },
- { 2911, 2624, 2367, 4072, 6083, 7033, 5273, 2050, 355 },
- { 3037, 2724, 2446, 4180, 6170, 6981, 5055, 1871, 304 },
- { 3162, 2822, 2523, 4284, 6247, 6919, 4842, 1708, 261 },
- { 3286, 2920, 2599, 4384, 6315, 6848, 4633, 1559, 224 },
- { 3411, 3017, 2672, 4478, 6374, 6768, 4430, 1424, 194 },
- { 3536, 3112, 2745, 4569, 6426, 6681, 4232, 1300, 167 },
- { 3660, 3207, 2815, 4656, 6469, 6588, 4040, 1188, 145 },
- { 3785, 3300, 2883, 4738, 6505, 6490, 3855, 1086, 126 },
- { 3909, 3392, 2950, 4817, 6534, 6387, 3677, 993, 109 },
- { 4033, 3483, 3015, 4891, 6557, 6281, 3505, 908, 95 },
- { 4157, 3573, 3079, 4962, 6573, 6170, 3340, 831, 83 },
- { 4281, 3662, 3141, 5029, 6584, 6058, 3181, 760, 72 },
- { 4405, 3750, 3201, 5093, 6588, 5943, 3029, 696, 63 },
- { 4529, 3837, 3260, 5152, 6587, 5826, 2883, 638, 56 },
- { 4652, 3922, 3317, 5209, 6582, 5709, 2744, 584, 49 },
- { 4775, 4007, 3373, 5262, 6572, 5590, 2610, 536, 43 },
- { 4899, 4090, 3427, 5312, 6557, 5470, 2483, 492, 38 },
- { 5022, 4173, 3480, 5359, 6538, 5351, 2361, 451, 33 },
- { 5145, 4254, 3531, 5403, 6515, 5231, 2246, 414, 29 },
- { 5268, 4334, 3581, 5443, 6489, 5112, 2135, 380, 26 },
- { 5391, 4414, 3629, 5481, 6458, 4993, 2029, 350, 23 },
- { 5514, 4492, 3676, 5515, 6425, 4875, 1929, 321, 21 },
- { 5637, 4569, 3721, 5548, 6388, 4758, 1833, 296, 18 },
- { 5759, 4645, 3766, 5577, 6349, 4642, 1742, 272, 16 },
- { 5881, 4720, 3808, 5604, 6307, 4528, 1656, 250, 14 },
- { 6004, 4794, 3849, 5628, 6262, 4414, 1573, 231, 13 },
- { 6126, 4867, 3890, 5649, 6215, 4302, 1495, 213, 11 },
- { 6248, 4939, 3928, 5669, 6166, 4192, 1420, 196, 10 },
- { 6370, 5010, 3966, 5686, 6114, 4083, 1349, 181, 9 },
- { 6492, 5080, 4002, 5700, 6061, 3976, 1282, 167, 8 },
- { 6614, 5149, 4037, 5712, 6006, 3871, 1218, 154, 7 },
- { 6735, 5217, 4070, 5723, 5950, 3767, 1157, 142, 7 },
- { 6857, 5284, 4103, 5731, 5891, 3666, 1099, 131, 6 },
- { 6978, 5351, 4134, 5737, 5832, 3566, 1044, 121, 5 },
- { 7099, 5415, 4164, 5741, 5771, 3469, 992, 112, 5 },
- { 7221, 5479, 4192, 5743, 5709, 3373, 943, 104, 4 },
- { 7342, 5542, 4220, 5743, 5646, 3279, 896, 96, 4 },
- { 7462, 5604, 4246, 5742, 5583, 3187, 851, 89, 4 },
- { 7584, 5665, 4272, 5739, 5518, 3097, 808, 82, 3 },
- { 7704, 5725, 4296, 5734, 5453, 3009, 768, 76, 3 },
- { 7825, 5784, 4318, 5727, 5386, 2924, 730, 71, 3 },
- { 7945, 5843, 4341, 5719, 5320, 2840, 693, 65, 2 },
- { 8066, 5900, 4361, 5709, 5252, 2758, 659, 61, 2 },
- { 8186, 5956, 4381, 5698, 5185, 2678, 626, 56, 2 },
- { 8306, 6011, 4400, 5685, 5117, 2600, 595, 52, 2 },
- { 8426, 6066, 4418, 5671, 5049, 2523, 565, 48, 2 },
- { 8547, 6119, 4434, 5655, 4981, 2449, 537, 45, 1 },
- { 8666, 6171, 4450, 5638, 4912, 2377, 511, 42, 1 },
- { 8786, 6223, 4465, 5620, 4843, 2306, 485, 39, 1 },
- { 8906, 6274, 4478, 5600, 4775, 2237, 461, 36, 1 },
- { 9025, 6323, 4491, 5580, 4706, 2170, 438, 34, 1 },
- { 9144, 6372, 4503, 5558, 4637, 2105, 417, 31, 1 },
- { 9264, 6420, 4514, 5535, 4568, 2041, 396, 29, 1 },
- { 9383, 6467, 4524, 5511, 4500, 1979, 376, 27, 1 },
- { 9502, 6513, 4532, 5486, 4432, 1919, 358, 25, 1 },
- { 9621, 6558, 4541, 5460, 4364, 1860, 340, 23, 1 },
- { 9740, 6602, 4548, 5433, 4296, 1803, 323, 22, 1 },
- { 9859, 6645, 4554, 5405, 4229, 1748, 307, 20, 1 },
- { 9978, 6688, 4559, 5376, 4161, 1694, 292, 19, 1 },
- { 10096, 6729, 4564, 5347, 4094, 1641, 278, 18, 1 },
- { 10215, 6770, 4568, 5316, 4028, 1590, 264, 16, 1 },
- { 10333, 6809, 4571, 5285, 3962, 1541, 251, 15, 1 },
- { 10452, 6848, 4573, 5253, 3896, 1492, 239, 14, 1 },
- { 10570, 6886, 4574, 5220, 3831, 1446, 227, 13, 1 },
- { 10688, 6923, 4575, 5186, 3767, 1400, 216, 12, 1 },
- { 10806, 6959, 4575, 5152, 3702, 1356, 205, 12, 1 },
- { 10924, 6994, 4574, 5117, 3639, 1313, 195, 11, 1 },
- { 11041, 7029, 4572, 5082, 3576, 1271, 186, 10, 1 },
- { 11159, 7062, 4570, 5046, 3513, 1231, 177, 9, 1 },
- { 11277, 7095, 4566, 5009, 3451, 1192, 168, 9, 1 },
- { 11394, 7127, 4563, 4972, 3390, 1153, 160, 8, 1 },
- { 11512, 7158, 4558, 4934, 3329, 1116, 152, 8, 1 },
- { 11629, 7188, 4553, 4896, 3269, 1080, 145, 7, 1 },
- { 11746, 7217, 4547, 4857, 3210, 1045, 138, 7, 1 },
- { 11864, 7245, 4540, 4818, 3151, 1012, 131, 6, 1 },
- { 11980, 7273, 4533, 4779, 3093, 979, 124, 6, 1 },
- { 12097, 7300, 4525, 4739, 3035, 947, 118, 6, 1 },
- { 12215, 7326, 4516, 4698, 2978, 916, 113, 5, 1 },
- { 12331, 7351, 4507, 4658, 2922, 886, 107, 5, 1 },
- { 12448, 7375, 4497, 4617, 2866, 857, 102, 5, 1 },
- { 12564, 7398, 4487, 4576, 2812, 829, 97, 4, 1 },
- { 12681, 7421, 4476, 4534, 2757, 802, 92, 4, 1 },
- { 12797, 7443, 4464, 4492, 2704, 775, 88, 4, 1 },
- { 12914, 7464, 4452, 4450, 2651, 749, 84, 3, 1 },
- { 13030, 7484, 4439, 4408, 2599, 725, 79, 3, 1 },
- { 13147, 7503, 4426, 4365, 2547, 700, 76, 3, 1 },
- { 13262, 7522, 4412, 4322, 2497, 677, 72, 3, 1 },
- { 13378, 7539, 4398, 4280, 2447, 654, 68, 3, 1 },
- { 13494, 7556, 4383, 4237, 2397, 632, 65, 3, 1 },
- { 13610, 7573, 4368, 4193, 2348, 611, 62, 2, 1 },
- { 13726, 7588, 4352, 4150, 2300, 590, 59, 2, 1 },
- { 13841, 7602, 4335, 4107, 2253, 571, 56, 2, 1 },
- { 13957, 7616, 4318, 4063, 2207, 551, 53, 2, 1 },
- { 14072, 7629, 4301, 4019, 2161, 532, 51, 2, 1 },
- { 14188, 7641, 4283, 3976, 2115, 514, 48, 2, 1 },
- { 14302, 7652, 4265, 3932, 2071, 497, 46, 2, 1 },
- { 14418, 7663, 4246, 3888, 2027, 480, 44, 1, 1 },
- { 14533, 7673, 4227, 3844, 1984, 463, 42, 1, 1 },
- { 14649, 7682, 4207, 3800, 1941, 447, 40, 1, 1 },
- { 14763, 7690, 4187, 3757, 1899, 432, 38, 1, 1 },
- { 14878, 7698, 4166, 3713, 1858, 417, 36, 1, 1 },
- { 14993, 7705, 4146, 3669, 1817, 402, 34, 1, 1 },
- { 15109, 7711, 4124, 3625, 1777, 388, 32, 1, 1 },
- { 15223, 7715, 4103, 3581, 1738, 375, 31, 1, 1 },
- { 15337, 7720, 4081, 3538, 1699, 362, 29, 1, 1 },
- { 15452, 7724, 4058, 3494, 1661, 349, 28, 1, 1 },
- { 15567, 7727, 4035, 3450, 1624, 337, 26, 1, 1 },
- { 15681, 7729, 4012, 3407, 1587, 325, 25, 1, 1 },
- { 15795, 7730, 3989, 3364, 1551, 313, 24, 1, 1 },
- { 15909, 7731, 3965, 3320, 1516, 302, 23, 1, 1 },
- { 16024, 7731, 3940, 3277, 1481, 291, 22, 1, 1 },
- { 16138, 7730, 3916, 3234, 1446, 281, 21, 1, 1 },
- { 16252, 7728, 3891, 3191, 1413, 271, 20, 1, 1 },
- { 16366, 7726, 3866, 3148, 1380, 261, 19, 1, 1 },
- { 16480, 7723, 3840, 3106, 1347, 252, 18, 1, 1 },
- { 16594, 7720, 3814, 3063, 1315, 243, 17, 1, 1 },
- { 16708, 7715, 3788, 3021, 1284, 234, 16, 1, 1 },
- { 16822, 7710, 3762, 2979, 1253, 225, 15, 1, 1 },
- { 16936, 7704, 3735, 2937, 1223, 217, 14, 1, 1 },
- { 17050, 7697, 3708, 2895, 1193, 209, 14, 1, 1 },
- { 17162, 7690, 3681, 2854, 1164, 202, 13, 1, 1 },
- { 17276, 7682, 3654, 2812, 1136, 194, 12, 1, 1 },
- { 17389, 7673, 3626, 2771, 1108, 187, 12, 1, 1 },
- { 17504, 7663, 3598, 2730, 1080, 180, 11, 1, 1 },
- { 17617, 7653, 3570, 2689, 1053, 173, 11, 1, 1 },
- { 17730, 7642, 3541, 2649, 1027, 167, 10, 1, 1 },
- { 17843, 7630, 3513, 2608, 1001, 161, 10, 1, 1 },
- { 17957, 7618, 3484, 2569, 975, 154, 9, 1, 1 },
- { 18069, 7605, 3455, 2529, 950, 149, 9, 1, 1 },
- { 18183, 7591, 3426, 2489, 926, 143, 8, 1, 1 },
- { 18296, 7576, 3396, 2450, 902, 138, 8, 1, 1 },
- { 18410, 7562, 3366, 2411, 878, 132, 7, 1, 1 },
- { 18523, 7545, 3337, 2372, 855, 127, 7, 1, 1 },
- { 18636, 7529, 3306, 2333, 833, 122, 7, 1, 1 },
- { 18749, 7511, 3276, 2295, 811, 118, 6, 1, 1 },
- { 18862, 7493, 3246, 2257, 789, 113, 6, 1, 1 },
- { 18975, 7474, 3215, 2219, 768, 109, 6, 1, 1 },
- { 19088, 7455, 3185, 2182, 747, 104, 5, 1, 1 },
- { 19201, 7435, 3154, 2144, 727, 100, 5, 1, 1 },
- { 19314, 7414, 3123, 2107, 707, 96, 5, 1, 1 },
- { 19427, 7392, 3092, 2071, 687, 92, 5, 1, 1 },
- { 19541, 7370, 3060, 2034, 668, 89, 4, 1, 1 },
- { 19654, 7347, 3029, 1998, 649, 85, 4, 1, 1 },
- { 19766, 7323, 2997, 1963, 631, 82, 4, 1, 1 },
- { 19878, 7299, 2966, 1927, 613, 79, 4, 1, 1 },
- { 19991, 7274, 2934, 1892, 596, 75, 4, 1, 1 },
- { 20105, 7248, 2902, 1857, 579, 72, 3, 1, 1 },
- { 20218, 7222, 2870, 1822, 562, 69, 3, 1, 1 },
- { 20331, 7195, 2838, 1788, 545, 66, 3, 1, 1 },
- { 20443, 7167, 2806, 1754, 529, 64, 3, 1, 1 },
- { 20556, 7138, 2774, 1720, 514, 61, 3, 1, 1 },
- { 20670, 7109, 2741, 1687, 498, 58, 3, 1, 1 },
- { 20783, 7079, 2709, 1654, 483, 56, 2, 1, 1 },
- { 20895, 7049, 2676, 1621, 469, 54, 2, 1, 1 },
- { 21008, 7017, 2644, 1589, 455, 51, 2, 1, 1 },
- { 21121, 6985, 2611, 1557, 441, 49, 2, 1, 1 },
- { 21234, 6953, 2578, 1525, 427, 47, 2, 1, 1 },
- { 21347, 6919, 2545, 1494, 414, 45, 2, 1, 1 },
- { 21460, 6885, 2513, 1462, 401, 43, 2, 1, 1 },
- { 21573, 6850, 2480, 1432, 388, 41, 2, 1, 1 },
- { 21687, 6815, 2447, 1401, 375, 39, 2, 1, 1 },
- { 21801, 6778, 2414, 1371, 363, 38, 1, 1, 1 },
- { 21914, 6741, 2381, 1341, 352, 36, 1, 1, 1 },
- { 22028, 6704, 2348, 1311, 340, 34, 1, 1, 1 },
- { 22141, 6665, 2315, 1282, 329, 33, 1, 1, 1 },
- { 22255, 6626, 2282, 1253, 318, 31, 1, 1, 1 },
- { 22368, 6586, 2249, 1225, 307, 30, 1, 1, 1 },
- { 22482, 6546, 2216, 1196, 297, 28, 1, 1, 1 },
- { 22595, 6505, 2183, 1169, 286, 27, 1, 1, 1 },
- { 22709, 6463, 2149, 1141, 277, 26, 1, 1, 1 },
- { 22823, 6420, 2116, 1114, 267, 25, 1, 1, 1 },
- { 22938, 6377, 2083, 1087, 257, 23, 1, 1, 1 },
- { 23053, 6332, 2050, 1060, 248, 22, 1, 1, 1 },
- { 23167, 6287, 2017, 1034, 239, 21, 1, 1, 1 },
- { 23280, 6242, 1984, 1008, 231, 20, 1, 1, 1 },
- { 23396, 6195, 1951, 982, 222, 19, 1, 1, 1 },
- { 23510, 6148, 1918, 957, 214, 18, 1, 1, 1 },
- { 23625, 6100, 1885, 932, 206, 17, 1, 1, 1 },
- { 23741, 6051, 1852, 907, 198, 16, 1, 1, 1 },
- { 23855, 6002, 1819, 883, 190, 16, 1, 1, 1 },
- { 23971, 5951, 1786, 859, 183, 15, 1, 1, 1 },
- { 24087, 5900, 1753, 835, 176, 14, 1, 1, 1 },
- { 24203, 5848, 1720, 812, 169, 13, 1, 1, 1 },
- { 24318, 5796, 1687, 789, 162, 13, 1, 1, 1 },
- { 24435, 5742, 1655, 766, 155, 12, 1, 1, 1 },
- { 24552, 5688, 1622, 743, 149, 11, 1, 1, 1 },
- { 24669, 5632, 1589, 721, 143, 11, 1, 1, 1 },
- { 24786, 5576, 1557, 699, 137, 10, 1, 1, 1 },
- { 24903, 5519, 1524, 678, 131, 10, 1, 1, 1 },
- { 25021, 5462, 1491, 657, 125, 9, 1, 1, 1 },
- { 25139, 5403, 1459, 636, 120, 8, 1, 1, 1 },
- { 25258, 5343, 1427, 615, 114, 8, 1, 1, 1 },
- { 25376, 5283, 1394, 595, 109, 8, 1, 1, 1 },
- { 25496, 5221, 1362, 575, 104, 7, 1, 1, 1 },
- { 25614, 5159, 1330, 556, 99, 7, 1, 1, 1 },
- { 25735, 5096, 1298, 536, 94, 6, 1, 1, 1 },
- { 25856, 5031, 1265, 517, 90, 6, 1, 1, 1 },
- { 25977, 4966, 1233, 499, 85, 5, 1, 1, 1 },
- { 26098, 4899, 1202, 480, 81, 5, 1, 1, 1 },
- { 26220, 4831, 1170, 462, 77, 5, 1, 1, 1 },
- { 26343, 4763, 1138, 444, 73, 4, 1, 1, 1 },
- { 26466, 4693, 1106, 427, 69, 4, 1, 1, 1 },
- { 26589, 4622, 1075, 410, 65, 4, 1, 1, 1 },
- { 26713, 4550, 1043, 393, 62, 4, 1, 1, 1 },
- { 26840, 4476, 1012, 376, 58, 3, 1, 1, 1 },
- { 26966, 4401, 980, 360, 55, 3, 1, 1, 1 },
- { 27092, 4325, 949, 344, 52, 3, 1, 1, 1 },
- { 27220, 4248, 918, 328, 48, 3, 1, 1, 1 },
- { 27350, 4169, 886, 313, 45, 2, 1, 1, 1 },
- { 27480, 4088, 855, 298, 42, 2, 1, 1, 1 },
- { 27610, 4006, 824, 283, 40, 2, 1, 1, 1 },
- { 27743, 3922, 793, 268, 37, 2, 1, 1, 1 },
- { 27876, 3837, 762, 254, 34, 2, 1, 1, 1 },
- { 28011, 3749, 731, 240, 32, 2, 1, 1, 1 },
- { 28147, 3659, 701, 227, 30, 1, 1, 1, 1 },
- { 28286, 3568, 670, 213, 27, 1, 1, 1, 1 },
- { 28426, 3474, 639, 200, 25, 1, 1, 1, 1 },
- { 28569, 3377, 608, 187, 23, 1, 1, 1, 1 },
- { 28714, 3278, 577, 174, 21, 1, 1, 1, 1 },
- { 28860, 3176, 547, 162, 19, 1, 1, 1, 1 },
- { 29010, 3071, 516, 150, 17, 1, 1, 1, 1 },
- { 29163, 2962, 485, 138, 16, 1, 1, 1, 1 },
- { 29320, 2849, 454, 127, 14, 1, 1, 1, 1 },
- { 29483, 2731, 423, 115, 12, 1, 1, 1, 1 },
- { 29650, 2608, 391, 104, 11, 1, 1, 1, 1 },
- { 29823, 2479, 360, 93, 9, 1, 1, 1, 1 },
- { 30002, 2343, 328, 83, 8, 1, 1, 1, 1 },
- { 30192, 2198, 295, 72, 7, 1, 1, 1, 1 },
- { 30393, 2041, 262, 62, 6, 1, 1, 1, 1 },
- { 30612, 1869, 227, 52, 4, 1, 1, 1, 1 },
- { 30853, 1676, 191, 41, 3, 1, 1, 1, 1 },
- { 31131, 1448, 152, 31, 2, 1, 1, 1, 1 },
- { 31486, 1150, 107, 20, 1, 1, 1, 1, 1 },
-};
-
-#if !CONFIG_Q_ADAPT_PROBS
-static const coeff_cdf_model default_coef_head_cdf_4x4[PLANE_TYPES] = {
- { // Y plane
- { // Intra
- { // Band 0
- { AOM_ICDF(25024), AOM_ICDF(25863), AOM_ICDF(27361), AOM_ICDF(29796),
- AOM_ICDF(30374), AOM_ICDF(32768) },
- { AOM_ICDF(10816), AOM_ICDF(14127), AOM_ICDF(17116), AOM_ICDF(23516),
- AOM_ICDF(24999), AOM_ICDF(32768) },
- { AOM_ICDF(1088), AOM_ICDF(6358), AOM_ICDF(8428), AOM_ICDF(16648),
- AOM_ICDF(18276), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(14529), AOM_ICDF(18769), AOM_ICDF(29100), AOM_ICDF(29634),
- AOM_ICDF(32768) },
- { AOM_ICDF(12993), AOM_ICDF(17117), AOM_ICDF(28404), AOM_ICDF(28988),
- AOM_ICDF(32768) },
- { AOM_ICDF(11201), AOM_ICDF(14084), AOM_ICDF(25818), AOM_ICDF(26504),
- AOM_ICDF(32768) },
- { AOM_ICDF(9793), AOM_ICDF(11267), AOM_ICDF(21775), AOM_ICDF(22451),
- AOM_ICDF(32768) },
- { AOM_ICDF(7105), AOM_ICDF(7562), AOM_ICDF(15777), AOM_ICDF(16225),
- AOM_ICDF(32768) },
- { AOM_ICDF(3905), AOM_ICDF(3966), AOM_ICDF(8359), AOM_ICDF(8526),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(20033), AOM_ICDF(23643), AOM_ICDF(31102), AOM_ICDF(31374),
- AOM_ICDF(32768) },
- { AOM_ICDF(16321), AOM_ICDF(20350), AOM_ICDF(30167), AOM_ICDF(30546),
- AOM_ICDF(32768) },
- { AOM_ICDF(12993), AOM_ICDF(15512), AOM_ICDF(26859), AOM_ICDF(27396),
- AOM_ICDF(32768) },
- { AOM_ICDF(10305), AOM_ICDF(11659), AOM_ICDF(21669), AOM_ICDF(22330),
- AOM_ICDF(32768) },
- { AOM_ICDF(7361), AOM_ICDF(7819), AOM_ICDF(15450), AOM_ICDF(15940),
- AOM_ICDF(32768) },
- { AOM_ICDF(3521), AOM_ICDF(3580), AOM_ICDF(7805), AOM_ICDF(7976),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(21057), AOM_ICDF(25460), AOM_ICDF(31740), AOM_ICDF(31952),
- AOM_ICDF(32768) },
- { AOM_ICDF(16449), AOM_ICDF(21173), AOM_ICDF(30761), AOM_ICDF(31092),
- AOM_ICDF(32768) },
- { AOM_ICDF(11841), AOM_ICDF(14615), AOM_ICDF(26188), AOM_ICDF(26824),
- AOM_ICDF(32768) },
- { AOM_ICDF(7745), AOM_ICDF(8991), AOM_ICDF(18937), AOM_ICDF(19707),
- AOM_ICDF(32768) },
- { AOM_ICDF(4417), AOM_ICDF(4706), AOM_ICDF(10342), AOM_ICDF(10890),
- AOM_ICDF(32768) },
- { AOM_ICDF(7617), AOM_ICDF(8392), AOM_ICDF(17295), AOM_ICDF(17915),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(20417), AOM_ICDF(26452), AOM_ICDF(32166), AOM_ICDF(32321),
- AOM_ICDF(32768) },
- { AOM_ICDF(15809), AOM_ICDF(21634), AOM_ICDF(30947), AOM_ICDF(31298),
- AOM_ICDF(32768) },
- { AOM_ICDF(10049), AOM_ICDF(12176), AOM_ICDF(23495), AOM_ICDF(24229),
- AOM_ICDF(32768) },
- { AOM_ICDF(5953), AOM_ICDF(6731), AOM_ICDF(16166), AOM_ICDF(16798),
- AOM_ICDF(32768) },
- { AOM_ICDF(6081), AOM_ICDF(6188), AOM_ICDF(8114), AOM_ICDF(8764),
- AOM_ICDF(32768) },
- { AOM_ICDF(2113), AOM_ICDF(2291), AOM_ICDF(4448), AOM_ICDF(5527),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(9153), AOM_ICDF(25905), AOM_ICDF(31431), AOM_ICDF(31934),
- AOM_ICDF(32768) },
- { AOM_ICDF(9025), AOM_ICDF(23345), AOM_ICDF(30033), AOM_ICDF(30965),
- AOM_ICDF(32768) },
- { AOM_ICDF(5953), AOM_ICDF(13835), AOM_ICDF(22032), AOM_ICDF(24664),
- AOM_ICDF(32768) },
- { AOM_ICDF(6337), AOM_ICDF(11435), AOM_ICDF(18366), AOM_ICDF(21418),
- AOM_ICDF(32768) },
- { AOM_ICDF(3137), AOM_ICDF(4871), AOM_ICDF(8519), AOM_ICDF(12426),
- AOM_ICDF(32768) },
- { AOM_ICDF(1857), AOM_ICDF(2727), AOM_ICDF(5540), AOM_ICDF(8757),
- AOM_ICDF(32768) } } },
- { // Intra
- { // Band 0
- { AOM_ICDF(24512), AOM_ICDF(26673), AOM_ICDF(28962), AOM_ICDF(31929),
- AOM_ICDF(32126), AOM_ICDF(32768) },
- { AOM_ICDF(15936), AOM_ICDF(21711), AOM_ICDF(25569), AOM_ICDF(30899),
- AOM_ICDF(31305), AOM_ICDF(32768) },
- { AOM_ICDF(3264), AOM_ICDF(14756), AOM_ICDF(20107), AOM_ICDF(29407),
- AOM_ICDF(30032), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(21313), AOM_ICDF(26020), AOM_ICDF(32523), AOM_ICDF(32575),
- AOM_ICDF(32768) },
- { AOM_ICDF(18369), AOM_ICDF(24215), AOM_ICDF(32291), AOM_ICDF(32391),
- AOM_ICDF(32768) },
- { AOM_ICDF(15297), AOM_ICDF(19637), AOM_ICDF(30414), AOM_ICDF(30752),
- AOM_ICDF(32768) },
- { AOM_ICDF(11713), AOM_ICDF(14040), AOM_ICDF(25408), AOM_ICDF(26033),
- AOM_ICDF(32768) },
- { AOM_ICDF(9537), AOM_ICDF(10173), AOM_ICDF(18839), AOM_ICDF(19315),
- AOM_ICDF(32768) },
- { AOM_ICDF(9025), AOM_ICDF(9093), AOM_ICDF(13987), AOM_ICDF(14115),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(22721), AOM_ICDF(27599), AOM_ICDF(32592), AOM_ICDF(32636),
- AOM_ICDF(32768) },
- { AOM_ICDF(19009), AOM_ICDF(24676), AOM_ICDF(32258), AOM_ICDF(32367),
- AOM_ICDF(32768) },
- { AOM_ICDF(12737), AOM_ICDF(16769), AOM_ICDF(28739), AOM_ICDF(29247),
- AOM_ICDF(32768) },
- { AOM_ICDF(8769), AOM_ICDF(10956), AOM_ICDF(21941), AOM_ICDF(22840),
- AOM_ICDF(32768) },
- { AOM_ICDF(6721), AOM_ICDF(7678), AOM_ICDF(15319), AOM_ICDF(16290),
- AOM_ICDF(32768) },
- { AOM_ICDF(4417), AOM_ICDF(4430), AOM_ICDF(4583), AOM_ICDF(5712),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(22849), AOM_ICDF(28333), AOM_ICDF(32633), AOM_ICDF(32671),
- AOM_ICDF(32768) },
- { AOM_ICDF(18497), AOM_ICDF(24619), AOM_ICDF(32184), AOM_ICDF(32315),
- AOM_ICDF(32768) },
- { AOM_ICDF(11841), AOM_ICDF(14640), AOM_ICDF(27251), AOM_ICDF(27752),
- AOM_ICDF(32768) },
- { AOM_ICDF(8385), AOM_ICDF(10154), AOM_ICDF(18339), AOM_ICDF(19621),
- AOM_ICDF(32768) },
- { AOM_ICDF(5697), AOM_ICDF(6977), AOM_ICDF(13787), AOM_ICDF(15289),
- AOM_ICDF(32768) },
- { AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(20417), AOM_ICDF(28167), AOM_ICDF(32552), AOM_ICDF(32621),
- AOM_ICDF(32768) },
- { AOM_ICDF(16833), AOM_ICDF(23968), AOM_ICDF(31991), AOM_ICDF(32174),
- AOM_ICDF(32768) },
- { AOM_ICDF(10433), AOM_ICDF(13387), AOM_ICDF(26356), AOM_ICDF(26951),
- AOM_ICDF(32768) },
- { AOM_ICDF(5057), AOM_ICDF(6823), AOM_ICDF(18967), AOM_ICDF(19843),
- AOM_ICDF(32768) },
- { AOM_ICDF(5697), AOM_ICDF(6479), AOM_ICDF(11672), AOM_ICDF(13052),
- AOM_ICDF(32768) },
- { AOM_ICDF(2241), AOM_ICDF(2265), AOM_ICDF(6355), AOM_ICDF(6432),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(12097), AOM_ICDF(28717), AOM_ICDF(32406), AOM_ICDF(32555),
- AOM_ICDF(32768) },
- { AOM_ICDF(10433), AOM_ICDF(26113), AOM_ICDF(31504), AOM_ICDF(31975),
- AOM_ICDF(32768) },
- { AOM_ICDF(5825), AOM_ICDF(14284), AOM_ICDF(21349), AOM_ICDF(24461),
- AOM_ICDF(32768) },
- { AOM_ICDF(4545), AOM_ICDF(8454), AOM_ICDF(12648), AOM_ICDF(17501),
- AOM_ICDF(32768) },
- { AOM_ICDF(193), AOM_ICDF(7173), AOM_ICDF(15272), AOM_ICDF(19322),
- AOM_ICDF(32768) },
- { AOM_ICDF(2113), AOM_ICDF(2183), AOM_ICDF(7202), AOM_ICDF(7377),
- AOM_ICDF(32768) } } } },
- { // UV plane
- { // Inter
- { // Band 0
- { AOM_ICDF(27456), AOM_ICDF(28244), AOM_ICDF(31289), AOM_ICDF(32358),
- AOM_ICDF(32534), AOM_ICDF(32768) },
- { AOM_ICDF(16960), AOM_ICDF(21207), AOM_ICDF(26511), AOM_ICDF(30539),
- AOM_ICDF(31190), AOM_ICDF(32768) },
- { AOM_ICDF(5440), AOM_ICDF(13412), AOM_ICDF(18469), AOM_ICDF(26423),
- AOM_ICDF(27669), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(17857), AOM_ICDF(26327), AOM_ICDF(31983), AOM_ICDF(32219),
- AOM_ICDF(32768) },
- { AOM_ICDF(16065), AOM_ICDF(24198), AOM_ICDF(31431), AOM_ICDF(31785),
- AOM_ICDF(32768) },
- { AOM_ICDF(12865), AOM_ICDF(18011), AOM_ICDF(28454), AOM_ICDF(29166),
- AOM_ICDF(32768) },
- { AOM_ICDF(9665), AOM_ICDF(12501), AOM_ICDF(24331), AOM_ICDF(25147),
- AOM_ICDF(32768) },
- { AOM_ICDF(2753), AOM_ICDF(3121), AOM_ICDF(12661), AOM_ICDF(13034),
- AOM_ICDF(32768) },
- { AOM_ICDF(4033), AOM_ICDF(4140), AOM_ICDF(11834), AOM_ICDF(11977),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(21185), AOM_ICDF(28338), AOM_ICDF(32249), AOM_ICDF(32417),
- AOM_ICDF(32768) },
- { AOM_ICDF(18497), AOM_ICDF(25227), AOM_ICDF(31905), AOM_ICDF(32122),
- AOM_ICDF(32768) },
- { AOM_ICDF(12097), AOM_ICDF(16516), AOM_ICDF(28610), AOM_ICDF(29166),
- AOM_ICDF(32768) },
- { AOM_ICDF(9281), AOM_ICDF(11157), AOM_ICDF(21438), AOM_ICDF(22312),
- AOM_ICDF(32768) },
- { AOM_ICDF(5697), AOM_ICDF(6566), AOM_ICDF(15585), AOM_ICDF(16340),
- AOM_ICDF(32768) },
- { AOM_ICDF(9409), AOM_ICDF(9659), AOM_ICDF(11827), AOM_ICDF(12911),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(22337), AOM_ICDF(29459), AOM_ICDF(32382), AOM_ICDF(32519),
- AOM_ICDF(32768) },
- { AOM_ICDF(16961), AOM_ICDF(25262), AOM_ICDF(31874), AOM_ICDF(32123),
- AOM_ICDF(32768) },
- { AOM_ICDF(12353), AOM_ICDF(17748), AOM_ICDF(29300), AOM_ICDF(29852),
- AOM_ICDF(32768) },
- { AOM_ICDF(9025), AOM_ICDF(11528), AOM_ICDF(24468), AOM_ICDF(25141),
- AOM_ICDF(32768) },
- { AOM_ICDF(6209), AOM_ICDF(6565), AOM_ICDF(15806), AOM_ICDF(16121),
- AOM_ICDF(32768) },
- { AOM_ICDF(2497), AOM_ICDF(2524), AOM_ICDF(7050), AOM_ICDF(7125),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(20417), AOM_ICDF(29779), AOM_ICDF(32552), AOM_ICDF(32636),
- AOM_ICDF(32768) },
- { AOM_ICDF(15553), AOM_ICDF(26420), AOM_ICDF(32063), AOM_ICDF(32295),
- AOM_ICDF(32768) },
- { AOM_ICDF(9665), AOM_ICDF(17946), AOM_ICDF(29385), AOM_ICDF(30096),
- AOM_ICDF(32768) },
- { AOM_ICDF(5569), AOM_ICDF(10207), AOM_ICDF(22410), AOM_ICDF(23836),
- AOM_ICDF(32768) },
- { AOM_ICDF(16449), AOM_ICDF(16450), AOM_ICDF(16545), AOM_ICDF(16593),
- AOM_ICDF(32768) },
- { AOM_ICDF(2369), AOM_ICDF(2395), AOM_ICDF(6822), AOM_ICDF(6898),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(10177), AOM_ICDF(30567), AOM_ICDF(32725), AOM_ICDF(32745),
- AOM_ICDF(32768) },
- { AOM_ICDF(9537), AOM_ICDF(28243), AOM_ICDF(32179), AOM_ICDF(32423),
- AOM_ICDF(32768) },
- { AOM_ICDF(13377), AOM_ICDF(23187), AOM_ICDF(29322), AOM_ICDF(30382),
- AOM_ICDF(32768) },
- { AOM_ICDF(13121), AOM_ICDF(21346), AOM_ICDF(29507), AOM_ICDF(30326),
- AOM_ICDF(32768) },
- { AOM_ICDF(4417), AOM_ICDF(4939), AOM_ICDF(15104), AOM_ICDF(15535),
- AOM_ICDF(32768) },
- { AOM_ICDF(2625), AOM_ICDF(2680), AOM_ICDF(8218), AOM_ICDF(8338),
- AOM_ICDF(32768) } } },
- { // Inter
- { // Band 0
- { AOM_ICDF(29376), AOM_ICDF(30098), AOM_ICDF(32421), AOM_ICDF(32766),
- AOM_ICDF(32767), AOM_ICDF(32768) },
- { AOM_ICDF(18368), AOM_ICDF(22916), AOM_ICDF(30116), AOM_ICDF(32541),
- AOM_ICDF(32650), AOM_ICDF(32768) },
- { AOM_ICDF(5952), AOM_ICDF(16505), AOM_ICDF(25955), AOM_ICDF(32163),
- AOM_ICDF(32365), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(19649), AOM_ICDF(30160), AOM_ICDF(32743), AOM_ICDF(32753),
- AOM_ICDF(32768) },
- { AOM_ICDF(18881), AOM_ICDF(28724), AOM_ICDF(32688), AOM_ICDF(32717),
- AOM_ICDF(32768) },
- { AOM_ICDF(16833), AOM_ICDF(23053), AOM_ICDF(31244), AOM_ICDF(31573),
- AOM_ICDF(32768) },
- { AOM_ICDF(14657), AOM_ICDF(17714), AOM_ICDF(26083), AOM_ICDF(26978),
- AOM_ICDF(32768) },
- { AOM_ICDF(14657), AOM_ICDF(16618), AOM_ICDF(24597), AOM_ICDF(25403),
- AOM_ICDF(32768) },
- { AOM_ICDF(4289), AOM_ICDF(4326), AOM_ICDF(10686), AOM_ICDF(10751),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(21953), AOM_ICDF(30956), AOM_ICDF(32748), AOM_ICDF(32757),
- AOM_ICDF(32768) },
- { AOM_ICDF(20929), AOM_ICDF(29412), AOM_ICDF(32700), AOM_ICDF(32725),
- AOM_ICDF(32768) },
- { AOM_ICDF(13377), AOM_ICDF(21495), AOM_ICDF(31216), AOM_ICDF(31569),
- AOM_ICDF(32768) },
- { AOM_ICDF(9153), AOM_ICDF(15097), AOM_ICDF(28295), AOM_ICDF(28990),
- AOM_ICDF(32768) },
- { AOM_ICDF(5313), AOM_ICDF(5363), AOM_ICDF(13839), AOM_ICDF(13894),
- AOM_ICDF(32768) },
- { AOM_ICDF(2625), AOM_ICDF(2652), AOM_ICDF(7276), AOM_ICDF(7351),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(20289), AOM_ICDF(31164), AOM_ICDF(32745), AOM_ICDF(32755),
- AOM_ICDF(32768) },
- { AOM_ICDF(17601), AOM_ICDF(29635), AOM_ICDF(32739), AOM_ICDF(32751),
- AOM_ICDF(32768) },
- { AOM_ICDF(18241), AOM_ICDF(24284), AOM_ICDF(32116), AOM_ICDF(32258),
- AOM_ICDF(32768) },
- { AOM_ICDF(32705), AOM_ICDF(32706), AOM_ICDF(32739), AOM_ICDF(32740),
- AOM_ICDF(32768) },
- { AOM_ICDF(5697), AOM_ICDF(5750), AOM_ICDF(14739), AOM_ICDF(14792),
- AOM_ICDF(32768) },
- { AOM_ICDF(2881), AOM_ICDF(2913), AOM_ICDF(8427), AOM_ICDF(8498),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(19009), AOM_ICDF(31481), AOM_ICDF(32742), AOM_ICDF(32754),
- AOM_ICDF(32768) },
- { AOM_ICDF(15809), AOM_ICDF(30521), AOM_ICDF(32736), AOM_ICDF(32750),
- AOM_ICDF(32768) },
- { AOM_ICDF(16449), AOM_ICDF(32705), AOM_ICDF(32737), AOM_ICDF(32753),
- AOM_ICDF(32768) },
- { AOM_ICDF(7873), AOM_ICDF(8039), AOM_ICDF(19981), AOM_ICDF(20068),
- AOM_ICDF(32768) },
- { AOM_ICDF(5313), AOM_ICDF(5366), AOM_ICDF(14376), AOM_ICDF(14430),
- AOM_ICDF(32768) },
- { AOM_ICDF(2753), AOM_ICDF(2789), AOM_ICDF(8909), AOM_ICDF(8979),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(11841), AOM_ICDF(32116), AOM_ICDF(32728), AOM_ICDF(32748),
- AOM_ICDF(32768) },
- { AOM_ICDF(12353), AOM_ICDF(32132), AOM_ICDF(32729), AOM_ICDF(32748),
- AOM_ICDF(32768) },
- { AOM_ICDF(7489), AOM_ICDF(12435), AOM_ICDF(25708), AOM_ICDF(26666),
- AOM_ICDF(32768) },
- { AOM_ICDF(5697), AOM_ICDF(7486), AOM_ICDF(20238), AOM_ICDF(21009),
- AOM_ICDF(32768) },
- { AOM_ICDF(4929), AOM_ICDF(5579), AOM_ICDF(16402), AOM_ICDF(16866),
- AOM_ICDF(32768) },
- { AOM_ICDF(3009), AOM_ICDF(3246), AOM_ICDF(10158), AOM_ICDF(10533),
- AOM_ICDF(32768) } } } }
-};
-static const coeff_cdf_model default_coef_head_cdf_8x8[PLANE_TYPES] = {
- { // Y plane
- { // Intra
- { // Band 0
- { AOM_ICDF(16064), AOM_ICDF(18127), AOM_ICDF(22153), AOM_ICDF(27289),
- AOM_ICDF(28507), AOM_ICDF(32768) },
- { AOM_ICDF(6720), AOM_ICDF(10545), AOM_ICDF(13491), AOM_ICDF(20948),
- AOM_ICDF(22631), AOM_ICDF(32768) },
- { AOM_ICDF(832), AOM_ICDF(5270), AOM_ICDF(5918), AOM_ICDF(12645),
- AOM_ICDF(13532), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(14017), AOM_ICDF(16139), AOM_ICDF(26799), AOM_ICDF(27295),
- AOM_ICDF(32768) },
- { AOM_ICDF(12737), AOM_ICDF(15136), AOM_ICDF(26235), AOM_ICDF(26816),
- AOM_ICDF(32768) },
- { AOM_ICDF(10817), AOM_ICDF(12445), AOM_ICDF(23637), AOM_ICDF(24217),
- AOM_ICDF(32768) },
- { AOM_ICDF(8897), AOM_ICDF(9702), AOM_ICDF(20040), AOM_ICDF(20500),
- AOM_ICDF(32768) },
- { AOM_ICDF(5953), AOM_ICDF(6156), AOM_ICDF(13966), AOM_ICDF(14205),
- AOM_ICDF(32768) },
- { AOM_ICDF(2497), AOM_ICDF(2519), AOM_ICDF(6222), AOM_ICDF(6300),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(19777), AOM_ICDF(21403), AOM_ICDF(30054), AOM_ICDF(30269),
- AOM_ICDF(32768) },
- { AOM_ICDF(16193), AOM_ICDF(17913), AOM_ICDF(28593), AOM_ICDF(28883),
- AOM_ICDF(32768) },
- { AOM_ICDF(12609), AOM_ICDF(13572), AOM_ICDF(25248), AOM_ICDF(25534),
- AOM_ICDF(32768) },
- { AOM_ICDF(9665), AOM_ICDF(10118), AOM_ICDF(20721), AOM_ICDF(20968),
- AOM_ICDF(32768) },
- { AOM_ICDF(6849), AOM_ICDF(7028), AOM_ICDF(15202), AOM_ICDF(15391),
- AOM_ICDF(32768) },
- { AOM_ICDF(3009), AOM_ICDF(3036), AOM_ICDF(7601), AOM_ICDF(7675),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(22593), AOM_ICDF(23915), AOM_ICDF(31159), AOM_ICDF(31283),
- AOM_ICDF(32768) },
- { AOM_ICDF(17345), AOM_ICDF(18690), AOM_ICDF(29425), AOM_ICDF(29611),
- AOM_ICDF(32768) },
- { AOM_ICDF(11969), AOM_ICDF(12540), AOM_ICDF(24685), AOM_ICDF(24867),
- AOM_ICDF(32768) },
- { AOM_ICDF(8129), AOM_ICDF(8355), AOM_ICDF(18668), AOM_ICDF(18819),
- AOM_ICDF(32768) },
- { AOM_ICDF(4673), AOM_ICDF(4714), AOM_ICDF(11752), AOM_ICDF(11814),
- AOM_ICDF(32768) },
- { AOM_ICDF(1857), AOM_ICDF(1876), AOM_ICDF(5057), AOM_ICDF(5138),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(24513), AOM_ICDF(25718), AOM_ICDF(31947), AOM_ICDF(32014),
- AOM_ICDF(32768) },
- { AOM_ICDF(18881), AOM_ICDF(20029), AOM_ICDF(30409), AOM_ICDF(30527),
- AOM_ICDF(32768) },
- { AOM_ICDF(12481), AOM_ICDF(12953), AOM_ICDF(25201), AOM_ICDF(25341),
- AOM_ICDF(32768) },
- { AOM_ICDF(8385), AOM_ICDF(8528), AOM_ICDF(18815), AOM_ICDF(18910),
- AOM_ICDF(32768) },
- { AOM_ICDF(4289), AOM_ICDF(4327), AOM_ICDF(10797), AOM_ICDF(10861),
- AOM_ICDF(32768) },
- { AOM_ICDF(1857), AOM_ICDF(1872), AOM_ICDF(4332), AOM_ICDF(4415),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(26049), AOM_ICDF(27752), AOM_ICDF(32415), AOM_ICDF(32462),
- AOM_ICDF(32768) },
- { AOM_ICDF(20417), AOM_ICDF(22100), AOM_ICDF(31056), AOM_ICDF(31192),
- AOM_ICDF(32768) },
- { AOM_ICDF(12481), AOM_ICDF(13075), AOM_ICDF(24646), AOM_ICDF(24844),
- AOM_ICDF(32768) },
- { AOM_ICDF(7489), AOM_ICDF(7696), AOM_ICDF(17117), AOM_ICDF(17285),
- AOM_ICDF(32768) },
- { AOM_ICDF(3777), AOM_ICDF(3814), AOM_ICDF(10062), AOM_ICDF(10129),
- AOM_ICDF(32768) },
- { AOM_ICDF(1473), AOM_ICDF(1486), AOM_ICDF(3735), AOM_ICDF(3820),
- AOM_ICDF(32768) } } },
- { // Intra
- { // Band 0
- { AOM_ICDF(25920), AOM_ICDF(27743), AOM_ICDF(29455), AOM_ICDF(32147),
- AOM_ICDF(32280), AOM_ICDF(32768) },
- { AOM_ICDF(13888), AOM_ICDF(19845), AOM_ICDF(23350), AOM_ICDF(30219),
- AOM_ICDF(30660), AOM_ICDF(32768) },
- { AOM_ICDF(2368), AOM_ICDF(12781), AOM_ICDF(16196), AOM_ICDF(27232),
- AOM_ICDF(27894), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(21697), AOM_ICDF(24758), AOM_ICDF(32358), AOM_ICDF(32417),
- AOM_ICDF(32768) },
- { AOM_ICDF(20289), AOM_ICDF(23960), AOM_ICDF(32111), AOM_ICDF(32213),
- AOM_ICDF(32768) },
- { AOM_ICDF(17345), AOM_ICDF(19966), AOM_ICDF(30630), AOM_ICDF(30841),
- AOM_ICDF(32768) },
- { AOM_ICDF(14529), AOM_ICDF(16070), AOM_ICDF(27461), AOM_ICDF(27777),
- AOM_ICDF(32768) },
- { AOM_ICDF(9793), AOM_ICDF(10613), AOM_ICDF(21146), AOM_ICDF(21566),
- AOM_ICDF(32768) },
- { AOM_ICDF(6977), AOM_ICDF(7162), AOM_ICDF(15591), AOM_ICDF(15776),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(23617), AOM_ICDF(26783), AOM_ICDF(32572), AOM_ICDF(32607),
- AOM_ICDF(32768) },
- { AOM_ICDF(20801), AOM_ICDF(24292), AOM_ICDF(32185), AOM_ICDF(32275),
- AOM_ICDF(32768) },
- { AOM_ICDF(15169), AOM_ICDF(17905), AOM_ICDF(29916), AOM_ICDF(30181),
- AOM_ICDF(32768) },
- { AOM_ICDF(10945), AOM_ICDF(12972), AOM_ICDF(25565), AOM_ICDF(26064),
- AOM_ICDF(32768) },
- { AOM_ICDF(6849), AOM_ICDF(8334), AOM_ICDF(18543), AOM_ICDF(19446),
- AOM_ICDF(32768) },
- { AOM_ICDF(3649), AOM_ICDF(4346), AOM_ICDF(12351), AOM_ICDF(13169),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(25281), AOM_ICDF(28440), AOM_ICDF(32667), AOM_ICDF(32689),
- AOM_ICDF(32768) },
- { AOM_ICDF(22081), AOM_ICDF(25694), AOM_ICDF(32414), AOM_ICDF(32476),
- AOM_ICDF(32768) },
- { AOM_ICDF(15297), AOM_ICDF(18341), AOM_ICDF(30141), AOM_ICDF(30410),
- AOM_ICDF(32768) },
- { AOM_ICDF(10305), AOM_ICDF(12381), AOM_ICDF(24477), AOM_ICDF(25084),
- AOM_ICDF(32768) },
- { AOM_ICDF(5697), AOM_ICDF(6673), AOM_ICDF(16325), AOM_ICDF(17080),
- AOM_ICDF(32768) },
- { AOM_ICDF(2369), AOM_ICDF(2393), AOM_ICDF(6466), AOM_ICDF(6543),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(25921), AOM_ICDF(29445), AOM_ICDF(32729), AOM_ICDF(32739),
- AOM_ICDF(32768) },
- { AOM_ICDF(22465), AOM_ICDF(26834), AOM_ICDF(32588), AOM_ICDF(32627),
- AOM_ICDF(32768) },
- { AOM_ICDF(16449), AOM_ICDF(20062), AOM_ICDF(31016), AOM_ICDF(31233),
- AOM_ICDF(32768) },
- { AOM_ICDF(11073), AOM_ICDF(13165), AOM_ICDF(25353), AOM_ICDF(25896),
- AOM_ICDF(32768) },
- { AOM_ICDF(11713), AOM_ICDF(13837), AOM_ICDF(20144), AOM_ICDF(21734),
- AOM_ICDF(32768) },
- { AOM_ICDF(2241), AOM_ICDF(2265), AOM_ICDF(6355), AOM_ICDF(6432),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(26177), AOM_ICDF(29403), AOM_ICDF(32705), AOM_ICDF(32721),
- AOM_ICDF(32768) },
- { AOM_ICDF(22337), AOM_ICDF(26344), AOM_ICDF(32545), AOM_ICDF(32589),
- AOM_ICDF(32768) },
- { AOM_ICDF(19009), AOM_ICDF(21527), AOM_ICDF(31775), AOM_ICDF(31873),
- AOM_ICDF(32768) },
- { AOM_ICDF(11585), AOM_ICDF(12685), AOM_ICDF(22632), AOM_ICDF(23137),
- AOM_ICDF(32768) },
- { AOM_ICDF(8257), AOM_ICDF(8305), AOM_ICDF(16444), AOM_ICDF(16492),
- AOM_ICDF(32768) },
- { AOM_ICDF(2113), AOM_ICDF(2183), AOM_ICDF(7202), AOM_ICDF(7377),
- AOM_ICDF(32768) } } } },
- { // UV plane
- { // Inter
- { // Band 0
- { AOM_ICDF(27200), AOM_ICDF(27981), AOM_ICDF(31389), AOM_ICDF(32444),
- AOM_ICDF(32592), AOM_ICDF(32768) },
- { AOM_ICDF(14528), AOM_ICDF(19068), AOM_ICDF(24887), AOM_ICDF(29901),
- AOM_ICDF(30688), AOM_ICDF(32768) },
- { AOM_ICDF(3776), AOM_ICDF(11778), AOM_ICDF(14700), AOM_ICDF(23745),
- AOM_ICDF(24854), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(20289), AOM_ICDF(25202), AOM_ICDF(31672), AOM_ICDF(31909),
- AOM_ICDF(32768) },
- { AOM_ICDF(18369), AOM_ICDF(23493), AOM_ICDF(31166), AOM_ICDF(31487),
- AOM_ICDF(32768) },
- { AOM_ICDF(15425), AOM_ICDF(18619), AOM_ICDF(28941), AOM_ICDF(29393),
- AOM_ICDF(32768) },
- { AOM_ICDF(10945), AOM_ICDF(12535), AOM_ICDF(24287), AOM_ICDF(24792),
- AOM_ICDF(32768) },
- { AOM_ICDF(6465), AOM_ICDF(6810), AOM_ICDF(15764), AOM_ICDF(16080),
- AOM_ICDF(32768) },
- { AOM_ICDF(2113), AOM_ICDF(2137), AOM_ICDF(6125), AOM_ICDF(6203),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(23745), AOM_ICDF(27041), AOM_ICDF(31976), AOM_ICDF(32135),
- AOM_ICDF(32768) },
- { AOM_ICDF(19521), AOM_ICDF(22766), AOM_ICDF(31139), AOM_ICDF(31367),
- AOM_ICDF(32768) },
- { AOM_ICDF(14273), AOM_ICDF(15834), AOM_ICDF(27820), AOM_ICDF(28105),
- AOM_ICDF(32768) },
- { AOM_ICDF(9537), AOM_ICDF(10445), AOM_ICDF(22106), AOM_ICDF(22491),
- AOM_ICDF(32768) },
- { AOM_ICDF(7233), AOM_ICDF(7386), AOM_ICDF(15961), AOM_ICDF(16109),
- AOM_ICDF(32768) },
- { AOM_ICDF(2369), AOM_ICDF(2401), AOM_ICDF(7891), AOM_ICDF(7964),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(26305), AOM_ICDF(28703), AOM_ICDF(32352), AOM_ICDF(32435),
- AOM_ICDF(32768) },
- { AOM_ICDF(20673), AOM_ICDF(23490), AOM_ICDF(31517), AOM_ICDF(31680),
- AOM_ICDF(32768) },
- { AOM_ICDF(14017), AOM_ICDF(15251), AOM_ICDF(27458), AOM_ICDF(27702),
- AOM_ICDF(32768) },
- { AOM_ICDF(10945), AOM_ICDF(11374), AOM_ICDF(22496), AOM_ICDF(22687),
- AOM_ICDF(32768) },
- { AOM_ICDF(9153), AOM_ICDF(9435), AOM_ICDF(22299), AOM_ICDF(22411),
- AOM_ICDF(32768) },
- { AOM_ICDF(193), AOM_ICDF(269), AOM_ICDF(13236), AOM_ICDF(13293),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(27713), AOM_ICDF(29770), AOM_ICDF(32522), AOM_ICDF(32575),
- AOM_ICDF(32768) },
- { AOM_ICDF(21569), AOM_ICDF(24342), AOM_ICDF(31785), AOM_ICDF(31919),
- AOM_ICDF(32768) },
- { AOM_ICDF(15297), AOM_ICDF(16497), AOM_ICDF(28367), AOM_ICDF(28569),
- AOM_ICDF(32768) },
- { AOM_ICDF(17601), AOM_ICDF(17828), AOM_ICDF(24444), AOM_ICDF(24582),
- AOM_ICDF(32768) },
- { AOM_ICDF(6977), AOM_ICDF(7035), AOM_ICDF(16901), AOM_ICDF(16947),
- AOM_ICDF(32768) },
- { AOM_ICDF(193), AOM_ICDF(384), AOM_ICDF(32706), AOM_ICDF(32707),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(28737), AOM_ICDF(30879), AOM_ICDF(32667), AOM_ICDF(32695),
- AOM_ICDF(32768) },
- { AOM_ICDF(22593), AOM_ICDF(26241), AOM_ICDF(32073), AOM_ICDF(32207),
- AOM_ICDF(32768) },
- { AOM_ICDF(16577), AOM_ICDF(19148), AOM_ICDF(28436), AOM_ICDF(28906),
- AOM_ICDF(32768) },
- { AOM_ICDF(12993), AOM_ICDF(14005), AOM_ICDF(23151), AOM_ICDF(23630),
- AOM_ICDF(32768) },
- { AOM_ICDF(7617), AOM_ICDF(9188), AOM_ICDF(22797), AOM_ICDF(23313),
- AOM_ICDF(32768) },
- { AOM_ICDF(2625), AOM_ICDF(2680), AOM_ICDF(8218), AOM_ICDF(8338),
- AOM_ICDF(32768) } } },
- { // Inter
- { // Band 0
- { AOM_ICDF(28864), AOM_ICDF(29988), AOM_ICDF(32423), AOM_ICDF(32766),
- AOM_ICDF(32767), AOM_ICDF(32768) },
- { AOM_ICDF(18496), AOM_ICDF(24572), AOM_ICDF(30167), AOM_ICDF(32687),
- AOM_ICDF(32737), AOM_ICDF(32768) },
- { AOM_ICDF(5440), AOM_ICDF(19618), AOM_ICDF(25332), AOM_ICDF(32393),
- AOM_ICDF(32491), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(23745), AOM_ICDF(29427), AOM_ICDF(32751), AOM_ICDF(32757),
- AOM_ICDF(32768) },
- { AOM_ICDF(23745), AOM_ICDF(28704), AOM_ICDF(32716), AOM_ICDF(32731),
- AOM_ICDF(32768) },
- { AOM_ICDF(23105), AOM_ICDF(27943), AOM_ICDF(32524), AOM_ICDF(32587),
- AOM_ICDF(32768) },
- { AOM_ICDF(21057), AOM_ICDF(24773), AOM_ICDF(29589), AOM_ICDF(30282),
- AOM_ICDF(32768) },
- { AOM_ICDF(12609), AOM_ICDF(14823), AOM_ICDF(23831), AOM_ICDF(24713),
- AOM_ICDF(32768) },
- { AOM_ICDF(16449), AOM_ICDF(16450), AOM_ICDF(16545), AOM_ICDF(16593),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(25025), AOM_ICDF(30203), AOM_ICDF(32754), AOM_ICDF(32759),
- AOM_ICDF(32768) },
- { AOM_ICDF(23617), AOM_ICDF(28361), AOM_ICDF(32715), AOM_ICDF(32729),
- AOM_ICDF(32768) },
- { AOM_ICDF(17985), AOM_ICDF(21562), AOM_ICDF(31354), AOM_ICDF(31543),
- AOM_ICDF(32768) },
- { AOM_ICDF(12353), AOM_ICDF(18915), AOM_ICDF(28742), AOM_ICDF(29548),
- AOM_ICDF(32768) },
- { AOM_ICDF(193), AOM_ICDF(289), AOM_ICDF(16545), AOM_ICDF(16593),
- AOM_ICDF(32768) },
- { AOM_ICDF(2625), AOM_ICDF(2652), AOM_ICDF(7276), AOM_ICDF(7351),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(26433), AOM_ICDF(30892), AOM_ICDF(32757), AOM_ICDF(32761),
- AOM_ICDF(32768) },
- { AOM_ICDF(24513), AOM_ICDF(29274), AOM_ICDF(32721), AOM_ICDF(32735),
- AOM_ICDF(32768) },
- { AOM_ICDF(20161), AOM_ICDF(24040), AOM_ICDF(32055), AOM_ICDF(32171),
- AOM_ICDF(32768) },
- { AOM_ICDF(21953), AOM_ICDF(24678), AOM_ICDF(27382), AOM_ICDF(28734),
- AOM_ICDF(32768) },
- { AOM_ICDF(5697), AOM_ICDF(5750), AOM_ICDF(14739), AOM_ICDF(14792),
- AOM_ICDF(32768) },
- { AOM_ICDF(2881), AOM_ICDF(2913), AOM_ICDF(8427), AOM_ICDF(8498),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(27457), AOM_ICDF(31485), AOM_ICDF(32759), AOM_ICDF(32763),
- AOM_ICDF(32768) },
- { AOM_ICDF(24129), AOM_ICDF(29502), AOM_ICDF(32752), AOM_ICDF(32757),
- AOM_ICDF(32768) },
- { AOM_ICDF(19009), AOM_ICDF(25452), AOM_ICDF(32473), AOM_ICDF(32544),
- AOM_ICDF(32768) },
- { AOM_ICDF(32705), AOM_ICDF(32706), AOM_ICDF(32737), AOM_ICDF(32738),
- AOM_ICDF(32768) },
- { AOM_ICDF(5313), AOM_ICDF(5366), AOM_ICDF(14376), AOM_ICDF(14430),
- AOM_ICDF(32768) },
- { AOM_ICDF(2753), AOM_ICDF(2789), AOM_ICDF(8909), AOM_ICDF(8979),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(27841), AOM_ICDF(32288), AOM_ICDF(32759), AOM_ICDF(32764),
- AOM_ICDF(32768) },
- { AOM_ICDF(19137), AOM_ICDF(30271), AOM_ICDF(32742), AOM_ICDF(32753),
- AOM_ICDF(32768) },
- { AOM_ICDF(18625), AOM_ICDF(27739), AOM_ICDF(29979), AOM_ICDF(31099),
- AOM_ICDF(32768) },
- { AOM_ICDF(5697), AOM_ICDF(7486), AOM_ICDF(20238), AOM_ICDF(21009),
- AOM_ICDF(32768) },
- { AOM_ICDF(4929), AOM_ICDF(5579), AOM_ICDF(16402), AOM_ICDF(16866),
- AOM_ICDF(32768) },
- { AOM_ICDF(3009), AOM_ICDF(3246), AOM_ICDF(10158), AOM_ICDF(10533),
- AOM_ICDF(32768) } } } }
-};
-static const coeff_cdf_model default_coef_head_cdf_16x16[PLANE_TYPES] = {
- { // Y plane
- { // Intra
- { // Band 0
- { AOM_ICDF(960), AOM_ICDF(4882), AOM_ICDF(9467), AOM_ICDF(17710),
- AOM_ICDF(20412), AOM_ICDF(32768) },
- { AOM_ICDF(704), AOM_ICDF(4657), AOM_ICDF(6561), AOM_ICDF(14507),
- AOM_ICDF(16279), AOM_ICDF(32768) },
- { AOM_ICDF(192), AOM_ICDF(3443), AOM_ICDF(3759), AOM_ICDF(9011),
- AOM_ICDF(9685), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(12481), AOM_ICDF(13958), AOM_ICDF(24487), AOM_ICDF(24997),
- AOM_ICDF(32768) },
- { AOM_ICDF(11457), AOM_ICDF(13075), AOM_ICDF(23820), AOM_ICDF(24406),
- AOM_ICDF(32768) },
- { AOM_ICDF(9793), AOM_ICDF(11127), AOM_ICDF(21775), AOM_ICDF(22387),
- AOM_ICDF(32768) },
- { AOM_ICDF(7745), AOM_ICDF(8457), AOM_ICDF(18155), AOM_ICDF(18655),
- AOM_ICDF(32768) },
- { AOM_ICDF(5441), AOM_ICDF(5668), AOM_ICDF(13180), AOM_ICDF(13467),
- AOM_ICDF(32768) },
- { AOM_ICDF(2497), AOM_ICDF(2520), AOM_ICDF(6340), AOM_ICDF(6417),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(19521), AOM_ICDF(20572), AOM_ICDF(28965), AOM_ICDF(29177),
- AOM_ICDF(32768) },
- { AOM_ICDF(15425), AOM_ICDF(16741), AOM_ICDF(27247), AOM_ICDF(27554),
- AOM_ICDF(32768) },
- { AOM_ICDF(11969), AOM_ICDF(12690), AOM_ICDF(23872), AOM_ICDF(24141),
- AOM_ICDF(32768) },
- { AOM_ICDF(9281), AOM_ICDF(9678), AOM_ICDF(19970), AOM_ICDF(20207),
- AOM_ICDF(32768) },
- { AOM_ICDF(6081), AOM_ICDF(6266), AOM_ICDF(14682), AOM_ICDF(14876),
- AOM_ICDF(32768) },
- { AOM_ICDF(2753), AOM_ICDF(2779), AOM_ICDF(7150), AOM_ICDF(7225),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(22337), AOM_ICDF(23293), AOM_ICDF(30630), AOM_ICDF(30753),
- AOM_ICDF(32768) },
- { AOM_ICDF(16321), AOM_ICDF(17427), AOM_ICDF(28368), AOM_ICDF(28570),
- AOM_ICDF(32768) },
- { AOM_ICDF(11457), AOM_ICDF(11907), AOM_ICDF(23570), AOM_ICDF(23741),
- AOM_ICDF(32768) },
- { AOM_ICDF(7233), AOM_ICDF(7331), AOM_ICDF(17258), AOM_ICDF(17334),
- AOM_ICDF(32768) },
- { AOM_ICDF(4033), AOM_ICDF(4070), AOM_ICDF(10375), AOM_ICDF(10441),
- AOM_ICDF(32768) },
- { AOM_ICDF(1601), AOM_ICDF(1619), AOM_ICDF(4706), AOM_ICDF(4788),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(24769), AOM_ICDF(25536), AOM_ICDF(31660), AOM_ICDF(31722),
- AOM_ICDF(32768) },
- { AOM_ICDF(18113), AOM_ICDF(18886), AOM_ICDF(29420), AOM_ICDF(29534),
- AOM_ICDF(32768) },
- { AOM_ICDF(11201), AOM_ICDF(11412), AOM_ICDF(23207), AOM_ICDF(23291),
- AOM_ICDF(32768) },
- { AOM_ICDF(6977), AOM_ICDF(7033), AOM_ICDF(16599), AOM_ICDF(16646),
- AOM_ICDF(32768) },
- { AOM_ICDF(4033), AOM_ICDF(4070), AOM_ICDF(10375), AOM_ICDF(10441),
- AOM_ICDF(32768) },
- { AOM_ICDF(1601), AOM_ICDF(1620), AOM_ICDF(4827), AOM_ICDF(4909),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(28353), AOM_ICDF(28831), AOM_ICDF(32502), AOM_ICDF(32517),
- AOM_ICDF(32768) },
- { AOM_ICDF(21441), AOM_ICDF(21869), AOM_ICDF(30977), AOM_ICDF(31017),
- AOM_ICDF(32768) },
- { AOM_ICDF(11969), AOM_ICDF(12088), AOM_ICDF(24116), AOM_ICDF(24158),
- AOM_ICDF(32768) },
- { AOM_ICDF(7489), AOM_ICDF(7547), AOM_ICDF(17413), AOM_ICDF(17458),
- AOM_ICDF(32768) },
- { AOM_ICDF(4545), AOM_ICDF(4585), AOM_ICDF(11325), AOM_ICDF(11388),
- AOM_ICDF(32768) },
- { AOM_ICDF(2113), AOM_ICDF(2133), AOM_ICDF(5526), AOM_ICDF(5606),
- AOM_ICDF(32768) } } },
- { // Intra
- { // Band 0
- { AOM_ICDF(2496), AOM_ICDF(8717), AOM_ICDF(17280), AOM_ICDF(28922),
- AOM_ICDF(29751), AOM_ICDF(32768) },
- { AOM_ICDF(2496), AOM_ICDF(9665), AOM_ICDF(15235), AOM_ICDF(26542),
- AOM_ICDF(27580), AOM_ICDF(32768) },
- { AOM_ICDF(448), AOM_ICDF(9240), AOM_ICDF(11886), AOM_ICDF(24124),
- AOM_ICDF(24898), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(21057), AOM_ICDF(22896), AOM_ICDF(31877), AOM_ICDF(31953),
- AOM_ICDF(32768) },
- { AOM_ICDF(20673), AOM_ICDF(23151), AOM_ICDF(31706), AOM_ICDF(31825),
- AOM_ICDF(32768) },
- { AOM_ICDF(18753), AOM_ICDF(20519), AOM_ICDF(30497), AOM_ICDF(30668),
- AOM_ICDF(32768) },
- { AOM_ICDF(15425), AOM_ICDF(16608), AOM_ICDF(27789), AOM_ICDF(28027),
- AOM_ICDF(32768) },
- { AOM_ICDF(10305), AOM_ICDF(10977), AOM_ICDF(21405), AOM_ICDF(21749),
- AOM_ICDF(32768) },
- { AOM_ICDF(3649), AOM_ICDF(3812), AOM_ICDF(11213), AOM_ICDF(11445),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(24001), AOM_ICDF(25899), AOM_ICDF(32307), AOM_ICDF(32360),
- AOM_ICDF(32768) },
- { AOM_ICDF(20929), AOM_ICDF(22941), AOM_ICDF(31775), AOM_ICDF(31867),
- AOM_ICDF(32768) },
- { AOM_ICDF(15169), AOM_ICDF(16734), AOM_ICDF(29228), AOM_ICDF(29425),
- AOM_ICDF(32768) },
- { AOM_ICDF(10561), AOM_ICDF(12047), AOM_ICDF(24918), AOM_ICDF(25324),
- AOM_ICDF(32768) },
- { AOM_ICDF(6977), AOM_ICDF(7929), AOM_ICDF(18311), AOM_ICDF(18918),
- AOM_ICDF(32768) },
- { AOM_ICDF(3649), AOM_ICDF(3760), AOM_ICDF(9962), AOM_ICDF(10162),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(25793), AOM_ICDF(27526), AOM_ICDF(32565), AOM_ICDF(32591),
- AOM_ICDF(32768) },
- { AOM_ICDF(21825), AOM_ICDF(23885), AOM_ICDF(32064), AOM_ICDF(32135),
- AOM_ICDF(32768) },
- { AOM_ICDF(15041), AOM_ICDF(16286), AOM_ICDF(29203), AOM_ICDF(29360),
- AOM_ICDF(32768) },
- { AOM_ICDF(10433), AOM_ICDF(11058), AOM_ICDF(24349), AOM_ICDF(24538),
- AOM_ICDF(32768) },
- { AOM_ICDF(5569), AOM_ICDF(6016), AOM_ICDF(16460), AOM_ICDF(16794),
- AOM_ICDF(32768) },
- { AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(26433), AOM_ICDF(28398), AOM_ICDF(32682), AOM_ICDF(32696),
- AOM_ICDF(32768) },
- { AOM_ICDF(22977), AOM_ICDF(25086), AOM_ICDF(32367), AOM_ICDF(32412),
- AOM_ICDF(32768) },
- { AOM_ICDF(16577), AOM_ICDF(17928), AOM_ICDF(30144), AOM_ICDF(30275),
- AOM_ICDF(32768) },
- { AOM_ICDF(12481), AOM_ICDF(13352), AOM_ICDF(25993), AOM_ICDF(26211),
- AOM_ICDF(32768) },
- { AOM_ICDF(7745), AOM_ICDF(8069), AOM_ICDF(20501), AOM_ICDF(20657),
- AOM_ICDF(32768) },
- { AOM_ICDF(16449), AOM_ICDF(16450), AOM_ICDF(16545), AOM_ICDF(16593),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(27841), AOM_ICDF(29700), AOM_ICDF(32721), AOM_ICDF(32730),
- AOM_ICDF(32768) },
- { AOM_ICDF(23873), AOM_ICDF(26202), AOM_ICDF(32578), AOM_ICDF(32604),
- AOM_ICDF(32768) },
- { AOM_ICDF(17729), AOM_ICDF(19046), AOM_ICDF(30448), AOM_ICDF(30568),
- AOM_ICDF(32768) },
- { AOM_ICDF(13505), AOM_ICDF(14508), AOM_ICDF(26034), AOM_ICDF(26304),
- AOM_ICDF(32768) },
- { AOM_ICDF(10049), AOM_ICDF(10494), AOM_ICDF(19945), AOM_ICDF(20233),
- AOM_ICDF(32768) },
- { AOM_ICDF(2113), AOM_ICDF(2183), AOM_ICDF(7202), AOM_ICDF(7377),
- AOM_ICDF(32768) } } } },
- { // UV plane
- { // Inter
- { // Band 0
- { AOM_ICDF(27072), AOM_ICDF(27916), AOM_ICDF(31095), AOM_ICDF(32400),
- AOM_ICDF(32553), AOM_ICDF(32768) },
- { AOM_ICDF(12352), AOM_ICDF(16792), AOM_ICDF(22516), AOM_ICDF(28853),
- AOM_ICDF(29797), AOM_ICDF(32768) },
- { AOM_ICDF(2880), AOM_ICDF(9023), AOM_ICDF(11126), AOM_ICDF(20602),
- AOM_ICDF(21713), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(20161), AOM_ICDF(24785), AOM_ICDF(31070), AOM_ICDF(31430),
- AOM_ICDF(32768) },
- { AOM_ICDF(17985), AOM_ICDF(22773), AOM_ICDF(30430), AOM_ICDF(30880),
- AOM_ICDF(32768) },
- { AOM_ICDF(15937), AOM_ICDF(18802), AOM_ICDF(28265), AOM_ICDF(28788),
- AOM_ICDF(32768) },
- { AOM_ICDF(11841), AOM_ICDF(13587), AOM_ICDF(24798), AOM_ICDF(25335),
- AOM_ICDF(32768) },
- { AOM_ICDF(8769), AOM_ICDF(9160), AOM_ICDF(19316), AOM_ICDF(19566),
- AOM_ICDF(32768) },
- { AOM_ICDF(5313), AOM_ICDF(5357), AOM_ICDF(12874), AOM_ICDF(12932),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(24129), AOM_ICDF(26501), AOM_ICDF(31672), AOM_ICDF(31844),
- AOM_ICDF(32768) },
- { AOM_ICDF(19649), AOM_ICDF(21553), AOM_ICDF(30130), AOM_ICDF(30370),
- AOM_ICDF(32768) },
- { AOM_ICDF(11713), AOM_ICDF(13134), AOM_ICDF(25983), AOM_ICDF(26321),
- AOM_ICDF(32768) },
- { AOM_ICDF(9409), AOM_ICDF(9948), AOM_ICDF(21408), AOM_ICDF(21663),
- AOM_ICDF(32768) },
- { AOM_ICDF(5569), AOM_ICDF(5757), AOM_ICDF(14335), AOM_ICDF(14533),
- AOM_ICDF(32768) },
- { AOM_ICDF(2241), AOM_ICDF(2305), AOM_ICDF(13152), AOM_ICDF(13209),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(26817), AOM_ICDF(28135), AOM_ICDF(32130), AOM_ICDF(32209),
- AOM_ICDF(32768) },
- { AOM_ICDF(20161), AOM_ICDF(21412), AOM_ICDF(30331), AOM_ICDF(30481),
- AOM_ICDF(32768) },
- { AOM_ICDF(13377), AOM_ICDF(13798), AOM_ICDF(26065), AOM_ICDF(26176),
- AOM_ICDF(32768) },
- { AOM_ICDF(8129), AOM_ICDF(8290), AOM_ICDF(19920), AOM_ICDF(20008),
- AOM_ICDF(32768) },
- { AOM_ICDF(5697), AOM_ICDF(5751), AOM_ICDF(14950), AOM_ICDF(15002),
- AOM_ICDF(32768) },
- { AOM_ICDF(5569), AOM_ICDF(5601), AOM_ICDF(11041), AOM_ICDF(11105),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(28225), AOM_ICDF(29079), AOM_ICDF(32387), AOM_ICDF(32426),
- AOM_ICDF(32768) },
- { AOM_ICDF(21185), AOM_ICDF(22046), AOM_ICDF(30982), AOM_ICDF(31061),
- AOM_ICDF(32768) },
- { AOM_ICDF(13377), AOM_ICDF(13595), AOM_ICDF(25762), AOM_ICDF(25824),
- AOM_ICDF(32768) },
- { AOM_ICDF(8001), AOM_ICDF(8123), AOM_ICDF(20530), AOM_ICDF(20590),
- AOM_ICDF(32768) },
- { AOM_ICDF(4289), AOM_ICDF(4322), AOM_ICDF(9907), AOM_ICDF(9974),
- AOM_ICDF(32768) },
- { AOM_ICDF(3393), AOM_ICDF(3412), AOM_ICDF(6663), AOM_ICDF(6739),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(30529), AOM_ICDF(31014), AOM_ICDF(32651), AOM_ICDF(32664),
- AOM_ICDF(32768) },
- { AOM_ICDF(23489), AOM_ICDF(24268), AOM_ICDF(31627), AOM_ICDF(31682),
- AOM_ICDF(32768) },
- { AOM_ICDF(14017), AOM_ICDF(14239), AOM_ICDF(26653), AOM_ICDF(26707),
- AOM_ICDF(32768) },
- { AOM_ICDF(11201), AOM_ICDF(11317), AOM_ICDF(23122), AOM_ICDF(23169),
- AOM_ICDF(32768) },
- { AOM_ICDF(6721), AOM_ICDF(6768), AOM_ICDF(14810), AOM_ICDF(14863),
- AOM_ICDF(32768) },
- { AOM_ICDF(6593), AOM_ICDF(6632), AOM_ICDF(13188), AOM_ICDF(13245),
- AOM_ICDF(32768) } } },
- { // Inter
- { // Band 0
- { AOM_ICDF(29888), AOM_ICDF(30492), AOM_ICDF(32500), AOM_ICDF(32766),
- AOM_ICDF(32767), AOM_ICDF(32768) },
- { AOM_ICDF(18752), AOM_ICDF(23235), AOM_ICDF(29846), AOM_ICDF(32214),
- AOM_ICDF(32442), AOM_ICDF(32768) },
- { AOM_ICDF(5568), AOM_ICDF(17762), AOM_ICDF(25039), AOM_ICDF(31213),
- AOM_ICDF(31651), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(26433), AOM_ICDF(29681), AOM_ICDF(32757), AOM_ICDF(32760),
- AOM_ICDF(32768) },
- { AOM_ICDF(24769), AOM_ICDF(28761), AOM_ICDF(32722), AOM_ICDF(32734),
- AOM_ICDF(32768) },
- { AOM_ICDF(22209), AOM_ICDF(26975), AOM_ICDF(32418), AOM_ICDF(32500),
- AOM_ICDF(32768) },
- { AOM_ICDF(16321), AOM_ICDF(21333), AOM_ICDF(28368), AOM_ICDF(29283),
- AOM_ICDF(32768) },
- { AOM_ICDF(12865), AOM_ICDF(14775), AOM_ICDF(22545), AOM_ICDF(23553),
- AOM_ICDF(32768) },
- { AOM_ICDF(12353), AOM_ICDF(12354), AOM_ICDF(12473), AOM_ICDF(12532),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(27457), AOM_ICDF(30005), AOM_ICDF(32738), AOM_ICDF(32745),
- AOM_ICDF(32768) },
- { AOM_ICDF(24897), AOM_ICDF(27541), AOM_ICDF(32723), AOM_ICDF(32731),
- AOM_ICDF(32768) },
- { AOM_ICDF(15297), AOM_ICDF(19106), AOM_ICDF(30414), AOM_ICDF(30711),
- AOM_ICDF(32768) },
- { AOM_ICDF(6593), AOM_ICDF(8826), AOM_ICDF(19732), AOM_ICDF(20840),
- AOM_ICDF(32768) },
- { AOM_ICDF(4161), AOM_ICDF(4233), AOM_ICDF(16509), AOM_ICDF(16557),
- AOM_ICDF(32768) },
- { AOM_ICDF(2625), AOM_ICDF(2652), AOM_ICDF(7276), AOM_ICDF(7351),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(28609), AOM_ICDF(30482), AOM_ICDF(32761), AOM_ICDF(32763),
- AOM_ICDF(32768) },
- { AOM_ICDF(25665), AOM_ICDF(27830), AOM_ICDF(32727), AOM_ICDF(32733),
- AOM_ICDF(32768) },
- { AOM_ICDF(21057), AOM_ICDF(23803), AOM_ICDF(30367), AOM_ICDF(30721),
- AOM_ICDF(32768) },
- { AOM_ICDF(10945), AOM_ICDF(21878), AOM_ICDF(32726), AOM_ICDF(32737),
- AOM_ICDF(32768) },
- { AOM_ICDF(5697), AOM_ICDF(5750), AOM_ICDF(14739), AOM_ICDF(14792),
- AOM_ICDF(32768) },
- { AOM_ICDF(2881), AOM_ICDF(2913), AOM_ICDF(8427), AOM_ICDF(8498),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(28993), AOM_ICDF(30944), AOM_ICDF(32762), AOM_ICDF(32764),
- AOM_ICDF(32768) },
- { AOM_ICDF(26561), AOM_ICDF(28695), AOM_ICDF(32733), AOM_ICDF(32739),
- AOM_ICDF(32768) },
- { AOM_ICDF(17985), AOM_ICDF(19028), AOM_ICDF(31008), AOM_ICDF(31079),
- AOM_ICDF(32768) },
- { AOM_ICDF(7873), AOM_ICDF(8039), AOM_ICDF(19981), AOM_ICDF(20068),
- AOM_ICDF(32768) },
- { AOM_ICDF(5313), AOM_ICDF(5366), AOM_ICDF(14376), AOM_ICDF(14430),
- AOM_ICDF(32768) },
- { AOM_ICDF(2753), AOM_ICDF(2789), AOM_ICDF(8909), AOM_ICDF(8979),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(30273), AOM_ICDF(32029), AOM_ICDF(32764), AOM_ICDF(32766),
- AOM_ICDF(32768) },
- { AOM_ICDF(28609), AOM_ICDF(30847), AOM_ICDF(32745), AOM_ICDF(32751),
- AOM_ICDF(32768) },
- { AOM_ICDF(21313), AOM_ICDF(24377), AOM_ICDF(31986), AOM_ICDF(32098),
- AOM_ICDF(32768) },
- { AOM_ICDF(32705), AOM_ICDF(32709), AOM_ICDF(32739), AOM_ICDF(32741),
- AOM_ICDF(32768) },
- { AOM_ICDF(4929), AOM_ICDF(5579), AOM_ICDF(16402), AOM_ICDF(16866),
- AOM_ICDF(32768) },
- { AOM_ICDF(3009), AOM_ICDF(3246), AOM_ICDF(10158), AOM_ICDF(10533),
- AOM_ICDF(32768) } } } }
-};
-static const coeff_cdf_model default_coef_head_cdf_32x32[PLANE_TYPES] = {
- { // Y plane
- { // Intra
- { // Band 0
- { AOM_ICDF(2240), AOM_ICDF(5407), AOM_ICDF(18304), AOM_ICDF(25601),
- AOM_ICDF(27911), AOM_ICDF(32768) },
- { AOM_ICDF(960), AOM_ICDF(4633), AOM_ICDF(8197), AOM_ICDF(16254),
- AOM_ICDF(18796), AOM_ICDF(32768) },
- { AOM_ICDF(192), AOM_ICDF(3061), AOM_ICDF(3557), AOM_ICDF(8701),
- AOM_ICDF(9762), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(11969), AOM_ICDF(15846), AOM_ICDF(25660), AOM_ICDF(26667),
- AOM_ICDF(32768) },
- { AOM_ICDF(11713), AOM_ICDF(15794), AOM_ICDF(25737), AOM_ICDF(26760),
- AOM_ICDF(32768) },
- { AOM_ICDF(9281), AOM_ICDF(12675), AOM_ICDF(23181), AOM_ICDF(24351),
- AOM_ICDF(32768) },
- { AOM_ICDF(7105), AOM_ICDF(8757), AOM_ICDF(18383), AOM_ICDF(19437),
- AOM_ICDF(32768) },
- { AOM_ICDF(4289), AOM_ICDF(4579), AOM_ICDF(11353), AOM_ICDF(11792),
- AOM_ICDF(32768) },
- { AOM_ICDF(1857), AOM_ICDF(1874), AOM_ICDF(4695), AOM_ICDF(4777),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(20929), AOM_ICDF(22297), AOM_ICDF(29370), AOM_ICDF(29646),
- AOM_ICDF(32768) },
- { AOM_ICDF(17473), AOM_ICDF(18985), AOM_ICDF(28079), AOM_ICDF(28413),
- AOM_ICDF(32768) },
- { AOM_ICDF(13121), AOM_ICDF(14064), AOM_ICDF(24902), AOM_ICDF(25217),
- AOM_ICDF(32768) },
- { AOM_ICDF(9793), AOM_ICDF(10214), AOM_ICDF(20069), AOM_ICDF(20329),
- AOM_ICDF(32768) },
- { AOM_ICDF(5825), AOM_ICDF(5987), AOM_ICDF(13350), AOM_ICDF(13559),
- AOM_ICDF(32768) },
- { AOM_ICDF(2241), AOM_ICDF(2260), AOM_ICDF(5520), AOM_ICDF(5600),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(25921), AOM_ICDF(26891), AOM_ICDF(31632), AOM_ICDF(31729),
- AOM_ICDF(32768) },
- { AOM_ICDF(18241), AOM_ICDF(19463), AOM_ICDF(29222), AOM_ICDF(29419),
- AOM_ICDF(32768) },
- { AOM_ICDF(11585), AOM_ICDF(12065), AOM_ICDF(23294), AOM_ICDF(23488),
- AOM_ICDF(32768) },
- { AOM_ICDF(6593), AOM_ICDF(6686), AOM_ICDF(16153), AOM_ICDF(16234),
- AOM_ICDF(32768) },
- { AOM_ICDF(3137), AOM_ICDF(3170), AOM_ICDF(8751), AOM_ICDF(8821),
- AOM_ICDF(32768) },
- { AOM_ICDF(1345), AOM_ICDF(1359), AOM_ICDF(3739), AOM_ICDF(3824),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(27713), AOM_ICDF(28504), AOM_ICDF(32068), AOM_ICDF(32132),
- AOM_ICDF(32768) },
- { AOM_ICDF(19265), AOM_ICDF(20354), AOM_ICDF(29789), AOM_ICDF(29943),
- AOM_ICDF(32768) },
- { AOM_ICDF(11201), AOM_ICDF(11538), AOM_ICDF(22701), AOM_ICDF(22848),
- AOM_ICDF(32768) },
- { AOM_ICDF(6337), AOM_ICDF(6424), AOM_ICDF(15268), AOM_ICDF(15353),
- AOM_ICDF(32768) },
- { AOM_ICDF(3649), AOM_ICDF(3681), AOM_ICDF(9052), AOM_ICDF(9121),
- AOM_ICDF(32768) },
- { AOM_ICDF(1601), AOM_ICDF(1618), AOM_ICDF(4584), AOM_ICDF(4667),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(30913), AOM_ICDF(31044), AOM_ICDF(32635), AOM_ICDF(32640),
- AOM_ICDF(32768) },
- { AOM_ICDF(22081), AOM_ICDF(22261), AOM_ICDF(30452), AOM_ICDF(30477),
- AOM_ICDF(32768) },
- { AOM_ICDF(10561), AOM_ICDF(10625), AOM_ICDF(21535), AOM_ICDF(21568),
- AOM_ICDF(32768) },
- { AOM_ICDF(6081), AOM_ICDF(6130), AOM_ICDF(14369), AOM_ICDF(14423),
- AOM_ICDF(32768) },
- { AOM_ICDF(3777), AOM_ICDF(3809), AOM_ICDF(9156), AOM_ICDF(9225),
- AOM_ICDF(32768) },
- { AOM_ICDF(1857), AOM_ICDF(1875), AOM_ICDF(4936), AOM_ICDF(5018),
- AOM_ICDF(32768) } } },
- { // Intra
- { // Band 0
- { AOM_ICDF(4672), AOM_ICDF(6927), AOM_ICDF(23534), AOM_ICDF(29846),
- AOM_ICDF(30928), AOM_ICDF(32768) },
- { AOM_ICDF(3776), AOM_ICDF(6784), AOM_ICDF(18075), AOM_ICDF(25863),
- AOM_ICDF(27926), AOM_ICDF(32768) },
- { AOM_ICDF(1344), AOM_ICDF(5588), AOM_ICDF(12166), AOM_ICDF(20966),
- AOM_ICDF(23504), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(19393), AOM_ICDF(22016), AOM_ICDF(31280), AOM_ICDF(31444),
- AOM_ICDF(32768) },
- { AOM_ICDF(21185), AOM_ICDF(24329), AOM_ICDF(31706), AOM_ICDF(31865),
- AOM_ICDF(32768) },
- { AOM_ICDF(20673), AOM_ICDF(23240), AOM_ICDF(31186), AOM_ICDF(31379),
- AOM_ICDF(32768) },
- { AOM_ICDF(17857), AOM_ICDF(20035), AOM_ICDF(29594), AOM_ICDF(29889),
- AOM_ICDF(32768) },
- { AOM_ICDF(13633), AOM_ICDF(14929), AOM_ICDF(24883), AOM_ICDF(25337),
- AOM_ICDF(32768) },
- { AOM_ICDF(7873), AOM_ICDF(8416), AOM_ICDF(17452), AOM_ICDF(17886),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(25665), AOM_ICDF(27145), AOM_ICDF(32256), AOM_ICDF(32314),
- AOM_ICDF(32768) },
- { AOM_ICDF(21057), AOM_ICDF(22826), AOM_ICDF(31465), AOM_ICDF(31576),
- AOM_ICDF(32768) },
- { AOM_ICDF(13633), AOM_ICDF(14885), AOM_ICDF(27873), AOM_ICDF(28088),
- AOM_ICDF(32768) },
- { AOM_ICDF(8769), AOM_ICDF(9515), AOM_ICDF(21941), AOM_ICDF(22248),
- AOM_ICDF(32768) },
- { AOM_ICDF(6209), AOM_ICDF(6594), AOM_ICDF(15598), AOM_ICDF(15950),
- AOM_ICDF(32768) },
- { AOM_ICDF(1985), AOM_ICDF(2014), AOM_ICDF(6855), AOM_ICDF(6931),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(26817), AOM_ICDF(27824), AOM_ICDF(32362), AOM_ICDF(32399),
- AOM_ICDF(32768) },
- { AOM_ICDF(21185), AOM_ICDF(22321), AOM_ICDF(31389), AOM_ICDF(31466),
- AOM_ICDF(32768) },
- { AOM_ICDF(13761), AOM_ICDF(14154), AOM_ICDF(27163), AOM_ICDF(27245),
- AOM_ICDF(32768) },
- { AOM_ICDF(8897), AOM_ICDF(9011), AOM_ICDF(20600), AOM_ICDF(20659),
- AOM_ICDF(32768) },
- { AOM_ICDF(4673), AOM_ICDF(4774), AOM_ICDF(15044), AOM_ICDF(15131),
- AOM_ICDF(32768) },
- { AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(28865), AOM_ICDF(29687), AOM_ICDF(32655), AOM_ICDF(32667),
- AOM_ICDF(32768) },
- { AOM_ICDF(23233), AOM_ICDF(24218), AOM_ICDF(32080), AOM_ICDF(32118),
- AOM_ICDF(32768) },
- { AOM_ICDF(15041), AOM_ICDF(15444), AOM_ICDF(28787), AOM_ICDF(28845),
- AOM_ICDF(32768) },
- { AOM_ICDF(9921), AOM_ICDF(10248), AOM_ICDF(22818), AOM_ICDF(22944),
- AOM_ICDF(32768) },
- { AOM_ICDF(7745), AOM_ICDF(7866), AOM_ICDF(16591), AOM_ICDF(16702),
- AOM_ICDF(32768) },
- { AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(31169), AOM_ICDF(31559), AOM_ICDF(32741), AOM_ICDF(32744),
- AOM_ICDF(32768) },
- { AOM_ICDF(24769), AOM_ICDF(25583), AOM_ICDF(32347), AOM_ICDF(32370),
- AOM_ICDF(32768) },
- { AOM_ICDF(15937), AOM_ICDF(16169), AOM_ICDF(29120), AOM_ICDF(29152),
- AOM_ICDF(32768) },
- { AOM_ICDF(7489), AOM_ICDF(7578), AOM_ICDF(22647), AOM_ICDF(22677),
- AOM_ICDF(32768) },
- { AOM_ICDF(7617), AOM_ICDF(7689), AOM_ICDF(19849), AOM_ICDF(19887),
- AOM_ICDF(32768) },
- { AOM_ICDF(2113), AOM_ICDF(2183), AOM_ICDF(7202), AOM_ICDF(7377),
- AOM_ICDF(32768) } } } },
- { // UV plane
- { // Inter
- { // Band 0
- { AOM_ICDF(23232), AOM_ICDF(24301), AOM_ICDF(30231), AOM_ICDF(31582),
- AOM_ICDF(32091), AOM_ICDF(32768) },
- { AOM_ICDF(7872), AOM_ICDF(11041), AOM_ICDF(22542), AOM_ICDF(27086),
- AOM_ICDF(29145), AOM_ICDF(32768) },
- { AOM_ICDF(1344), AOM_ICDF(3989), AOM_ICDF(18125), AOM_ICDF(25340),
- AOM_ICDF(27820), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(15937), AOM_ICDF(29000), AOM_ICDF(32210), AOM_ICDF(32434),
- AOM_ICDF(32768) },
- { AOM_ICDF(12353), AOM_ICDF(26626), AOM_ICDF(31533), AOM_ICDF(31993),
- AOM_ICDF(32768) },
- { AOM_ICDF(11457), AOM_ICDF(29187), AOM_ICDF(30896), AOM_ICDF(31750),
- AOM_ICDF(32768) },
- { AOM_ICDF(5697), AOM_ICDF(21278), AOM_ICDF(28169), AOM_ICDF(29764),
- AOM_ICDF(32768) },
- { AOM_ICDF(7489), AOM_ICDF(8855), AOM_ICDF(13365), AOM_ICDF(15620),
- AOM_ICDF(32768) },
- { AOM_ICDF(4289), AOM_ICDF(4833), AOM_ICDF(8572), AOM_ICDF(10108),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(25025), AOM_ICDF(30783), AOM_ICDF(32603), AOM_ICDF(32666),
- AOM_ICDF(32768) },
- { AOM_ICDF(24385), AOM_ICDF(29586), AOM_ICDF(31803), AOM_ICDF(32142),
- AOM_ICDF(32768) },
- { AOM_ICDF(22337), AOM_ICDF(23002), AOM_ICDF(27573), AOM_ICDF(27903),
- AOM_ICDF(32768) },
- { AOM_ICDF(10945), AOM_ICDF(12336), AOM_ICDF(21900), AOM_ICDF(22590),
- AOM_ICDF(32768) },
- { AOM_ICDF(8257), AOM_ICDF(8830), AOM_ICDF(19986), AOM_ICDF(20298),
- AOM_ICDF(32768) },
- { AOM_ICDF(10945), AOM_ICDF(10990), AOM_ICDF(18660), AOM_ICDF(18701),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(29761), AOM_ICDF(31473), AOM_ICDF(32693), AOM_ICDF(32715),
- AOM_ICDF(32768) },
- { AOM_ICDF(20417), AOM_ICDF(24512), AOM_ICDF(31394), AOM_ICDF(31650),
- AOM_ICDF(32768) },
- { AOM_ICDF(11713), AOM_ICDF(13283), AOM_ICDF(25819), AOM_ICDF(26206),
- AOM_ICDF(32768) },
- { AOM_ICDF(13121), AOM_ICDF(14099), AOM_ICDF(21909), AOM_ICDF(22514),
- AOM_ICDF(32768) },
- { AOM_ICDF(193), AOM_ICDF(248), AOM_ICDF(9546), AOM_ICDF(9614),
- AOM_ICDF(32768) },
- { AOM_ICDF(2497), AOM_ICDF(2524), AOM_ICDF(7050), AOM_ICDF(7125),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(30657), AOM_ICDF(31885), AOM_ICDF(32691), AOM_ICDF(32715),
- AOM_ICDF(32768) },
- { AOM_ICDF(19393), AOM_ICDF(26050), AOM_ICDF(31698), AOM_ICDF(31988),
- AOM_ICDF(32768) },
- { AOM_ICDF(15809), AOM_ICDF(15863), AOM_ICDF(24985), AOM_ICDF(25008),
- AOM_ICDF(32768) },
- { AOM_ICDF(23489), AOM_ICDF(28138), AOM_ICDF(32751), AOM_ICDF(32756),
- AOM_ICDF(32768) },
- { AOM_ICDF(16449), AOM_ICDF(16450), AOM_ICDF(16545), AOM_ICDF(16593),
- AOM_ICDF(32768) },
- { AOM_ICDF(2369), AOM_ICDF(2395), AOM_ICDF(6822), AOM_ICDF(6898),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(32705), AOM_ICDF(32744), AOM_ICDF(32766), AOM_ICDF(32767),
- AOM_ICDF(32768) },
- { AOM_ICDF(21953), AOM_ICDF(24962), AOM_ICDF(32156), AOM_ICDF(32246),
- AOM_ICDF(32768) },
- { AOM_ICDF(13121), AOM_ICDF(15358), AOM_ICDF(26284), AOM_ICDF(26835),
- AOM_ICDF(32768) },
- { AOM_ICDF(5697), AOM_ICDF(7417), AOM_ICDF(20132), AOM_ICDF(20885),
- AOM_ICDF(32768) },
- { AOM_ICDF(4417), AOM_ICDF(4939), AOM_ICDF(15104), AOM_ICDF(15535),
- AOM_ICDF(32768) },
- { AOM_ICDF(2625), AOM_ICDF(2680), AOM_ICDF(8218), AOM_ICDF(8338),
- AOM_ICDF(32768) } } },
- { // Inter
- { // Band 0
- { AOM_ICDF(25280), AOM_ICDF(25678), AOM_ICDF(32446), AOM_ICDF(32622),
- AOM_ICDF(32724), AOM_ICDF(32768) },
- { AOM_ICDF(10560), AOM_ICDF(11822), AOM_ICDF(28682), AOM_ICDF(29919),
- AOM_ICDF(31276), AOM_ICDF(32768) },
- { AOM_ICDF(3264), AOM_ICDF(5170), AOM_ICDF(21779), AOM_ICDF(24026),
- AOM_ICDF(27905), AOM_ICDF(32768) } },
- { // Band 1
- { AOM_ICDF(24257), AOM_ICDF(30554), AOM_ICDF(32719), AOM_ICDF(32738),
- AOM_ICDF(32768) },
- { AOM_ICDF(17217), AOM_ICDF(27413), AOM_ICDF(32617), AOM_ICDF(32667),
- AOM_ICDF(32768) },
- { AOM_ICDF(22977), AOM_ICDF(27600), AOM_ICDF(32482), AOM_ICDF(32552),
- AOM_ICDF(32768) },
- { AOM_ICDF(16833), AOM_ICDF(24360), AOM_ICDF(30746), AOM_ICDF(31293),
- AOM_ICDF(32768) },
- { AOM_ICDF(17089), AOM_ICDF(20060), AOM_ICDF(28880), AOM_ICDF(29370),
- AOM_ICDF(32768) },
- { AOM_ICDF(10945), AOM_ICDF(11009), AOM_ICDF(21900), AOM_ICDF(21932),
- AOM_ICDF(32768) } },
- { // Band 2
- { AOM_ICDF(27201), AOM_ICDF(30217), AOM_ICDF(32736), AOM_ICDF(32745),
- AOM_ICDF(32768) },
- { AOM_ICDF(22721), AOM_ICDF(27676), AOM_ICDF(32749), AOM_ICDF(32754),
- AOM_ICDF(32768) },
- { AOM_ICDF(5057), AOM_ICDF(12431), AOM_ICDF(25246), AOM_ICDF(26620),
- AOM_ICDF(32768) },
- { AOM_ICDF(193), AOM_ICDF(321), AOM_ICDF(22016), AOM_ICDF(22048),
- AOM_ICDF(32768) },
- { AOM_ICDF(5313), AOM_ICDF(5363), AOM_ICDF(13839), AOM_ICDF(13894),
- AOM_ICDF(32768) },
- { AOM_ICDF(2625), AOM_ICDF(2652), AOM_ICDF(7276), AOM_ICDF(7351),
- AOM_ICDF(32768) } },
- { // Band 3
- { AOM_ICDF(27713), AOM_ICDF(30739), AOM_ICDF(32759), AOM_ICDF(32762),
- AOM_ICDF(32768) },
- { AOM_ICDF(26177), AOM_ICDF(30430), AOM_ICDF(32756), AOM_ICDF(32760),
- AOM_ICDF(32768) },
- { AOM_ICDF(193), AOM_ICDF(384), AOM_ICDF(32706), AOM_ICDF(32707),
- AOM_ICDF(32768) },
- { AOM_ICDF(9409), AOM_ICDF(9528), AOM_ICDF(21591), AOM_ICDF(21646),
- AOM_ICDF(32768) },
- { AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
- AOM_ICDF(32768) },
- { AOM_ICDF(2881), AOM_ICDF(2913), AOM_ICDF(8427), AOM_ICDF(8498),
- AOM_ICDF(32768) } },
- { // Band 4
- { AOM_ICDF(28993), AOM_ICDF(31156), AOM_ICDF(32747), AOM_ICDF(32753),
- AOM_ICDF(32768) },
- { AOM_ICDF(25153), AOM_ICDF(28701), AOM_ICDF(32754), AOM_ICDF(32758),
- AOM_ICDF(32768) },
- { AOM_ICDF(16449), AOM_ICDF(16544), AOM_ICDF(32737), AOM_ICDF(32738),
- AOM_ICDF(32768) },
- { AOM_ICDF(193), AOM_ICDF(321), AOM_ICDF(22016), AOM_ICDF(22048),
- AOM_ICDF(32768) },
- { AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
- AOM_ICDF(32768) },
- { AOM_ICDF(2753), AOM_ICDF(2789), AOM_ICDF(8909), AOM_ICDF(8979),
- AOM_ICDF(32768) } },
- { // Band 5
- { AOM_ICDF(30785), AOM_ICDF(32088), AOM_ICDF(32765), AOM_ICDF(32766),
- AOM_ICDF(32768) },
- { AOM_ICDF(22977), AOM_ICDF(26623), AOM_ICDF(32750), AOM_ICDF(32754),
- AOM_ICDF(32768) },
- { AOM_ICDF(21953), AOM_ICDF(21954), AOM_ICDF(22017), AOM_ICDF(22049),
- AOM_ICDF(32768) },
- { AOM_ICDF(5697), AOM_ICDF(7486), AOM_ICDF(20238), AOM_ICDF(21009),
- AOM_ICDF(32768) },
- { AOM_ICDF(4929), AOM_ICDF(5579), AOM_ICDF(16402), AOM_ICDF(16866),
- AOM_ICDF(32768) },
- { AOM_ICDF(3009), AOM_ICDF(3246), AOM_ICDF(10158), AOM_ICDF(10533),
- AOM_ICDF(32768) } } } }
-};
-
-/* clang-format on */
-#endif // !CONFIG_Q_ADAPT_PROBS
-
-static void build_tail_cdfs(aom_cdf_prob cdf_tail[CDF_SIZE(ENTROPY_TOKENS)],
- aom_cdf_prob cdf_head[CDF_SIZE(ENTROPY_TOKENS)],
- int band_zero) {
- int probNZ, prob1, prob_idx, i;
- int phead[HEAD_TOKENS + 1], sum;
- const int is_dc = !!band_zero;
- aom_cdf_prob prev_cdf;
- prev_cdf = 0;
- for (i = 0; i < HEAD_TOKENS + is_dc; ++i) {
- phead[i] = AOM_ICDF(cdf_head[i]) - prev_cdf;
- prev_cdf = AOM_ICDF(cdf_head[i]);
- }
- // Do the tail
- probNZ = CDF_PROB_TOP - phead[ZERO_TOKEN + is_dc] - (is_dc ? phead[0] : 0);
- prob1 = phead[is_dc + ONE_TOKEN_EOB] + phead[is_dc + ONE_TOKEN_NEOB];
- prob_idx =
- AOMMIN(COEFF_PROB_MODELS - 1, AOMMAX(0, ((256 * prob1) / probNZ) - 1));
-
- sum = 0;
- for (i = 0; i < TAIL_TOKENS; ++i) {
- sum += av1_pareto8_tail_probs[prob_idx][i];
- cdf_tail[i] = AOM_ICDF(sum);
- }
-}
-
-#if !CONFIG_Q_ADAPT_PROBS
-// FIXME. Optimize for TX_2X2 and TX_64X64.
-static void av1_default_coef_cdfs(FRAME_CONTEXT *fc) {
-#if CONFIG_CHROMA_2X2
- av1_copy(fc->coef_head_cdfs[TX_2X2], default_coef_head_cdf_4x4);
-#endif // CONFIG_CHROMA_2X2
- av1_copy(fc->coef_head_cdfs[TX_4X4], default_coef_head_cdf_4x4);
- av1_copy(fc->coef_head_cdfs[TX_8X8], default_coef_head_cdf_8x8);
- av1_copy(fc->coef_head_cdfs[TX_16X16], default_coef_head_cdf_16x16);
- av1_copy(fc->coef_head_cdfs[TX_32X32], default_coef_head_cdf_32x32);
-#if CONFIG_TX64X64
- av1_copy(fc->coef_head_cdfs[TX_64X64], default_coef_head_cdf_32x32);
-#endif // CONFIG_TX64X64
-}
-#endif // !CONFIG_Q_ADAPT_PROBS
-
-void av1_coef_pareto_cdfs(FRAME_CONTEXT *fc) {
- /* Build the tail based on a Pareto distribution */
- TX_SIZE t;
- int i, j, k, l;
- for (t = 0; t < TX_SIZES; ++t)
- for (i = 0; i < PLANE_TYPES; ++i)
- for (j = 0; j < REF_TYPES; ++j)
- for (k = 0; k < COEF_BANDS; ++k)
- for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
- build_tail_cdfs(fc->coef_tail_cdfs[t][i][j][k][l],
- fc->coef_head_cdfs[t][i][j][k][l], k == 0);
+static int get_q_ctx(int q) {
+ if (q <= 20) return 0;
+ if (q <= 60) return 1;
+ if (q <= 120) return 2;
+ return 3;
}
void av1_default_coef_probs(AV1_COMMON *cm) {
-#if CONFIG_Q_ADAPT_PROBS
- const int index = AOMMIN(TOKEN_CDF_Q_CTXS - 1, cm->base_qindex / 64);
-#if CONFIG_CHROMA_2X2
- av1_copy(cm->fc->coef_head_cdfs[TX_2X2],
- (*av1_default_qctx_coef_cdfs[index])[TX_4X4]);
-#endif // CONFIG_CHROMA_2X2
- av1_copy(cm->fc->coef_head_cdfs[TX_4X4],
- (*av1_default_qctx_coef_cdfs[index])[TX_4X4]);
- av1_copy(cm->fc->coef_head_cdfs[TX_8X8],
- (*av1_default_qctx_coef_cdfs[index])[TX_8X8]);
- av1_copy(cm->fc->coef_head_cdfs[TX_16X16],
- (*av1_default_qctx_coef_cdfs[index])[TX_16X16]);
- av1_copy(cm->fc->coef_head_cdfs[TX_32X32],
- (*av1_default_qctx_coef_cdfs[index])[TX_32X32]);
-#if CONFIG_TX64X64
- av1_copy(cm->fc->coef_head_cdfs[TX_64X64],
- (*av1_default_qctx_coef_cdfs[index])[TX_32X32]);
-#endif // CONFIG_TX64X64
-#else
- /* Load the head tokens */
- av1_default_coef_cdfs(cm->fc);
-#endif // CONFIG_Q_ADAPT_PROBS
- av1_coef_pareto_cdfs(cm->fc);
+ const int index = get_q_ctx(cm->base_qindex);
+#if CONFIG_ENTROPY_STATS
+ cm->coef_cdf_category = index;
+#endif
+
+ av1_copy(cm->fc->txb_skip_cdf, av1_default_txb_skip_cdfs[index]);
+ av1_copy(cm->fc->eob_extra_cdf, av1_default_eob_extra_cdfs[index]);
+ av1_copy(cm->fc->dc_sign_cdf, av1_default_dc_sign_cdfs[index]);
+ av1_copy(cm->fc->coeff_br_cdf, av1_default_coeff_lps_multi_cdfs[index]);
+ av1_copy(cm->fc->coeff_base_cdf, av1_default_coeff_base_multi_cdfs[index]);
+ av1_copy(cm->fc->coeff_base_eob_cdf,
+ av1_default_coeff_base_eob_multi_cdfs[index]);
+ av1_copy(cm->fc->eob_flag_cdf16, av1_default_eob_multi16_cdfs[index]);
+ av1_copy(cm->fc->eob_flag_cdf32, av1_default_eob_multi32_cdfs[index]);
+ av1_copy(cm->fc->eob_flag_cdf64, av1_default_eob_multi64_cdfs[index]);
+ av1_copy(cm->fc->eob_flag_cdf128, av1_default_eob_multi128_cdfs[index]);
+ av1_copy(cm->fc->eob_flag_cdf256, av1_default_eob_multi256_cdfs[index]);
+ av1_copy(cm->fc->eob_flag_cdf512, av1_default_eob_multi512_cdfs[index]);
+ av1_copy(cm->fc->eob_flag_cdf1024, av1_default_eob_multi1024_cdfs[index]);
}
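
The replacement above selects one of TOKEN_CDF_Q_CTXS (four) default coefficient CDF sets per frame, bucketing base_qindex at 20, 60 and 120 instead of the old base_qindex / 64 split. A minimal standalone sketch of that bucketing, for illustration only; get_q_ctx_sketch is a hypothetical stand-in and not part of the patch:

#include <assert.h>

/* Hypothetical stand-in mirroring get_q_ctx() above; illustration only. */
static int get_q_ctx_sketch(int q) {
  if (q <= 20) return 0;
  if (q <= 60) return 1;
  if (q <= 120) return 2;
  return 3;
}

int main(void) {
  assert(get_q_ctx_sketch(0) == 0);
  assert(get_q_ctx_sketch(21) == 1);  /* old scheme: 21 / 64 == 0 */
  assert(get_q_ctx_sketch(100) == 2); /* old scheme: 100 / 64 == 1 */
  assert(get_q_ctx_sketch(200) == 3); /* old scheme: AOMMIN(3, 200 / 64) == 3 */
  return 0;
}
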
-#if CONFIG_LV_MAP
-void av1_adapt_coef_probs(AV1_COMMON *cm) {
- unsigned int count_sat, update_factor;
- if (!frame_is_intra_only(cm) && cm->last_frame_type == KEY_FRAME) {
- update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */
- count_sat = COEF_COUNT_SAT_AFTER_KEY;
- } else {
- update_factor = COEF_MAX_UPDATE_FACTOR;
- count_sat = COEF_COUNT_SAT;
+static void reset_cdf_symbol_counter(aom_cdf_prob *cdf_ptr, int num_cdfs,
+ int cdf_stride, int nsymbs) {
+ for (int i = 0; i < num_cdfs; i++) {
+ cdf_ptr[i * cdf_stride + nsymbs] = 0;
}
- av1_adapt_txb_probs(cm, count_sat, update_factor);
}
-#endif // CONFIG_LV_MAP
-static void av1_average_cdf(aom_cdf_prob *cdf_ptr[], aom_cdf_prob *fc_cdf_ptr,
- int cdf_size, const int num_tiles) {
- int i;
- for (i = 0; i < cdf_size;) {
- do {
- int sum = 0;
- int j;
- assert(i < cdf_size);
- for (j = 0; j < num_tiles; ++j) sum += AOM_ICDF(cdf_ptr[j][i]);
- fc_cdf_ptr[i] = AOM_ICDF(sum / num_tiles);
- } while (fc_cdf_ptr[i++] != AOM_ICDF(CDF_PROB_TOP));
- // Zero symbol counts for the next frame
- assert(i < cdf_size);
- fc_cdf_ptr[i++] = 0;
- // Skip trailing zeros until the start of the next CDF.
- for (; i < cdf_size && fc_cdf_ptr[i] == 0; ++i) {
- }
+#define RESET_CDF_COUNTER(cname, nsymbs) \
+ RESET_CDF_COUNTER_STRIDE(cname, nsymbs, CDF_SIZE(nsymbs))
+
+#define RESET_CDF_COUNTER_STRIDE(cname, nsymbs, cdf_stride) \
+ do { \
+ aom_cdf_prob *cdf_ptr = (aom_cdf_prob *)cname; \
+ int array_size = (int)sizeof(cname) / sizeof(aom_cdf_prob); \
+ int num_cdfs = array_size / cdf_stride; \
+ reset_cdf_symbol_counter(cdf_ptr, num_cdfs, cdf_stride, nsymbs); \
+ } while (0)
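
These helpers rely on the layout assumed throughout the CDF tables here: each CDF occupies CDF_SIZE(nsymbs) = nsymbs + 1 slots, the trailing slot holding the adaptation counter that gets cleared, and the macro derives num_cdfs from sizeof so it applies to any statically sized frame-context member. A self-contained sketch of that layout, using stand-in names and illustration only:

#include <assert.h>
#include <stdint.h>

typedef uint16_t cdf_prob;           /* stand-in for aom_cdf_prob */
#define SKETCH_CDF_SIZE(n) ((n) + 1) /* stand-in for CDF_SIZE()   */

/* Clears the counter slot (index nsymbs) of every CDF in the array. */
static void reset_counters_sketch(cdf_prob *cdf, int num_cdfs, int stride,
                                  int nsymbs) {
  for (int i = 0; i < num_cdfs; i++) cdf[i * stride + nsymbs] = 0;
}

int main(void) {
  /* Two binary CDFs, each followed by a non-zero symbol counter. */
  cdf_prob table[2][SKETCH_CDF_SIZE(2)] = { { 16384, 32768, 7 },
                                            { 20000, 32768, 3 } };
  reset_counters_sketch(&table[0][0], 2, SKETCH_CDF_SIZE(2), 2);
  assert(table[0][2] == 0 && table[1][2] == 0);
  assert(table[0][0] == 16384 && table[1][1] == 32768); /* CDFs untouched */
  return 0;
}
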
+
+static void reset_nmv_counter(nmv_context *nmv) {
+ RESET_CDF_COUNTER(nmv->joints_cdf, 4);
+ for (int i = 0; i < 2; i++) {
+ RESET_CDF_COUNTER(nmv->comps[i].classes_cdf, MV_CLASSES);
+ RESET_CDF_COUNTER(nmv->comps[i].class0_fp_cdf, MV_FP_SIZE);
+ RESET_CDF_COUNTER(nmv->comps[i].fp_cdf, MV_FP_SIZE);
+ RESET_CDF_COUNTER(nmv->comps[i].sign_cdf, 2);
+ RESET_CDF_COUNTER(nmv->comps[i].class0_hp_cdf, 2);
+ RESET_CDF_COUNTER(nmv->comps[i].hp_cdf, 2);
+ RESET_CDF_COUNTER(nmv->comps[i].class0_cdf, CLASS0_SIZE);
+ RESET_CDF_COUNTER(nmv->comps[i].bits_cdf, 2);
}
}
-#define AVERAGE_TILE_CDFS(cname) \
- for (i = 0; i < num_tiles; ++i) \
- cdf_ptr[i] = (aom_cdf_prob *)&ec_ctxs[i]->cname; \
- fc_cdf_ptr = (aom_cdf_prob *)&fc->cname; \
- cdf_size = (int)sizeof(fc->cname) / sizeof(aom_cdf_prob); \
- av1_average_cdf(cdf_ptr, fc_cdf_ptr, cdf_size, num_tiles);
-
-void av1_average_tile_coef_cdfs(FRAME_CONTEXT *fc, FRAME_CONTEXT *ec_ctxs[],
- aom_cdf_prob *cdf_ptr[], int num_tiles) {
- int i, cdf_size;
-
- aom_cdf_prob *fc_cdf_ptr;
-
-#if CONFIG_LV_MAP
- AVERAGE_TILE_CDFS(txb_skip_cdf)
- AVERAGE_TILE_CDFS(nz_map_cdf)
- AVERAGE_TILE_CDFS(eob_flag_cdf)
- AVERAGE_TILE_CDFS(dc_sign_cdf)
- AVERAGE_TILE_CDFS(coeff_base_cdf)
- AVERAGE_TILE_CDFS(coeff_lps_cdf)
-#if BR_NODE
- AVERAGE_TILE_CDFS(coeff_br_cdf)
-#endif
-#if CONFIG_CTX1D
- AVERAGE_TILE_CDFS(eob_mode_cdf)
- AVERAGE_TILE_CDFS(empty_line_cdf)
- AVERAGE_TILE_CDFS(hv_eob_cdf)
-#endif
-#else
- AVERAGE_TILE_CDFS(coef_head_cdfs)
- AVERAGE_TILE_CDFS(coef_tail_cdfs)
-#endif
-}
-
-void av1_average_tile_mv_cdfs(FRAME_CONTEXT *fc, FRAME_CONTEXT *ec_ctxs[],
- aom_cdf_prob *cdf_ptr[], int num_tiles) {
- int i, k, cdf_size;
-
- aom_cdf_prob *fc_cdf_ptr;
-
- int j;
- for (j = 0; j < NMV_CONTEXTS; ++j) {
- AVERAGE_TILE_CDFS(nmvc[j].joint_cdf)
-
- for (k = 0; k < 2; ++k) {
- AVERAGE_TILE_CDFS(nmvc[j].comps[k].class_cdf)
- AVERAGE_TILE_CDFS(nmvc[j].comps[k].class0_fp_cdf)
- AVERAGE_TILE_CDFS(nmvc[j].comps[k].fp_cdf)
-#if CONFIG_NEW_MULTISYMBOL
- AVERAGE_TILE_CDFS(nmvc[j].comps[k].hp_cdf)
- AVERAGE_TILE_CDFS(nmvc[j].comps[k].class0_hp_cdf)
- AVERAGE_TILE_CDFS(nmvc[j].comps[k].class0_cdf)
- AVERAGE_TILE_CDFS(nmvc[j].comps[k].bits_cdf)
-#endif
- }
+void av1_reset_cdf_symbol_counters(FRAME_CONTEXT *fc) {
+ RESET_CDF_COUNTER(fc->txb_skip_cdf, 2);
+ RESET_CDF_COUNTER(fc->eob_extra_cdf, 2);
+ RESET_CDF_COUNTER(fc->dc_sign_cdf, 2);
+ RESET_CDF_COUNTER(fc->eob_flag_cdf16, 5);
+ RESET_CDF_COUNTER(fc->eob_flag_cdf32, 6);
+ RESET_CDF_COUNTER(fc->eob_flag_cdf64, 7);
+ RESET_CDF_COUNTER(fc->eob_flag_cdf128, 8);
+ RESET_CDF_COUNTER(fc->eob_flag_cdf256, 9);
+ RESET_CDF_COUNTER(fc->eob_flag_cdf512, 10);
+ RESET_CDF_COUNTER(fc->eob_flag_cdf1024, 11);
+ RESET_CDF_COUNTER(fc->coeff_base_eob_cdf, 3);
+ RESET_CDF_COUNTER(fc->coeff_base_cdf, 4);
+ RESET_CDF_COUNTER(fc->coeff_br_cdf, BR_CDF_SIZE);
+ RESET_CDF_COUNTER(fc->newmv_cdf, 2);
+ RESET_CDF_COUNTER(fc->zeromv_cdf, 2);
+ RESET_CDF_COUNTER(fc->refmv_cdf, 2);
+ RESET_CDF_COUNTER(fc->drl_cdf, 2);
+ RESET_CDF_COUNTER(fc->inter_compound_mode_cdf, INTER_COMPOUND_MODES);
+ RESET_CDF_COUNTER(fc->compound_type_cdf, COMPOUND_TYPES - 1);
+ RESET_CDF_COUNTER(fc->wedge_idx_cdf, 16);
+ RESET_CDF_COUNTER(fc->interintra_cdf, 2);
+ RESET_CDF_COUNTER(fc->wedge_interintra_cdf, 2);
+ RESET_CDF_COUNTER(fc->interintra_mode_cdf, INTERINTRA_MODES);
+ RESET_CDF_COUNTER(fc->motion_mode_cdf, MOTION_MODES);
+ RESET_CDF_COUNTER(fc->obmc_cdf, 2);
+ RESET_CDF_COUNTER(fc->palette_y_size_cdf, PALETTE_SIZES);
+ RESET_CDF_COUNTER(fc->palette_uv_size_cdf, PALETTE_SIZES);
+ for (int j = 0; j < PALETTE_SIZES; j++) {
+ int nsymbs = j + PALETTE_MIN_SIZE;
+ RESET_CDF_COUNTER_STRIDE(fc->palette_y_color_index_cdf[j], nsymbs,
+ CDF_SIZE(PALETTE_COLORS));
+ RESET_CDF_COUNTER_STRIDE(fc->palette_uv_color_index_cdf[j], nsymbs,
+ CDF_SIZE(PALETTE_COLORS));
}
-}
-
-void av1_average_tile_intra_cdfs(FRAME_CONTEXT *fc, FRAME_CONTEXT *ec_ctxs[],
- aom_cdf_prob *cdf_ptr[], int num_tiles) {
- int i, cdf_size;
-
- aom_cdf_prob *fc_cdf_ptr;
-
- AVERAGE_TILE_CDFS(tx_size_cdf)
-
- AVERAGE_TILE_CDFS(intra_ext_tx_cdf)
- AVERAGE_TILE_CDFS(inter_ext_tx_cdf)
-
- AVERAGE_TILE_CDFS(seg.tree_cdf)
-#if CONFIG_NEW_MULTISYMBOL
- AVERAGE_TILE_CDFS(seg.pred_cdf)
-#endif
- AVERAGE_TILE_CDFS(uv_mode_cdf)
-
-#if CONFIG_CFL
- AVERAGE_TILE_CDFS(cfl_sign_cdf)
- AVERAGE_TILE_CDFS(cfl_alpha_cdf)
-#endif
-
- AVERAGE_TILE_CDFS(partition_cdf)
-
- AVERAGE_TILE_CDFS(delta_q_cdf)
-#if CONFIG_EXT_DELTA_Q
- AVERAGE_TILE_CDFS(delta_lf_cdf)
-#endif
-#if CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
- AVERAGE_TILE_CDFS(intra_filter_cdf)
-#endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
-
-#if CONFIG_NEW_MULTISYMBOL
- AVERAGE_TILE_CDFS(skip_cdfs)
-#if CONFIG_VAR_TX
- AVERAGE_TILE_CDFS(txfm_partition_cdf)
-#endif
-#endif // CONFIG_NEW_MULTISYMBOL
- AVERAGE_TILE_CDFS(palette_y_size_cdf)
- AVERAGE_TILE_CDFS(palette_uv_size_cdf)
- AVERAGE_TILE_CDFS(palette_y_color_index_cdf)
- AVERAGE_TILE_CDFS(palette_uv_color_index_cdf)
-#if CONFIG_MRC_TX
- AVERAGE_TILE_CDFS(mrc_mask_intra_cdf)
-#endif // CONFIG_MRC_TX
-#if CONFIG_NEW_MULTISYMBOL
- AVERAGE_TILE_CDFS(palette_y_mode_cdf)
- AVERAGE_TILE_CDFS(palette_uv_mode_cdf)
-#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- AVERAGE_TILE_CDFS(quarter_tx_size_cdf)
-#endif
-#endif
-#if CONFIG_LPF_SB
- AVERAGE_TILE_CDFS(lpf_reuse_cdf);
- AVERAGE_TILE_CDFS(lpf_delta_cdf);
- AVERAGE_TILE_CDFS(lpf_sign_cdf);
-#endif // CONFIG_LPF_SB
-}
-
-void av1_average_tile_inter_cdfs(AV1_COMMON *cm, FRAME_CONTEXT *fc,
- FRAME_CONTEXT *ec_ctxs[],
- aom_cdf_prob *cdf_ptr[], int num_tiles) {
- int i, cdf_size;
-
- aom_cdf_prob *fc_cdf_ptr;
-
-#if CONFIG_NEW_MULTISYMBOL
- AVERAGE_TILE_CDFS(comp_inter_cdf)
-#if CONFIG_EXT_REFS
- AVERAGE_TILE_CDFS(comp_ref_cdf)
- AVERAGE_TILE_CDFS(comp_bwdref_cdf)
-#endif
-#endif
-
-#if CONFIG_NEW_MULTISYMBOL
- AVERAGE_TILE_CDFS(single_ref_cdf)
-
- AVERAGE_TILE_CDFS(newmv_cdf)
- AVERAGE_TILE_CDFS(zeromv_cdf)
- AVERAGE_TILE_CDFS(refmv_cdf)
- AVERAGE_TILE_CDFS(drl_cdf)
-#if CONFIG_EXT_COMP_REFS
- AVERAGE_TILE_CDFS(uni_comp_ref_cdf)
- AVERAGE_TILE_CDFS(comp_ref_type_cdf)
-#endif
-#endif
-
- // FIXME: cdfs not defined for super_tx
-
- AVERAGE_TILE_CDFS(inter_compound_mode_cdf)
-
-#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
- AVERAGE_TILE_CDFS(compound_type_cdf)
-#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
-#if CONFIG_COMPOUND_SINGLEREF
- AVERAGE_TILE_CDFS(inter_singleref_comp_mode_cdf)
-#endif
-
-#if CONFIG_INTERINTRA
-#if CONFIG_NEW_MULTISYMBOL
- AVERAGE_TILE_CDFS(interintra_cdf)
- AVERAGE_TILE_CDFS(wedge_interintra_cdf)
-#endif
- AVERAGE_TILE_CDFS(interintra_mode_cdf)
-#endif
-
- /* NB: kf_y_cdf is discarded after use, so no need
- for backwards update */
- AVERAGE_TILE_CDFS(y_mode_cdf)
-
- if (cm->interp_filter == SWITCHABLE) {
- AVERAGE_TILE_CDFS(switchable_interp_cdf)
- }
-#if CONFIG_NEW_MULTISYMBOL
- AVERAGE_TILE_CDFS(intra_inter_cdf)
-#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- AVERAGE_TILE_CDFS(motion_mode_cdf)
-#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
- AVERAGE_TILE_CDFS(obmc_cdf)
-#endif
-#endif
-#endif
-#if CONFIG_MRC_TX
- AVERAGE_TILE_CDFS(mrc_mask_inter_cdf)
-#endif // CONFIG_MRC_TX
-#if CONFIG_LPF_SB
- AVERAGE_TILE_CDFS(lpf_reuse_cdf);
- AVERAGE_TILE_CDFS(lpf_delta_cdf);
- AVERAGE_TILE_CDFS(lpf_sign_cdf);
-#endif // CONFIG_LPF_SB
-}
-
-#if CONFIG_PVQ
-// Averaging PVQ's expected values for symbol coding
-static void av1_average_pvq_ex(int *cxt_ptr[], int *fc_cxt_ptr, int cxt_size,
- const int num_tiles) {
- int i, j;
- for (i = 0; i < cxt_size; ++i) {
- int sum = 0;
- for (j = 0; j < num_tiles; ++j) sum += cxt_ptr[j][i];
- fc_cxt_ptr[i] = sum / num_tiles;
- }
-}
-
-#define AVERAGE_TILE_PVQ_EX(cname) \
- for (i = 0; i < num_tiles; ++i) cxt_ptr[i] = (int *)&ec_ctxs[i]->cname; \
- fc_cxt_ptr = (int *)&fc->cname; \
- cxt_size = (int)sizeof(fc->cname) / sizeof(int); \
- av1_average_pvq_ex(cxt_ptr, fc_cxt_ptr, cxt_size, num_tiles);
-
-void av1_default_pvq_probs(AV1_COMMON *cm) {
- od_adapt_ctx *adapt = &cm->fc->pvq_context;
-
- // Init with flat probabilities.
- od_adapt_ctx_reset(adapt, 0);
-
- // TODO(yushin): Prepare offline cdf and context table for PVQ,
- // i.e. od_adapt_ctx, then load them from table,
- // for example od_adapt_ctx default_pvq_context.
- // Then do sth like this:
- // av1_copy(cm->fc->pvq_context, default_pvq_context);
-}
-
-void av1_average_tile_pvq_cdfs(FRAME_CONTEXT *fc, FRAME_CONTEXT *ec_ctxs[],
- const int num_tiles) {
- int i, j, cdf_size, cxt_size;
-
- aom_cdf_prob *cdf_ptr[MAX_TILE_ROWS * MAX_TILE_COLS];
- aom_cdf_prob *fc_cdf_ptr;
- int *cxt_ptr[MAX_TILE_ROWS * MAX_TILE_COLS];
- int *fc_cxt_ptr;
-
- AVERAGE_TILE_PVQ_EX(pvq_context.ex_dc)
- AVERAGE_TILE_PVQ_EX(pvq_context.ex_g)
-
- for (j = 0; j < OD_NPLANES_MAX; j++) {
- AVERAGE_TILE_CDFS(pvq_context.model_dc[j].cdf)
+ RESET_CDF_COUNTER(fc->palette_y_mode_cdf, 2);
+ RESET_CDF_COUNTER(fc->palette_uv_mode_cdf, 2);
+ RESET_CDF_COUNTER(fc->comp_inter_cdf, 2);
+ RESET_CDF_COUNTER(fc->single_ref_cdf, 2);
+ RESET_CDF_COUNTER(fc->comp_ref_type_cdf, 2);
+ RESET_CDF_COUNTER(fc->uni_comp_ref_cdf, 2);
+ RESET_CDF_COUNTER(fc->comp_ref_cdf, 2);
+ RESET_CDF_COUNTER(fc->comp_bwdref_cdf, 2);
+ RESET_CDF_COUNTER(fc->txfm_partition_cdf, 2);
+ RESET_CDF_COUNTER(fc->compound_index_cdf, 2);
+ RESET_CDF_COUNTER(fc->comp_group_idx_cdf, 2);
+ RESET_CDF_COUNTER(fc->skip_mode_cdfs, 2);
+ RESET_CDF_COUNTER(fc->skip_cdfs, 2);
+ RESET_CDF_COUNTER(fc->intra_inter_cdf, 2);
+ reset_nmv_counter(&fc->nmvc);
+ reset_nmv_counter(&fc->ndvc);
+ RESET_CDF_COUNTER(fc->intrabc_cdf, 2);
+ RESET_CDF_COUNTER(fc->seg.tree_cdf, MAX_SEGMENTS);
+ RESET_CDF_COUNTER(fc->seg.pred_cdf, 2);
+ RESET_CDF_COUNTER(fc->seg.spatial_pred_seg_cdf, MAX_SEGMENTS);
+ RESET_CDF_COUNTER(fc->filter_intra_cdfs, 2);
+ RESET_CDF_COUNTER(fc->filter_intra_mode_cdf, FILTER_INTRA_MODES);
+ RESET_CDF_COUNTER(fc->switchable_restore_cdf, RESTORE_SWITCHABLE_TYPES);
+ RESET_CDF_COUNTER(fc->wiener_restore_cdf, 2);
+ RESET_CDF_COUNTER(fc->sgrproj_restore_cdf, 2);
+ RESET_CDF_COUNTER(fc->y_mode_cdf, INTRA_MODES);
+ RESET_CDF_COUNTER_STRIDE(fc->uv_mode_cdf[0], UV_INTRA_MODES - 1,
+ CDF_SIZE(UV_INTRA_MODES));
+ RESET_CDF_COUNTER(fc->uv_mode_cdf[1], UV_INTRA_MODES);
+ for (int i = 0; i < PARTITION_CONTEXTS; i++) {
+ if (i < 4) {
+ RESET_CDF_COUNTER_STRIDE(fc->partition_cdf[i], 4, CDF_SIZE(10));
+ } else if (i < 16) {
+ RESET_CDF_COUNTER(fc->partition_cdf[i], 10);
+ } else {
+ RESET_CDF_COUNTER_STRIDE(fc->partition_cdf[i], 8, CDF_SIZE(10));
+ }
}
-
- AVERAGE_TILE_CDFS(pvq_context.skip_cdf)
-
- AVERAGE_TILE_PVQ_EX(pvq_context.pvq.pvq_codeword_ctx.pvq_adapt)
- AVERAGE_TILE_CDFS(pvq_context.pvq.pvq_codeword_ctx.pvq_k1_cdf)
- AVERAGE_TILE_CDFS(pvq_context.pvq.pvq_codeword_ctx.pvq_split_cdf)
-
- for (j = 0; j < 3; j++) {
- AVERAGE_TILE_CDFS(pvq_context.pvq.pvq_param_model[j].cdf)
+ RESET_CDF_COUNTER(fc->switchable_interp_cdf, SWITCHABLE_FILTERS);
+ RESET_CDF_COUNTER(fc->kf_y_cdf, INTRA_MODES);
+ RESET_CDF_COUNTER(fc->angle_delta_cdf, 2 * MAX_ANGLE_DELTA + 1);
+ RESET_CDF_COUNTER_STRIDE(fc->tx_size_cdf[0], MAX_TX_DEPTH,
+ CDF_SIZE(MAX_TX_DEPTH + 1));
+ RESET_CDF_COUNTER(fc->tx_size_cdf[1], MAX_TX_DEPTH + 1);
+ RESET_CDF_COUNTER(fc->tx_size_cdf[2], MAX_TX_DEPTH + 1);
+ RESET_CDF_COUNTER(fc->tx_size_cdf[3], MAX_TX_DEPTH + 1);
+ RESET_CDF_COUNTER(fc->delta_q_cdf, DELTA_Q_PROBS + 1);
+ RESET_CDF_COUNTER(fc->delta_lf_cdf, DELTA_LF_PROBS + 1);
+ for (int i = 0; i < FRAME_LF_COUNT; i++) {
+ RESET_CDF_COUNTER(fc->delta_lf_multi_cdf[i], DELTA_LF_PROBS + 1);
}
-
- AVERAGE_TILE_PVQ_EX(pvq_context.pvq.pvq_ext)
- AVERAGE_TILE_PVQ_EX(pvq_context.pvq.pvq_exg)
- AVERAGE_TILE_CDFS(pvq_context.pvq.pvq_gaintheta_cdf)
- AVERAGE_TILE_CDFS(pvq_context.pvq.pvq_skip_dir_cdf)
+ RESET_CDF_COUNTER_STRIDE(fc->intra_ext_tx_cdf[1], 7, CDF_SIZE(TX_TYPES));
+ RESET_CDF_COUNTER_STRIDE(fc->intra_ext_tx_cdf[2], 5, CDF_SIZE(TX_TYPES));
+ RESET_CDF_COUNTER_STRIDE(fc->inter_ext_tx_cdf[1], 16, CDF_SIZE(TX_TYPES));
+ RESET_CDF_COUNTER_STRIDE(fc->inter_ext_tx_cdf[2], 12, CDF_SIZE(TX_TYPES));
+ RESET_CDF_COUNTER_STRIDE(fc->inter_ext_tx_cdf[3], 2, CDF_SIZE(TX_TYPES));
+ RESET_CDF_COUNTER(fc->cfl_sign_cdf, CFL_JOINT_SIGNS);
+ RESET_CDF_COUNTER(fc->cfl_alpha_cdf, CFL_ALPHABET_SIZE);
}
-#endif // CONFIG_PVQ
diff --git a/third_party/aom/av1/common/entropy.h b/third_party/aom/av1/common/entropy.h
index 679aae837..ef944c5a0 100644
--- a/third_party/aom/av1/common/entropy.h
+++ b/third_party/aom/av1/common/entropy.h
@@ -12,7 +12,8 @@
#ifndef AV1_COMMON_ENTROPY_H_
#define AV1_COMMON_ENTROPY_H_
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
#include "aom/aom_integer.h"
#include "aom_dsp/prob.h"
@@ -24,82 +25,35 @@
extern "C" {
#endif
-#define DIFF_UPDATE_PROB 252
-#define GROUP_DIFF_UPDATE_PROB 252
-
-#if CONFIG_Q_ADAPT_PROBS
#define TOKEN_CDF_Q_CTXS 4
-#endif // CONFIG_Q_ADAPT_PROBS
-
-// Coefficient token alphabet
-#define ZERO_TOKEN 0 // 0 Extra Bits 0+0
-#define ONE_TOKEN 1 // 1 Extra Bits 0+1
-#define TWO_TOKEN 2 // 2 Extra Bits 0+1
-#define THREE_TOKEN 3 // 3 Extra Bits 0+1
-#define FOUR_TOKEN 4 // 4 Extra Bits 0+1
-#define CATEGORY1_TOKEN 5 // 5-6 Extra Bits 1+1
-#define CATEGORY2_TOKEN 6 // 7-10 Extra Bits 2+1
-#define CATEGORY3_TOKEN 7 // 11-18 Extra Bits 3+1
-#define CATEGORY4_TOKEN 8 // 19-34 Extra Bits 4+1
-#define CATEGORY5_TOKEN 9 // 35-66 Extra Bits 5+1
-#define CATEGORY6_TOKEN 10 // 67+ Extra Bits 14+1
-#define EOB_TOKEN 11 // EOB Extra Bits 0+0
-#define NO_EOB 0 // Not an end-of-block
-#define EARLY_EOB 1 // End of block before the last position
-#define LAST_EOB 2 // End of block in the last position (implicit)
-#define BLOCK_Z_TOKEN 255 // block zero
-#define HEAD_TOKENS 5
-#define TAIL_TOKENS 9
-#define ONE_TOKEN_EOB 1
-#define ONE_TOKEN_NEOB 2
-#define TWO_TOKEN_PLUS_EOB 3
-#define TWO_TOKEN_PLUS_NEOB 4
-#define ENTROPY_TOKENS 12
-
-#define ENTROPY_NODES 11
-#if CONFIG_LV_MAP
#define TXB_SKIP_CONTEXTS 13
-#if CONFIG_CTX1D
-#define EOB_COEF_CONTEXTS_2D 25
-#define EOB_COEF_CONTEXTS_1D 25
-#define EOB_COEF_CONTEXTS \
- (EOB_COEF_CONTEXTS_2D + EOB_COEF_CONTEXTS_1D + EOB_COEF_CONTEXTS_1D)
-#else // CONFIG_CTX1D
-#define EOB_COEF_CONTEXTS 25
-#endif // CONFIG_CTX1D
+#define EOB_COEF_CONTEXTS 9
-#if CONFIG_EXT_TX
-#define SIG_COEF_CONTEXTS_2D 16
+#define SIG_COEF_CONTEXTS_2D 26
#define SIG_COEF_CONTEXTS_1D 16
-#define SIG_COEF_CONTEXTS \
- (SIG_COEF_CONTEXTS_2D + SIG_COEF_CONTEXTS_1D + SIG_COEF_CONTEXTS_1D)
-#else // CONFIG_EXT_TX
-#define SIG_COEF_CONTEXTS_2D 16
-#define SIG_COEF_CONTEXTS 16
-#endif // CONFIG_EXT_TX
-#define COEFF_BASE_CONTEXTS 42
+#define SIG_COEF_CONTEXTS_EOB 4
+#define SIG_COEF_CONTEXTS (SIG_COEF_CONTEXTS_2D + SIG_COEF_CONTEXTS_1D)
+
+#define COEFF_BASE_CONTEXTS (SIG_COEF_CONTEXTS)
#define DC_SIGN_CONTEXTS 3
#define BR_TMP_OFFSET 12
#define BR_REF_CAT 4
-#define LEVEL_CONTEXTS (BR_TMP_OFFSET * BR_REF_CAT)
+#define LEVEL_CONTEXTS 21
#define NUM_BASE_LEVELS 2
-#define COEFF_BASE_RANGE (16 - NUM_BASE_LEVELS)
-#define BASE_RANGE_SETS 3
+
+#define BR_CDF_SIZE (4)
+#define COEFF_BASE_RANGE (4 * (BR_CDF_SIZE - 1))
#define COEFF_CONTEXT_BITS 6
#define COEFF_CONTEXT_MASK ((1 << COEFF_CONTEXT_BITS) - 1)
+#define MAX_BASE_BR_RANGE (COEFF_BASE_RANGE + NUM_BASE_LEVELS + 1)
#define BASE_CONTEXT_POSITION_NUM 12
-#if CONFIG_CTX1D
-#define EMPTY_LINE_CONTEXTS 5
-#define HV_EOB_CONTEXTS 24
-#endif // CONFIG_CTX1D
-
typedef enum TX_CLASS {
TX_CLASS_2D = 0,
TX_CLASS_HORIZ = 1,
@@ -107,161 +61,21 @@ typedef enum TX_CLASS {
TX_CLASSES = 3,
} TX_CLASS;
-#endif
-
-DECLARE_ALIGNED(16, extern const uint8_t, av1_pt_energy_class[ENTROPY_TOKENS]);
-
-#define CAT1_MIN_VAL 5
-#define CAT2_MIN_VAL 7
-#define CAT3_MIN_VAL 11
-#define CAT4_MIN_VAL 19
-#define CAT5_MIN_VAL 35
-#define CAT6_MIN_VAL 67
-
-// Extra bit probabilities.
-DECLARE_ALIGNED(16, extern const uint8_t, av1_cat1_prob[1]);
-DECLARE_ALIGNED(16, extern const uint8_t, av1_cat2_prob[2]);
-DECLARE_ALIGNED(16, extern const uint8_t, av1_cat3_prob[3]);
-DECLARE_ALIGNED(16, extern const uint8_t, av1_cat4_prob[4]);
-DECLARE_ALIGNED(16, extern const uint8_t, av1_cat5_prob[5]);
-DECLARE_ALIGNED(16, extern const uint8_t, av1_cat6_prob[18]);
-#if CONFIG_NEW_MULTISYMBOL
-extern const aom_cdf_prob *av1_cat1_cdf[];
-extern const aom_cdf_prob *av1_cat2_cdf[];
-extern const aom_cdf_prob *av1_cat3_cdf[];
-extern const aom_cdf_prob *av1_cat4_cdf[];
-extern const aom_cdf_prob *av1_cat5_cdf[];
-extern const aom_cdf_prob *av1_cat6_cdf[];
-#endif
-
-#define EOB_MODEL_TOKEN 3
-
-typedef struct {
-#if CONFIG_NEW_MULTISYMBOL
- const aom_cdf_prob **cdf;
-#else
- const aom_prob *prob;
-#endif
- int len;
- int base_val;
- const int16_t *cost;
-} av1_extra_bit;
-
-// indexed by token value
-extern const av1_extra_bit av1_extra_bits[ENTROPY_TOKENS];
-
-static INLINE int av1_get_cat6_extrabits_size(TX_SIZE tx_size,
- aom_bit_depth_t bit_depth) {
- tx_size = txsize_sqr_up_map[tx_size];
-#if CONFIG_TX64X64
- // TODO(debargha): Does TX_64X64 require an additional extrabit?
- if (tx_size > TX_32X32) tx_size = TX_32X32;
-#endif
-#if CONFIG_CHROMA_2X2
- int tx_offset = (tx_size < TX_4X4) ? 0 : (int)(tx_size - TX_4X4);
-#else
- int tx_offset = (int)(tx_size - TX_4X4);
-#endif
- int bits = (int)bit_depth + 3 + tx_offset;
-#if CONFIG_NEW_MULTISYMBOL
- // Round up
- bits = AOMMIN((int)sizeof(av1_cat6_prob), ((bits + 3) & ~3));
-#endif
- assert(bits <= (int)sizeof(av1_cat6_prob));
- return bits;
-}
-
#define DCT_MAX_VALUE 16384
-#if CONFIG_HIGHBITDEPTH
#define DCT_MAX_VALUE_HIGH10 65536
#define DCT_MAX_VALUE_HIGH12 262144
-#endif // CONFIG_HIGHBITDEPTH
-
-/* Coefficients are predicted via a 3-dimensional probability table. */
+/* Coefficients are predicted via a 3-dimensional probability table indexed on
+ * REF_TYPES, COEF_BANDS and COEF_CONTEXTS. */
#define REF_TYPES 2 // intra=0, inter=1
-/* Middle dimension reflects the coefficient position within the transform. */
-#define COEF_BANDS 6
-
-/* Inside dimension is measure of nearby complexity, that reflects the energy
- of nearby coefficients are nonzero. For the first coefficient (DC, unless
- block type is 0), we look at the (already encoded) blocks above and to the
- left of the current block. The context index is then the number (0,1,or 2)
- of these blocks having nonzero coefficients.
- After decoding a coefficient, the measure is determined by the size of the
- most recently decoded coefficient.
- Note that the intuitive meaning of this measure changes as coefficients
- are decoded, e.g., prior to the first token, a zero means that my neighbors
- are empty while, after the first token, because of the use of end-of-block,
- a zero means we just decoded a zero and hence guarantees that a non-zero
- coefficient will appear later in this block. However, this shift
- in meaning is perfectly OK because our context depends also on the
- coefficient band (and since zigzag positions 0, 1, and 2 are in
- distinct bands). */
-
-#define COEFF_CONTEXTS 6
-#define COEFF_CONTEXTS0 3 // for band 0
-#define BAND_COEFF_CONTEXTS(band) \
- ((band) == 0 ? COEFF_CONTEXTS0 : COEFF_CONTEXTS)
-
-#define SUBEXP_PARAM 4 /* Subexponential code parameter */
-#define MODULUS_PARAM 13 /* Modulus parameter */
-
struct AV1Common;
struct frame_contexts;
+void av1_reset_cdf_symbol_counters(struct frame_contexts *fc);
void av1_default_coef_probs(struct AV1Common *cm);
-#if CONFIG_LV_MAP
-void av1_adapt_coef_probs(struct AV1Common *cm);
-#endif // CONFIG_LV_MAP
-
-// This is the index in the scan order beyond which all coefficients for
-// 8x8 transform and above are in the top band.
-// This macro is currently unused but may be used by certain implementations
-#define MAXBAND_INDEX 21
-DECLARE_ALIGNED(16, extern const uint8_t,
- av1_coefband_trans_8x8plus[MAX_TX_SQUARE]);
-DECLARE_ALIGNED(16, extern const uint8_t, av1_coefband_trans_4x8_8x4[32]);
-DECLARE_ALIGNED(16, extern const uint8_t, av1_coefband_trans_4x4[16]);
-
-DECLARE_ALIGNED(16, extern const uint16_t, band_count_table[TX_SIZES_ALL][8]);
-DECLARE_ALIGNED(16, extern const uint16_t,
- band_cum_count_table[TX_SIZES_ALL][8]);
-
-static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
- switch (tx_size) {
- case TX_4X4: return av1_coefband_trans_4x4;
- case TX_8X4:
- case TX_4X8: return av1_coefband_trans_4x8_8x4;
- default: return av1_coefband_trans_8x8plus;
- }
-}
-
-// 128 lists of probabilities are stored for the following ONE node probs:
-// 1, 3, 5, 7, ..., 253, 255
-// In between probabilities are interpolated linearly
-
-#define COEFF_PROB_MODELS 255
-
-#define UNCONSTRAINED_NODES 3
-
-#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)
-#define TAIL_NODES (MODEL_NODES + 1)
-extern const aom_tree_index av1_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)];
-extern const aom_prob av1_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES];
-
-typedef aom_cdf_prob coeff_cdf_model[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS]
- [CDF_SIZE(ENTROPY_TOKENS)];
-extern const aom_cdf_prob av1_pareto8_token_probs[COEFF_PROB_MODELS]
- [ENTROPY_TOKENS - 2];
-extern const aom_cdf_prob av1_pareto8_tail_probs[COEFF_PROB_MODELS]
- [ENTROPY_TOKENS - 3];
struct frame_contexts;
-void av1_coef_head_cdfs(struct frame_contexts *fc);
-void av1_coef_pareto_cdfs(struct frame_contexts *fc);
-
typedef char ENTROPY_CONTEXT;
static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a,
@@ -273,93 +87,6 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
const ENTROPY_CONTEXT *l) {
ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
-#if CONFIG_CHROMA_2X2
- switch (tx_size) {
- case TX_2X2:
- above_ec = a[0] != 0;
- left_ec = l[0] != 0;
- break;
- case TX_4X4:
- above_ec = !!*(const uint16_t *)a;
- left_ec = !!*(const uint16_t *)l;
- break;
- case TX_4X8:
- above_ec = !!*(const uint16_t *)a;
- left_ec = !!*(const uint32_t *)l;
- break;
- case TX_8X4:
- above_ec = !!*(const uint32_t *)a;
- left_ec = !!*(const uint16_t *)l;
- break;
- case TX_8X8:
- above_ec = !!*(const uint32_t *)a;
- left_ec = !!*(const uint32_t *)l;
- break;
- case TX_8X16:
- above_ec = !!*(const uint32_t *)a;
- left_ec = !!*(const uint64_t *)l;
- break;
- case TX_16X8:
- above_ec = !!*(const uint64_t *)a;
- left_ec = !!*(const uint32_t *)l;
- break;
- case TX_16X16:
- above_ec = !!*(const uint64_t *)a;
- left_ec = !!*(const uint64_t *)l;
- break;
- case TX_16X32:
- above_ec = !!*(const uint64_t *)a;
- left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8));
- break;
- case TX_32X16:
- above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8));
- left_ec = !!*(const uint64_t *)l;
- break;
- case TX_32X32:
- above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8));
- left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8));
- break;
-#if CONFIG_TX64X64
- case TX_64X64:
- above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8) |
- *(const uint64_t *)(a + 16) | *(const uint64_t *)(a + 24));
- left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8) |
- *(const uint64_t *)(l + 16) | *(const uint64_t *)(l + 24));
- break;
- case TX_32X64:
- above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8));
- left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8) |
- *(const uint64_t *)(l + 16) | *(const uint64_t *)(l + 24));
- break;
- case TX_64X32:
- above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8) |
- *(const uint64_t *)(a + 16) | *(const uint64_t *)(a + 24));
- left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8));
- break;
-#endif // CONFIG_TX64X64
-#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- case TX_4X16:
- above_ec = !!*(const uint16_t *)a;
- left_ec = !!*(const uint64_t *)l;
- break;
- case TX_16X4:
- above_ec = !!*(const uint64_t *)a;
- left_ec = !!*(const uint16_t *)l;
- break;
- case TX_8X32:
- above_ec = !!*(const uint32_t *)a;
- left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8));
- break;
- case TX_32X8:
- above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8));
- left_ec = !!*(const uint32_t *)l;
- break;
-#endif
- default: assert(0 && "Invalid transform size."); break;
- }
- return combine_entropy_contexts(above_ec, left_ec);
-#endif // CONFIG_CHROMA_2X2
-
switch (tx_size) {
case TX_4X4:
above_ec = a[0] != 0;
@@ -401,7 +128,6 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
above_ec = !!*(const uint64_t *)a;
left_ec = !!*(const uint64_t *)l;
break;
-#if CONFIG_TX64X64
case TX_64X64:
above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8));
left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8));
@@ -414,8 +140,6 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8));
left_ec = !!*(const uint64_t *)l;
break;
-#endif // CONFIG_TX64X64
-#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
case TX_4X16:
above_ec = a[0] != 0;
left_ec = !!*(const uint32_t *)l;
@@ -432,55 +156,24 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
above_ec = !!*(const uint64_t *)a;
left_ec = !!*(const uint16_t *)l;
break;
-#endif
+ case TX_16X64:
+ above_ec = !!*(const uint32_t *)a;
+ left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8));
+ break;
+ case TX_64X16:
+ above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8));
+ left_ec = !!*(const uint32_t *)l;
+ break;
default: assert(0 && "Invalid transform size."); break;
}
return combine_entropy_contexts(above_ec, left_ec);
}
-#define COEF_COUNT_SAT 24
-#define COEF_MAX_UPDATE_FACTOR 112
-#define COEF_COUNT_SAT_AFTER_KEY 24
-#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128
-
-#if CONFIG_ADAPT_SCAN
-#define ADAPT_SCAN_PROB_PRECISION 10
-// 1/8 update rate
-#define ADAPT_SCAN_UPDATE_LOG_RATE 3
-#define ADAPT_SCAN_UPDATE_RATE \
- (1 << (ADAPT_SCAN_PROB_PRECISION - ADAPT_SCAN_UPDATE_LOG_RATE))
-#endif
-
-static INLINE aom_prob av1_merge_probs(aom_prob pre_prob,
- const unsigned int ct[2],
- unsigned int count_sat,
- unsigned int max_update_factor) {
- return merge_probs(pre_prob, ct, count_sat, max_update_factor);
-}
-
-static INLINE aom_prob av1_mode_mv_merge_probs(aom_prob pre_prob,
- const unsigned int ct[2]) {
- return mode_mv_merge_probs(pre_prob, ct);
+static INLINE TX_SIZE get_txsize_entropy_ctx(TX_SIZE txsize) {
+ return (TX_SIZE)((txsize_sqr_map[txsize] + txsize_sqr_up_map[txsize] + 1) >>
+ 1);
}
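
The new get_txsize_entropy_ctx() collapses rectangular transform sizes onto the square-size scale by averaging, with rounding up, the square of the smaller dimension and the square of the larger one. A standalone sketch of that arithmetic; the enum values and the smaller/larger-dimension mapping are assumptions for illustration, not the library's definitions:

#include <assert.h>

/* Assumed square-size ordering TX_4X4=0 ... TX_64X64=4 (illustration only). */
enum { SQ_4X4 = 0, SQ_8X8, SQ_16X16, SQ_32X32, SQ_64X64 };

static int entropy_ctx_sketch(int sqr, int sqr_up) {
  return (sqr + sqr_up + 1) >> 1;
}

int main(void) {
  /* TX_16X8: smaller dimension -> 8x8, larger -> 16x16 => context 2. */
  assert(entropy_ctx_sketch(SQ_8X8, SQ_16X16) == SQ_16X16);
  /* TX_64X16: smaller dimension -> 16x16, larger -> 64x64 => context 3. */
  assert(entropy_ctx_sketch(SQ_16X16, SQ_64X64) == SQ_32X32);
  return 0;
}
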
-void av1_average_tile_coef_cdfs(struct frame_contexts *fc,
- struct frame_contexts *ec_ctxs[],
- aom_cdf_prob *cdf_ptrs[], int num_tiles);
-void av1_average_tile_mv_cdfs(struct frame_contexts *fc,
- struct frame_contexts *ec_ctxs[],
- aom_cdf_prob *cdf_ptrs[], int num_tiles);
-void av1_average_tile_intra_cdfs(struct frame_contexts *fc,
- struct frame_contexts *ec_ctxs[],
- aom_cdf_prob *cdf_ptrs[], int num_tiles);
-void av1_average_tile_inter_cdfs(struct AV1Common *cm,
- struct frame_contexts *fc,
- struct frame_contexts *ec_ctxs[],
- aom_cdf_prob *cdf_ptrs[], int num_tiles);
-#if CONFIG_PVQ
-void av1_default_pvq_probs(struct AV1Common *cm);
-void av1_average_tile_pvq_cdfs(struct frame_contexts *fc,
- struct frame_contexts *ec_ctxs[], int num_tiles);
-#endif // CONFIG_PVQ
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/entropymode.c b/third_party/aom/av1/common/entropymode.c
index 207f1e245..41dc30ddb 100644
--- a/third_party/aom/av1/common/entropymode.c
+++ b/third_party/aom/av1/common/entropymode.c
@@ -15,2466 +15,880 @@
#include "av1/common/scan.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/seg_common.h"
-#if CONFIG_LV_MAP
#include "av1/common/txb_common.h"
-#endif
-#if CONFIG_LV_MAP
-#include "av1/common/txb_common.h"
-const aom_prob default_txb_skip[TX_SIZES][TXB_SKIP_CONTEXTS] = {
-#if CONFIG_CHROMA_2X2
- { 252, 71, 126, 184, 178, 218, 251, 49, 133, 221, 27, 92, 197 },
-#endif
- { 252, 71, 126, 184, 178, 218, 251, 49, 133, 221, 27, 92, 197 },
- { 252, 71, 126, 184, 178, 218, 251, 49, 133, 221, 27, 92, 197 },
- { 252, 71, 126, 184, 178, 218, 251, 49, 133, 221, 27, 92, 197 },
- { 252, 71, 126, 184, 178, 218, 251, 49, 133, 221, 27, 92, 197 },
-};
-const aom_prob default_dc_sign[PLANE_TYPES][DC_SIGN_CONTEXTS] = {
- { 125, 102, 147 }, { 119, 101, 135 },
+static const aom_cdf_prob
+ default_kf_y_mode_cdf[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS][CDF_SIZE(
+ INTRA_MODES)] = {
+ { { AOM_CDF13(15588, 17027, 19338, 20218, 20682, 21110, 21825, 23244,
+ 24189, 28165, 29093, 30466) },
+ { AOM_CDF13(12016, 18066, 19516, 20303, 20719, 21444, 21888, 23032,
+ 24434, 28658, 30172, 31409) },
+ { AOM_CDF13(10052, 10771, 22296, 22788, 23055, 23239, 24133, 25620,
+ 26160, 29336, 29929, 31567) },
+ { AOM_CDF13(14091, 15406, 16442, 18808, 19136, 19546, 19998, 22096,
+ 24746, 29585, 30958, 32462) },
+ { AOM_CDF13(12122, 13265, 15603, 16501, 18609, 20033, 22391, 25583,
+ 26437, 30261, 31073, 32475) } },
+ { { AOM_CDF13(10023, 19585, 20848, 21440, 21832, 22760, 23089, 24023,
+ 25381, 29014, 30482, 31436) },
+ { AOM_CDF13(5983, 24099, 24560, 24886, 25066, 25795, 25913, 26423,
+ 27610, 29905, 31276, 31794) },
+ { AOM_CDF13(7444, 12781, 20177, 20728, 21077, 21607, 22170, 23405,
+ 24469, 27915, 29090, 30492) },
+ { AOM_CDF13(8537, 14689, 15432, 17087, 17408, 18172, 18408, 19825,
+ 24649, 29153, 31096, 32210) },
+ { AOM_CDF13(7543, 14231, 15496, 16195, 17905, 20717, 21984, 24516,
+ 26001, 29675, 30981, 31994) } },
+ { { AOM_CDF13(12613, 13591, 21383, 22004, 22312, 22577, 23401, 25055,
+ 25729, 29538, 30305, 32077) },
+ { AOM_CDF13(9687, 13470, 18506, 19230, 19604, 20147, 20695, 22062,
+ 23219, 27743, 29211, 30907) },
+ { AOM_CDF13(6183, 6505, 26024, 26252, 26366, 26434, 27082, 28354, 28555,
+ 30467, 30794, 32086) },
+ { AOM_CDF13(10718, 11734, 14954, 17224, 17565, 17924, 18561, 21523,
+ 23878, 28975, 30287, 32252) },
+ { AOM_CDF13(9194, 9858, 16501, 17263, 18424, 19171, 21563, 25961, 26561,
+ 30072, 30737, 32463) } },
+ { { AOM_CDF13(12602, 14399, 15488, 18381, 18778, 19315, 19724, 21419,
+ 25060, 29696, 30917, 32409) },
+ { AOM_CDF13(8203, 13821, 14524, 17105, 17439, 18131, 18404, 19468,
+ 25225, 29485, 31158, 32342) },
+ { AOM_CDF13(8451, 9731, 15004, 17643, 18012, 18425, 19070, 21538, 24605,
+ 29118, 30078, 32018) },
+ { AOM_CDF13(7714, 9048, 9516, 16667, 16817, 16994, 17153, 18767, 26743,
+ 30389, 31536, 32528) },
+ { AOM_CDF13(8843, 10280, 11496, 15317, 16652, 17943, 19108, 22718,
+ 25769, 29953, 30983, 32485) } },
+ { { AOM_CDF13(12578, 13671, 15979, 16834, 19075, 20913, 22989, 25449,
+ 26219, 30214, 31150, 32477) },
+ { AOM_CDF13(9563, 13626, 15080, 15892, 17756, 20863, 22207, 24236,
+ 25380, 29653, 31143, 32277) },
+ { AOM_CDF13(8356, 8901, 17616, 18256, 19350, 20106, 22598, 25947, 26466,
+ 29900, 30523, 32261) },
+ { AOM_CDF13(10835, 11815, 13124, 16042, 17018, 18039, 18947, 22753,
+ 24615, 29489, 30883, 32482) },
+ { AOM_CDF13(7618, 8288, 9859, 10509, 15386, 18657, 22903, 28776, 29180,
+ 31355, 31802, 32593) } }
+ };
+
+static const aom_cdf_prob default_angle_delta_cdf[DIRECTIONAL_MODES][CDF_SIZE(
+ 2 * MAX_ANGLE_DELTA + 1)] = {
+ { AOM_CDF7(2180, 5032, 7567, 22776, 26989, 30217) },
+ { AOM_CDF7(2301, 5608, 8801, 23487, 26974, 30330) },
+ { AOM_CDF7(3780, 11018, 13699, 19354, 23083, 31286) },
+ { AOM_CDF7(4581, 11226, 15147, 17138, 21834, 28397) },
+ { AOM_CDF7(1737, 10927, 14509, 19588, 22745, 28823) },
+ { AOM_CDF7(2664, 10176, 12485, 17650, 21600, 30495) },
+ { AOM_CDF7(2240, 11096, 15453, 20341, 22561, 28917) },
+ { AOM_CDF7(3605, 10428, 12459, 17676, 21244, 30655) }
};
-const aom_prob default_coeff_base
- [TX_SIZES][PLANE_TYPES][NUM_BASE_LEVELS][COEFF_BASE_CONTEXTS] = {
-#if CONFIG_CHROMA_2X2
- { // TX_2X2
- {
- { 73, 128, 131, 204, 165, 226, 169, 236, 18, 128, 51,
- 153, 97, 179, 123, 201, 145, 226, 20, 128, 59, 153,
- 107, 181, 129, 201, 142, 226, 3, 128, 19, 99, 46,
- 135, 92, 166, 129, 190, 157, 217, 128, 128 },
-
- { 128, 128, 178, 218, 192, 236, 186, 243, 55, 128, 110,
- 183, 151, 205, 168, 221, 180, 238, 65, 128, 116, 178,
- 157, 206, 172, 222, 183, 238, 24, 128, 65, 127, 104,
- 164, 154, 195, 187, 216, 205, 230, 128, 128 },
- },
- {
- { 73, 128, 131, 204, 165, 226, 169, 236, 18, 128, 51,
- 153, 97, 179, 123, 201, 145, 226, 20, 128, 59, 153,
- 107, 181, 129, 201, 142, 226, 3, 128, 19, 99, 46,
- 135, 92, 166, 129, 190, 157, 217, 128, 128 },
+static const aom_cdf_prob default_if_y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(
+ INTRA_MODES)] = { { AOM_CDF13(22801, 23489, 24293, 24756, 25601, 26123,
+ 26606, 27418, 27945, 29228, 29685, 30349) },
+ { AOM_CDF13(18673, 19845, 22631, 23318, 23950, 24649,
+ 25527, 27364, 28152, 29701, 29984, 30852) },
+ { AOM_CDF13(19770, 20979, 23396, 23939, 24241, 24654,
+ 25136, 27073, 27830, 29360, 29730, 30659) },
+ { AOM_CDF13(20155, 21301, 22838, 23178, 23261, 23533,
+ 23703, 24804, 25352, 26575, 27016, 28049) } };
- { 128, 128, 178, 218, 192, 236, 186, 243, 55, 128, 110,
- 183, 151, 205, 168, 221, 180, 238, 65, 128, 116, 178,
- 157, 206, 172, 222, 183, 238, 24, 128, 65, 127, 104,
- 164, 154, 195, 187, 216, 205, 230, 128, 128 },
- } },
-#endif
- { // TX_4X4
- {
- // PLANE_Y
- { 73, 128, 131, 204, 165, 226, 169, 236, 18, 128, 51,
- 153, 97, 179, 123, 201, 145, 226, 20, 128, 59, 153,
- 107, 181, 129, 201, 142, 226, 3, 128, 19, 99, 46,
- 135, 92, 166, 129, 190, 157, 217, 128, 128 },
-
- { 128, 128, 178, 218, 192, 236, 186, 243, 55, 128, 110,
- 183, 151, 205, 168, 221, 180, 238, 65, 128, 116, 178,
- 157, 206, 172, 222, 183, 238, 24, 128, 65, 127, 104,
- 164, 154, 195, 187, 216, 205, 230, 128, 128 },
- },
- {
- // PLANE_UV
- { 47, 128, 100, 176, 140, 207, 150, 223, 11, 128, 35,
- 133, 79, 165, 115, 186, 129, 210, 8, 128, 30, 114,
- 80, 159, 116, 187, 146, 214, 2, 128, 9, 59, 28,
- 86, 71, 131, 117, 165, 149, 188, 128, 128 },
+static const aom_cdf_prob
+ default_uv_mode_cdf[CFL_ALLOWED_TYPES][INTRA_MODES][CDF_SIZE(
+ UV_INTRA_MODES)] = {
+ { { AOM_CDF13(22631, 24152, 25378, 25661, 25986, 26520, 27055, 27923,
+ 28244, 30059, 30941, 31961) },
+ { AOM_CDF13(9513, 26881, 26973, 27046, 27118, 27664, 27739, 27824,
+ 28359, 29505, 29800, 31796) },
+ { AOM_CDF13(9845, 9915, 28663, 28704, 28757, 28780, 29198, 29822, 29854,
+ 30764, 31777, 32029) },
+ { AOM_CDF13(13639, 13897, 14171, 25331, 25606, 25727, 25953, 27148,
+ 28577, 30612, 31355, 32493) },
+ { AOM_CDF13(9764, 9835, 9930, 9954, 25386, 27053, 27958, 28148, 28243,
+ 31101, 31744, 32363) },
+ { AOM_CDF13(11825, 13589, 13677, 13720, 15048, 29213, 29301, 29458,
+ 29711, 31161, 31441, 32550) },
+ { AOM_CDF13(14175, 14399, 16608, 16821, 17718, 17775, 28551, 30200,
+ 30245, 31837, 32342, 32667) },
+ { AOM_CDF13(12885, 13038, 14978, 15590, 15673, 15748, 16176, 29128,
+ 29267, 30643, 31961, 32461) },
+ { AOM_CDF13(12026, 13661, 13874, 15305, 15490, 15726, 15995, 16273,
+ 28443, 30388, 30767, 32416) },
+ { AOM_CDF13(19052, 19840, 20579, 20916, 21150, 21467, 21885, 22719,
+ 23174, 28861, 30379, 32175) },
+ { AOM_CDF13(18627, 19649, 20974, 21219, 21492, 21816, 22199, 23119,
+ 23527, 27053, 31397, 32148) },
+ { AOM_CDF13(17026, 19004, 19997, 20339, 20586, 21103, 21349, 21907,
+ 22482, 25896, 26541, 31819) },
+ { AOM_CDF13(12124, 13759, 14959, 14992, 15007, 15051, 15078, 15166,
+ 15255, 15753, 16039, 16606) } },
+ { { AOM_CDF14(10407, 11208, 12900, 13181, 13823, 14175, 14899, 15656,
+ 15986, 20086, 20995, 22455, 24212) },
+ { AOM_CDF14(4532, 19780, 20057, 20215, 20428, 21071, 21199, 21451,
+ 22099, 24228, 24693, 27032, 29472) },
+ { AOM_CDF14(5273, 5379, 20177, 20270, 20385, 20439, 20949, 21695, 21774,
+ 23138, 24256, 24703, 26679) },
+ { AOM_CDF14(6740, 7167, 7662, 14152, 14536, 14785, 15034, 16741, 18371,
+ 21520, 22206, 23389, 24182) },
+ { AOM_CDF14(4987, 5368, 5928, 6068, 19114, 20315, 21857, 22253, 22411,
+ 24911, 25380, 26027, 26376) },
+ { AOM_CDF14(5370, 6889, 7247, 7393, 9498, 21114, 21402, 21753, 21981,
+ 24780, 25386, 26517, 27176) },
+ { AOM_CDF14(4816, 4961, 7204, 7326, 8765, 8930, 20169, 20682, 20803,
+ 23188, 23763, 24455, 24940) },
+ { AOM_CDF14(6608, 6740, 8529, 9049, 9257, 9356, 9735, 18827, 19059,
+ 22336, 23204, 23964, 24793) },
+ { AOM_CDF14(5998, 7419, 7781, 8933, 9255, 9549, 9753, 10417, 18898,
+ 22494, 23139, 24764, 25989) },
+ { AOM_CDF14(10660, 11298, 12550, 12957, 13322, 13624, 14040, 15004,
+ 15534, 20714, 21789, 23443, 24861) },
+ { AOM_CDF14(10522, 11530, 12552, 12963, 13378, 13779, 14245, 15235,
+ 15902, 20102, 22696, 23774, 25838) },
+ { AOM_CDF14(10099, 10691, 12639, 13049, 13386, 13665, 14125, 15163,
+ 15636, 19676, 20474, 23519, 25208) },
+ { AOM_CDF14(3144, 5087, 7382, 7504, 7593, 7690, 7801, 8064, 8232, 9248,
+ 9875, 10521, 29048) } }
+ };
+
+static const aom_cdf_prob default_partition_cdf[PARTITION_CONTEXTS][CDF_SIZE(
+ EXT_PARTITION_TYPES)] = {
+ { AOM_CDF4(19132, 25510, 30392) },
+ { AOM_CDF4(13928, 19855, 28540) },
+ { AOM_CDF4(12522, 23679, 28629) },
+ { AOM_CDF4(9896, 18783, 25853) },
+ { AOM_CDF10(15597, 20929, 24571, 26706, 27664, 28821, 29601, 30571, 31902) },
+ { AOM_CDF10(7925, 11043, 16785, 22470, 23971, 25043, 26651, 28701, 29834) },
+ { AOM_CDF10(5414, 13269, 15111, 20488, 22360, 24500, 25537, 26336, 32117) },
+ { AOM_CDF10(2662, 6362, 8614, 20860, 23053, 24778, 26436, 27829, 31171) },
+ { AOM_CDF10(18462, 20920, 23124, 27647, 28227, 29049, 29519, 30178, 31544) },
+ { AOM_CDF10(7689, 9060, 12056, 24992, 25660, 26182, 26951, 28041, 29052) },
+ { AOM_CDF10(6015, 9009, 10062, 24544, 25409, 26545, 27071, 27526, 32047) },
+ { AOM_CDF10(1394, 2208, 2796, 28614, 29061, 29466, 29840, 30185, 31899) },
+ { AOM_CDF10(20137, 21547, 23078, 29566, 29837, 30261, 30524, 30892, 31724) },
+ { AOM_CDF10(6732, 7490, 9497, 27944, 28250, 28515, 28969, 29630, 30104) },
+ { AOM_CDF10(5945, 7663, 8348, 28683, 29117, 29749, 30064, 30298, 32238) },
+ { AOM_CDF10(870, 1212, 1487, 31198, 31394, 31574, 31743, 31881, 32332) },
+ { AOM_CDF8(27899, 28219, 28529, 32484, 32539, 32619, 32639) },
+ { AOM_CDF8(6607, 6990, 8268, 32060, 32219, 32338, 32371) },
+ { AOM_CDF8(5429, 6676, 7122, 32027, 32227, 32531, 32582) },
+ { AOM_CDF8(711, 966, 1172, 32448, 32538, 32617, 32664) },
+};
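
Each AOM_CDFn(...) initializer above can be read the usual libaom way: it lists the first n - 1 cumulative frequencies out of a 32768 total, so per-symbol probabilities are the successive differences, with the final step to 32768 implicit. Note also that the first four partition contexts use 4-symbol CDFs and the last four use 8-symbol CDFs, matching the RESET_CDF_COUNTER_STRIDE special-casing of fc->partition_cdf in entropy.c above. A small sketch, illustration only, decoding the first entry of the table under that reading:

#include <stdio.h>

int main(void) {
  /* AOM_CDF4(19132, 25510, 30392) read as cumulative counts out of 32768. */
  const int cdf[] = { 19132, 25510, 30392, 32768 };
  int prev = 0;
  for (int s = 0; s < 4; s++) {
    printf("P(partition symbol %d) = %5d/32768 = %.3f\n", s, cdf[s] - prev,
           (cdf[s] - prev) / 32768.0);
    prev = cdf[s];
  }
  return 0;
}
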
- { 83, 128, 152, 205, 168, 227, 192, 238, 42, 128, 92,
- 169, 138, 193, 165, 209, 128, 206, 36, 128, 86, 159,
- 141, 198, 181, 213, 102, 223, 18, 128, 50, 132, 90,
- 144, 141, 169, 180, 191, 128, 217, 128, 128 },
- } },
+static const aom_cdf_prob default_intra_ext_tx_cdf
+ [EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES][CDF_SIZE(TX_TYPES)] = {
{
- // TX_8X8
{
- // PLANE_Y
- { 82, 128, 143, 203, 177, 225, 186, 237, 7, 128, 37,
- 109, 78, 151, 110, 182, 139, 213, 25, 128, 51, 115,
- 86, 146, 111, 175, 125, 205, 3, 128, 12, 55, 32,
- 78, 63, 111, 96, 148, 123, 185, 146, 206 },
-
- { 136, 128, 182, 220, 201, 236, 205, 243, 46, 128, 101,
- 164, 147, 194, 170, 218, 177, 234, 62, 128, 104, 146,
- 143, 183, 165, 207, 183, 228, 30, 128, 60, 95, 95,
- 128, 135, 163, 166, 196, 175, 219, 192, 231 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
},
{
- // PLANE_UV
- { 47, 128, 112, 189, 164, 202, 163, 218, 8, 128, 32,
- 110, 68, 151, 102, 179, 134, 195, 5, 128, 22, 76,
- 54, 103, 80, 146, 101, 182, 1, 128, 5, 39, 17,
- 53, 46, 93, 79, 127, 112, 161, 64, 195 },
-
- { 90, 128, 156, 210, 183, 225, 128, 236, 39, 128, 98,
- 164, 146, 201, 209, 219, 171, 208, 32, 128, 68, 123,
- 119, 169, 154, 184, 128, 213, 15, 128, 38, 111, 83,
- 112, 120, 163, 180, 170, 154, 213, 128, 205 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ },
+ {
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ },
+ {
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
},
},
-
{
- // TX_16X16
{
- // PLANE_Y
- { 96, 128, 169, 218, 208, 233, 187, 244, 10, 128, 34,
- 101, 82, 153, 113, 184, 137, 212, 6, 128, 34, 104,
- 81, 145, 109, 176, 147, 202, 1, 128, 3, 43, 15,
- 53, 43, 89, 79, 129, 108, 168, 110, 194 },
-
- { 156, 128, 206, 232, 218, 240, 128, 251, 39, 128, 108,
- 161, 156, 202, 187, 216, 179, 234, 40, 128, 103, 152,
- 144, 185, 159, 208, 205, 227, 14, 128, 39, 84, 76,
- 110, 121, 151, 157, 187, 201, 206, 64, 216 },
+ { AOM_CDF7(1535, 8035, 9461, 12751, 23467, 27825) },
+ { AOM_CDF7(564, 3335, 9709, 10870, 18143, 28094) },
+ { AOM_CDF7(672, 3247, 3676, 11982, 19415, 23127) },
+ { AOM_CDF7(5279, 13885, 15487, 18044, 23527, 30252) },
+ { AOM_CDF7(4423, 6074, 7985, 10416, 25693, 29298) },
+ { AOM_CDF7(1486, 4241, 9460, 10662, 16456, 27694) },
+ { AOM_CDF7(439, 2838, 3522, 6737, 18058, 23754) },
+ { AOM_CDF7(1190, 4233, 4855, 11670, 20281, 24377) },
+ { AOM_CDF7(1045, 4312, 8647, 10159, 18644, 29335) },
+ { AOM_CDF7(202, 3734, 4747, 7298, 17127, 24016) },
+ { AOM_CDF7(447, 4312, 6819, 8884, 16010, 23858) },
+ { AOM_CDF7(277, 4369, 5255, 8905, 16465, 22271) },
+ { AOM_CDF7(3409, 5436, 10599, 15599, 19687, 24040) },
},
{
- // PLANE_UV
- { 42, 128, 139, 211, 180, 230, 199, 238, 3, 128, 32,
- 96, 69, 145, 102, 186, 117, 212, 4, 128, 25, 72,
- 55, 111, 81, 159, 116, 198, 1, 128, 4, 22, 16,
- 34, 35, 68, 63, 116, 89, 165, 102, 199 },
-
- { 135, 128, 193, 227, 182, 239, 128, 246, 42, 128, 115,
- 156, 146, 203, 188, 216, 128, 229, 32, 128, 82, 127,
- 120, 178, 165, 203, 213, 229, 11, 128, 32, 73, 79,
- 111, 129, 158, 162, 187, 156, 209, 85, 222 },
+ { AOM_CDF7(1870, 13742, 14530, 16498, 23770, 27698) },
+ { AOM_CDF7(326, 8796, 14632, 15079, 19272, 27486) },
+ { AOM_CDF7(484, 7576, 7712, 14443, 19159, 22591) },
+ { AOM_CDF7(1126, 15340, 15895, 17023, 20896, 30279) },
+ { AOM_CDF7(655, 4854, 5249, 5913, 22099, 27138) },
+ { AOM_CDF7(1299, 6458, 8885, 9290, 14851, 25497) },
+ { AOM_CDF7(311, 5295, 5552, 6885, 16107, 22672) },
+ { AOM_CDF7(883, 8059, 8270, 11258, 17289, 21549) },
+ { AOM_CDF7(741, 7580, 9318, 10345, 16688, 29046) },
+ { AOM_CDF7(110, 7406, 7915, 9195, 16041, 23329) },
+ { AOM_CDF7(363, 7974, 9357, 10673, 15629, 24474) },
+ { AOM_CDF7(153, 7647, 8112, 9936, 15307, 19996) },
+ { AOM_CDF7(3511, 6332, 11165, 15335, 19323, 23594) },
+ },
+ {
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ },
+ {
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+ { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
},
},
-
{
- // TX_32X32
{
- // PLANE_Y
- { 97, 128, 163, 232, 191, 246, 219, 252, 3, 128, 41,
- 108, 91, 147, 104, 183, 118, 225, 6, 128, 45, 91,
- 83, 125, 92, 160, 99, 215, 1, 128, 11, 36, 28,
- 46, 43, 59, 57, 86, 73, 145, 91, 210 },
-
- { 127, 128, 201, 239, 247, 248, 128, 254, 40, 128, 103,
- 152, 158, 199, 186, 225, 181, 242, 38, 128, 92, 112,
- 146, 189, 162, 217, 112, 239, 17, 128, 30, 47, 63,
- 89, 113, 146, 147, 187, 168, 217, 150, 233 },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
},
{
- // PLANE_UV
- { 65, 128, 155, 223, 166, 235, 154, 244, 15, 128, 57,
- 154, 110, 199, 159, 224, 149, 239, 9, 128, 57, 140,
- 97, 185, 148, 218, 176, 236, 1, 128, 3, 43, 19,
- 42, 64, 98, 117, 167, 154, 199, 128, 158 },
-
- { 130, 128, 189, 231, 171, 247, 128, 246, 63, 128, 132,
- 222, 186, 224, 199, 244, 128, 247, 55, 128, 113, 211,
- 164, 230, 225, 243, 128, 239, 7, 128, 31, 102, 106,
- 138, 147, 183, 171, 223, 171, 224, 128, 128 },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ },
+ {
+ { AOM_CDF5(1127, 12814, 22772, 27483) },
+ { AOM_CDF5(145, 6761, 11980, 26667) },
+ { AOM_CDF5(362, 5887, 11678, 16725) },
+ { AOM_CDF5(385, 15213, 18587, 30693) },
+ { AOM_CDF5(25, 2914, 23134, 27903) },
+ { AOM_CDF5(60, 4470, 11749, 23991) },
+ { AOM_CDF5(37, 3332, 14511, 21448) },
+ { AOM_CDF5(157, 6320, 13036, 17439) },
+ { AOM_CDF5(119, 6719, 12906, 29396) },
+ { AOM_CDF5(47, 5537, 12576, 21499) },
+ { AOM_CDF5(269, 6076, 11258, 23115) },
+ { AOM_CDF5(83, 5615, 12001, 17228) },
+ { AOM_CDF5(1968, 5556, 12023, 18547) },
+ },
+ {
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
+ { AOM_CDF5(6554, 13107, 19661, 26214) },
},
},
};
-const aom_prob default_nz_map[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS] = {
-#if CONFIG_EXT_TX
-#if CONFIG_CHROMA_2X2
- { { 56, 137, 82, 136, 83, 187, 124, 65,
- 215, 118, 155, 97, 160, 111, 71, 55,
-
- 142, 156, 91, 226, 107, 231, 146, 65,
- 105, 91, 232, 97, 185, 121, 90, 74,
-
- 153, 195, 123, 154, 106, 196, 143, 67,
- 232, 125, 121, 105, 159, 113, 88, 66 },
- { 50, 124, 89, 135, 116, 189, 150, 81,
- 202, 126, 130, 107, 149, 110, 85, 67,
-
- 139, 174, 112, 200, 94, 206, 146, 71,
- 163, 164, 212, 99, 177, 143, 125, 85,
-
- 151, 181, 126, 168, 135, 186, 143, 94,
- 207, 129, 142, 135, 145, 112, 98, 81 } },
-#endif
- { { 56, 137, 82, 136, 83, 187, 124, 65,
- 215, 118, 155, 97, 160, 111, 71, 55,
-
- 142, 156, 91, 226, 107, 231, 146, 65,
- 105, 91, 232, 97, 185, 121, 90, 74,
-
- 153, 195, 123, 154, 106, 196, 143, 67,
- 232, 125, 121, 105, 159, 113, 88, 66 },
- { 50, 124, 89, 135, 116, 189, 150, 81,
- 202, 126, 130, 107, 149, 110, 85, 67,
-
- 139, 174, 112, 200, 94, 206, 146, 71,
- 163, 164, 212, 99, 177, 143, 125, 85,
-
- 151, 181, 126, 168, 135, 186, 143, 94,
- 207, 129, 142, 135, 145, 112, 98, 81 } },
- { { 57, 156, 91, 162, 99, 212, 149, 81,
- 223, 128, 182, 121, 216, 163, 119, 94,
-
- 139, 183, 100, 206, 98, 242, 163, 79,
- 200, 127, 234, 112, 230, 169, 115, 90,
-
- 156, 190, 130, 172, 117, 209, 163, 80,
- 217, 145, 182, 135, 204, 163, 120, 88 },
- { 48, 133, 102, 143, 119, 190, 170, 109,
- 197, 127, 176, 137, 214, 168, 130, 119,
-
- 139, 185, 129, 210, 84, 237, 177, 75,
- 182, 165, 216, 121, 206, 177, 147, 102,
-
- 159, 192, 153, 182, 139, 203, 160, 125,
- 193, 161, 176, 142, 173, 145, 131, 114 } },
- { { 33, 148, 81, 149, 84, 219, 152, 76,
- 229, 127, 205, 120, 234, 170, 123, 88,
-
- 134, 197, 101, 213, 91, 244, 169, 85,
- 220, 141, 234, 123, 242, 183, 130, 94,
-
- 141, 184, 121, 173, 98, 213, 156, 85,
- 204, 156, 197, 119, 212, 174, 127, 92 },
- { 14, 75, 45, 98, 83, 197, 150, 90,
- 235, 124, 242, 155, 246, 187, 143, 103,
-
- 78, 185, 111, 255, 116, 255, 224, 171,
- 185, 157, 255, 85, 219, 122, 128, 128,
-
- 117, 187, 102, 181, 132, 233, 197, 93,
- 207, 135, 191, 107, 222, 175, 130, 47 } },
- {
- { 14, 79, 44, 86, 59, 178, 124, 63,
- 244, 106, 233, 117, 252, 185, 132, 92,
-
- 85, 225, 47, 236, 103, 255, 190, 116,
- 235, 114, 247, 123, 250, 174, 122, 110,
-
- 109, 197, 78, 177, 76, 242, 148, 68,
- 236, 123, 231, 103, 247, 171, 122, 91 },
- { 11, 40, 27, 92, 78, 183, 171, 70,
- 216, 74, 251, 146, 252, 213, 171, 148,
-
- 85, 225, 47, 236, 103, 255, 190, 116,
- 235, 114, 247, 123, 250, 174, 122, 110,
-
- 109, 197, 78, 177, 76, 242, 148, 68,
- 236, 123, 231, 103, 247, 171, 122, 91 },
- },
-#else // CONFIG_EXT_TX
-#if CONFIG_CHROMA_2X2
- {
- {
- 34, 103, 61, 106, 62, 160, 112, 54, 173, 121, 157, 92, 157, 129, 94,
- 65,
- },
-
- {
- 52, 124, 84, 136, 107, 197, 161, 82, 183, 151, 153, 140, 152, 134,
- 109, 81,
- },
- },
-#endif
- {
- {
- 34, 103, 61, 106, 62, 160, 112, 54, 173, 121, 157, 92, 157, 129, 94,
- 65,
- },
-
- {
- 52, 124, 84, 136, 107, 197, 161, 82, 183, 151, 153, 140, 152, 134,
- 109, 81,
- },
- },
- {
- {
- 34, 127, 74, 124, 74, 204, 153, 76, 226, 162, 207, 126, 227, 192, 149,
- 108,
- },
-
- {
- 43, 136, 115, 158, 130, 212, 187, 112, 231, 180, 202, 164, 236, 204,
- 168, 139,
- },
- },
- {
+static const aom_cdf_prob
+ default_inter_ext_tx_cdf[EXT_TX_SETS_INTER][EXT_TX_SIZES][CDF_SIZE(
+ TX_TYPES)] = {
{
- 25, 117, 70, 120, 77, 215, 171, 102, 234, 156, 235, 155, 247, 220,
- 176, 127,
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
},
-
{
- 24, 88, 49, 100, 62, 202, 148, 62, 237, 178, 233, 168, 244, 198, 162,
- 127,
+ { AOM_CDF16(4458, 5560, 7695, 9709, 13330, 14789, 17537, 20266, 21504,
+ 22848, 23934, 25474, 27727, 28915, 30631) },
+ { AOM_CDF16(1645, 2573, 4778, 5711, 7807, 8622, 10522, 15357, 17674,
+ 20408, 22517, 25010, 27116, 28856, 30749) },
+ { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
+ 20480, 22528, 24576, 26624, 28672, 30720) },
+ { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
+ 20480, 22528, 24576, 26624, 28672, 30720) },
},
- },
- {
{
- 11, 54, 17, 69, 26, 128, 125, 56, 232, 130, 237, 121, 250, 168, 134,
- 114,
+ { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
+ 24576, 27307, 30037) },
+ { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
+ 24576, 27307, 30037) },
+ { AOM_CDF12(770, 2421, 5225, 12907, 15819, 18927, 21561, 24089, 26595,
+ 28526, 30529) },
+ { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
+ 24576, 27307, 30037) },
},
-
{
- 21, 52, 32, 95, 64, 171, 152, 70, 247, 159, 252, 177, 252, 221, 192,
- 143,
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(4167) },
+ { AOM_CDF2(1998) },
+ { AOM_CDF2(748) },
},
- },
-#endif // CONFIG_EXT_TX
-};
-
-#if CONFIG_CTX1D
-const aom_prob default_eob_flag[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS] = {
-#if CONFIG_CHROMA_2X2
- { { 220, 225, 220, 216, 233, 225, 189, 178, 222, 199, 164, 112, 207,
- 171, 115, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-
- 147, 125, 104, 36, 117, 107, 26, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-
- 156, 124, 128, 128, 146, 68, 128, 128, 131, 17, 128, 128, 64,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
-
- { 146, 150, 142, 144, 178, 167, 131, 116, 150, 123, 107, 63, 119,
- 89, 74, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-
- 117, 127, 105, 69, 53, 56, 30, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-
- 128, 86, 128, 128, 140, 72, 128, 128, 120, 44, 128, 128, 80,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } },
- { { 237, 242, 242, 219, 192, 246, 246, 243, 233, 184, 155, 234, 217,
- 188, 152, 195, 167, 114, 89, 128, 128, 128, 128, 128, 128,
-
- 180, 173, 154, 133, 112, 147, 145, 142, 102, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-
- 198, 173, 130, 200, 128, 208, 182, 160, 106, 171, 128, 144, 128,
- 128, 128, 124, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
-
- { 140, 170, 162, 111, 94, 182, 195, 165, 153, 110, 81, 178, 169,
- 158, 83, 133, 85, 85, 38, 128, 128, 128, 128, 128, 128,
-
- 112, 127, 107, 87, 31, 57, 49, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-
- 160, 143, 99, 126, 128, 164, 133, 126, 59, 71, 128, 138, 128,
- 128, 128, 99, 128, 128, 128, 128, 128, 128, 128, 128, 128 } },
-#endif
- { { 220, 225, 220, 216, 233, 225, 189, 178, 222, 199, 164, 112, 207,
- 171, 115, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-
- 147, 125, 104, 36, 117, 107, 26, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-
- 156, 124, 128, 128, 146, 68, 128, 128, 131, 17, 128, 128, 64,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
-
- { 146, 150, 142, 144, 178, 167, 131, 116, 150, 123, 107, 63, 119,
- 89, 74, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-
- 117, 127, 105, 69, 53, 56, 30, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-
- 128, 86, 128, 128, 140, 72, 128, 128, 120, 44, 128, 128, 80,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } },
- { { 237, 242, 242, 219, 192, 246, 246, 243, 233, 184, 155, 234, 217,
- 188, 152, 195, 167, 114, 89, 128, 128, 128, 128, 128, 128,
-
- 180, 173, 154, 133, 112, 147, 145, 142, 102, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-
- 198, 173, 130, 200, 128, 208, 182, 160, 106, 171, 128, 144, 128,
- 128, 128, 124, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
-
- { 140, 170, 162, 111, 94, 182, 195, 165, 153, 110, 81, 178, 169,
- 158, 83, 133, 85, 85, 38, 128, 128, 128, 128, 128, 128,
-
- 112, 127, 107, 87, 31, 57, 49, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-
- 160, 143, 99, 126, 128, 164, 133, 126, 59, 71, 128, 138, 128,
- 128, 128, 99, 128, 128, 128, 128, 128, 128, 128, 128, 128 } },
- { { 229, 241, 243, 245, 247, 247, 251, 248, 235, 210, 247, 235, 208,
- 166, 245, 247, 244, 182, 236, 229, 180, 136, 128, 128, 128,
-
- 191, 197, 96, 70, 199, 128, 128, 191, 174, 117, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-
- 211, 183, 215, 188, 138, 209, 136, 128, 170, 128, 191, 128, 161,
- 128, 182, 128, 128, 128, 164, 128, 128, 128, 128, 128, 128 },
-
- { 106, 153, 182, 191, 186, 202, 211, 203, 166, 147, 205, 205, 195,
- 128, 206, 212, 182, 109, 192, 154, 139, 79, 128, 128, 128,
-
- 112, 133, 128, 255, 128, 128, 128, 130, 154, 98, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-
- 144, 185, 169, 199, 85, 183, 128, 128, 64, 128, 146, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } },
- { { 169, 203, 224, 222, 220, 228, 229, 223, 234, 247, 242, 230, 222,
- 238, 246, 234, 196, 245, 249, 245, 192, 240, 235, 199, 161,
-
- 176, 148, 158, 77, 178, 128, 128, 158, 128, 128, 196, 208, 155,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-
- 232, 187, 191, 221, 116, 217, 154, 128, 203, 128, 128, 192, 128,
- 201, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
-
- { 133, 182, 215, 204, 176, 220, 182, 168, 187, 197, 181, 145, 75,
- 164, 136, 51, 57, 156, 128, 128, 128, 85, 128, 128, 128,
-
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } },
-};
-#else // CONFIG_CTX1D
-const aom_prob default_eob_flag[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS] = {
-#if CONFIG_CHROMA_2X2
- {
- { 229, 236, 231, 222, 239, 236, 214, 201, 236, 226, 195, 134, 228,
- 210, 150, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 182, 186, 172, 176, 207, 213, 152, 122, 187, 171, 131, 65, 170,
- 134, 101, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- },
-#endif
- {
- { 229, 236, 231, 222, 239, 236, 214, 201, 236, 226, 195, 134, 228,
- 210, 150, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 182, 186, 172, 176, 207, 213, 152, 122, 187, 171, 131, 65, 170,
- 134, 101, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- },
- {
- { 225, 234, 244, 236, 205, 242, 246, 247, 246, 234, 191, 242, 237,
- 215, 142, 224, 206, 142, 73, 128, 128, 128, 128, 128, 128 },
- { 154, 171, 187, 175, 62, 199, 202, 206, 215, 200, 111, 197, 199,
- 174, 100, 135, 105, 104, 45, 128, 128, 128, 128, 128, 128 },
- },
- {
- { 180, 213, 216, 229, 233, 232, 240, 235, 220, 178, 239, 238, 225,
- 187, 229, 214, 226, 200, 183, 141, 158, 179, 128, 128, 128 },
- { 190, 225, 234, 248, 249, 248, 253, 251, 232, 110, 254, 252, 236,
- 57, 253, 248, 232, 85, 244, 189, 112, 64, 128, 128, 128 },
- },
- {
- { 248, 224, 246, 244, 239, 245, 251, 246, 251, 255, 255, 255, 249,
- 255, 255, 255, 229, 255, 255, 255, 228, 255, 255, 247, 137 },
- { 204, 207, 233, 215, 193, 228, 239, 221, 227, 250, 236, 207, 135,
- 236, 186, 182, 57, 209, 140, 128, 85, 184, 110, 128, 128 },
- },
+  };
+static const aom_cdf_prob default_cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)] = {
+ AOM_CDF8(1418, 2123, 13340, 18405, 26972, 28343, 32294)
};
-#endif // CONFIG_CTX1D
-const aom_prob default_coeff_lps[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS] = {
-#if CONFIG_CHROMA_2X2
- { { 96, 128, 86, 122, 128, 84, 125, 128, 88, 99, 126, 128,
- 135, 159, 99, 130, 134, 100, 128, 144, 70, 97, 128, 139,
- 157, 168, 127, 148, 162, 121, 149, 157, 118, 127, 143, 157,
- 178, 186, 168, 171, 183, 165, 169, 180, 180, 169, 166, 177 },
- { 81, 128, 72, 95, 128, 64, 98, 128, 42, 66, 101, 128,
- 129, 163, 97, 122, 130, 91, 119, 141, 70, 94, 118, 166,
- 157, 168, 117, 143, 151, 111, 144, 154, 76, 113, 128, 158,
- 177, 185, 165, 167, 179, 155, 166, 179, 110, 137, 115, 165 } },
-#endif
- { { 96, 128, 86, 122, 128, 84, 125, 128, 88, 99, 126, 128,
- 135, 159, 99, 130, 134, 100, 128, 144, 70, 97, 128, 139,
- 157, 168, 127, 148, 162, 121, 149, 157, 118, 127, 143, 157,
- 178, 186, 168, 171, 183, 165, 169, 180, 180, 169, 166, 177 },
- { 81, 128, 72, 95, 128, 64, 98, 128, 42, 66, 101, 128,
- 129, 163, 97, 122, 130, 91, 119, 141, 70, 94, 118, 166,
- 157, 168, 117, 143, 151, 111, 144, 154, 76, 113, 128, 158,
- 177, 185, 165, 167, 179, 155, 166, 179, 110, 137, 115, 165 } },
- { { 102, 128, 79, 125, 128, 74, 121, 128, 61, 98, 128, 128,
- 141, 164, 96, 132, 150, 90, 128, 153, 62, 100, 128, 153,
- 162, 172, 120, 146, 162, 113, 142, 154, 96, 113, 138, 155,
- 181, 188, 151, 170, 179, 147, 167, 181, 158, 157, 163, 176 },
- { 103, 128, 80, 116, 128, 66, 94, 128, 35, 65, 109, 128,
- 134, 163, 104, 137, 154, 92, 128, 104, 58, 94, 129, 132,
- 156, 173, 137, 149, 165, 104, 143, 143, 112, 101, 133, 159,
- 176, 186, 134, 172, 175, 155, 169, 177, 255, 107, 137, 168 } },
- { { 125, 128, 85, 157, 128, 82, 155, 128, 42, 83, 116, 128,
- 155, 174, 101, 144, 155, 93, 140, 155, 57, 92, 124, 149,
- 173, 178, 114, 148, 161, 111, 145, 161, 77, 101, 131, 153,
- 190, 191, 140, 169, 183, 140, 169, 179, 108, 122, 150, 171 },
- { 136, 128, 108, 163, 128, 96, 140, 128, 48, 90, 85, 128,
- 144, 164, 113, 158, 179, 107, 159, 128, 43, 75, 133, 160,
- 157, 184, 144, 160, 189, 154, 152, 184, 128, 124, 137, 140,
- 188, 196, 148, 170, 178, 128, 177, 159, 128, 179, 135, 135 } },
- { { 133, 128, 110, 153, 128, 101, 157, 128, 49, 91, 134, 128,
- 151, 168, 129, 158, 162, 112, 154, 168, 63, 99, 130, 158,
- 171, 178, 128, 160, 173, 111, 155, 171, 86, 108, 143, 159,
- 194, 196, 162, 177, 185, 123, 172, 181, 101, 132, 156, 178 },
- { 133, 128, 129, 144, 128, 116, 135, 128, 43, 101, 100, 128,
- 140, 163, 158, 173, 205, 128, 165, 171, 128, 128, 210, 163,
- 172, 184, 192, 176, 201, 183, 177, 190, 128, 192, 199, 144,
- 192, 192, 1, 196, 192, 255, 171, 178, 255, 128, 171, 179 } }
-};
-#if BR_NODE
-const aom_prob
- default_coeff_br[TX_SIZES][PLANE_TYPES][BASE_RANGE_SETS][LEVEL_CONTEXTS] = {
-#if CONFIG_CHROMA_2X2
- { { { 62, 128, 54, 116, 128, 51, 97, 128, 59, 68, 107, 128,
- 119, 158, 68, 115, 131, 65, 112, 138, 34, 71, 118, 137,
- 171, 184, 110, 152, 178, 105, 146, 172, 89, 111, 145, 173,
- 214, 226, 201, 198, 214, 196, 193, 210, 239, 196, 186, 202 },
- { 41, 128, 58, 52, 128, 51, 61, 128, 92, 54, 48, 128,
- 67, 113, 36, 55, 75, 30, 56, 72, 12, 25, 50, 79,
- 94, 131, 37, 75, 108, 42, 78, 103, 5, 31, 67, 103,
- 172, 192, 131, 135, 167, 129, 136, 165, 149, 144, 120, 149 },
- { 35, 128, 74, 50, 128, 63, 59, 128, 87, 74, 38, 128,
- 32, 53, 23, 34, 50, 18, 30, 41, 15, 13, 18, 18,
- 52, 74, 18, 29, 36, 18, 31, 47, 51, 9, 15, 27,
- 96, 134, 85, 70, 93, 96, 79, 100, 108, 100, 55, 65 } },
- { { 52, 128, 35, 79, 128, 29, 66, 128, 12, 30, 57, 128,
- 113, 156, 64, 107, 172, 54, 103, 145, 23, 57, 96, 110,
- 165, 184, 95, 138, 166, 95, 141, 184, 55, 80, 133, 165,
- 212, 222, 134, 175, 206, 158, 177, 197, 102, 61, 154, 190 },
- { 36, 128, 18, 26, 128, 15, 29, 128, 4, 6, 30, 128,
- 63, 113, 25, 44, 66, 22, 40, 67, 9, 14, 34, 55,
- 90, 125, 26, 66, 82, 29, 73, 88, 1, 26, 34, 67,
- 158, 179, 70, 121, 134, 69, 111, 129, 1, 85, 54, 105 },
- { 24, 128, 8, 31, 128, 15, 16, 128, 1, 1, 1, 128,
- 32, 39, 16, 18, 43, 5, 17, 13, 1, 1, 22, 1,
- 37, 65, 26, 20, 28, 16, 15, 24, 128, 1, 1, 1,
- 83, 107, 57, 56, 74, 34, 29, 73, 128, 1, 37, 47 } } },
-#endif
- { { { 62, 128, 54, 116, 128, 51, 97, 128, 59, 68, 107, 128,
- 119, 158, 68, 115, 131, 65, 112, 138, 34, 71, 118, 137,
- 171, 184, 110, 152, 178, 105, 146, 172, 89, 111, 145, 173,
- 214, 226, 201, 198, 214, 196, 193, 210, 239, 196, 186, 202 },
- { 41, 128, 58, 52, 128, 51, 61, 128, 92, 54, 48, 128,
- 67, 113, 36, 55, 75, 30, 56, 72, 12, 25, 50, 79,
- 94, 131, 37, 75, 108, 42, 78, 103, 5, 31, 67, 103,
- 172, 192, 131, 135, 167, 129, 136, 165, 149, 144, 120, 149 },
- { 35, 128, 74, 50, 128, 63, 59, 128, 87, 74, 38, 128,
- 32, 53, 23, 34, 50, 18, 30, 41, 15, 13, 18, 18,
- 52, 74, 18, 29, 36, 18, 31, 47, 51, 9, 15, 27,
- 96, 134, 85, 70, 93, 96, 79, 100, 108, 100, 55, 65 } },
- { { 52, 128, 35, 79, 128, 29, 66, 128, 12, 30, 57, 128,
- 113, 156, 64, 107, 172, 54, 103, 145, 23, 57, 96, 110,
- 165, 184, 95, 138, 166, 95, 141, 184, 55, 80, 133, 165,
- 212, 222, 134, 175, 206, 158, 177, 197, 102, 61, 154, 190 },
- { 36, 128, 18, 26, 128, 15, 29, 128, 4, 6, 30, 128,
- 63, 113, 25, 44, 66, 22, 40, 67, 9, 14, 34, 55,
- 90, 125, 26, 66, 82, 29, 73, 88, 1, 26, 34, 67,
- 158, 179, 70, 121, 134, 69, 111, 129, 1, 85, 54, 105 },
- { 24, 128, 8, 31, 128, 15, 16, 128, 1, 1, 1, 128,
- 32, 39, 16, 18, 43, 5, 17, 13, 1, 1, 22, 1,
- 37, 65, 26, 20, 28, 16, 15, 24, 128, 1, 1, 1,
- 83, 107, 57, 56, 74, 34, 29, 73, 128, 1, 37, 47 } } },
- { { { 72, 128, 45, 113, 128, 38, 100, 128, 26, 63, 112, 128,
- 134, 177, 65, 121, 148, 57, 111, 143, 27, 68, 116, 152,
- 181, 198, 98, 148, 173, 84, 136, 168, 53, 89, 134, 170,
- 218, 230, 173, 194, 216, 160, 188, 213, 199, 177, 183, 204 },
- { 54, 128, 34, 55, 128, 32, 53, 128, 66, 45, 54, 128,
- 81, 128, 33, 59, 102, 26, 55, 80, 7, 23, 49, 91,
- 116, 145, 36, 79, 107, 35, 73, 102, 12, 28, 57, 95,
- 170, 201, 102, 133, 173, 105, 127, 173, 166, 132, 114, 149 },
- { 40, 128, 25, 30, 128, 21, 31, 128, 24, 17, 24, 128,
- 51, 67, 19, 28, 40, 17, 25, 42, 15, 13, 19, 19,
- 61, 77, 19, 30, 48, 13, 33, 50, 11, 15, 21, 30,
- 103, 147, 37, 69, 111, 37, 66, 105, 18, 18, 36, 76 } },
- { { 74, 128, 42, 99, 128, 32, 57, 128, 9, 28, 76, 128,
- 115, 187, 70, 118, 120, 52, 109, 128, 19, 60, 93, 100,
- 178, 197, 119, 147, 179, 92, 137, 178, 37, 87, 110, 158,
- 216, 227, 169, 186, 201, 128, 178, 204, 1, 96, 155, 217 },
- { 59, 128, 26, 34, 128, 11, 20, 128, 7, 8, 24, 128,
- 73, 125, 38, 74, 96, 23, 61, 79, 15, 9, 23, 110,
- 96, 151, 49, 79, 164, 22, 70, 65, 1, 1, 9, 69,
- 156, 196, 73, 105, 181, 17, 126, 155, 128, 1, 90, 111 },
- { 42, 128, 10, 11, 128, 13, 1, 128, 1, 1, 1, 128,
- 55, 63, 13, 17, 85, 1, 16, 64, 1, 1, 1, 1,
- 62, 58, 32, 21, 53, 1, 37, 91, 128, 128, 1, 1,
- 81, 133, 51, 48, 79, 1, 25, 81, 128, 128, 1, 54 } } },
- { { { 103, 128, 52, 163, 128, 46, 155, 128, 12, 45, 97, 128,
- 162, 196, 69, 140, 170, 60, 130, 158, 21, 58, 109, 150,
- 205, 214, 93, 149, 178, 79, 143, 179, 38, 71, 120, 159,
- 231, 240, 150, 192, 218, 140, 188, 220, 84, 112, 159, 196 },
- { 93, 128, 42, 143, 128, 41, 132, 128, 6, 15, 40, 128,
- 113, 172, 39, 99, 113, 33, 91, 94, 5, 15, 42, 83,
- 148, 172, 37, 91, 130, 28, 81, 121, 9, 20, 47, 87,
- 201, 223, 75, 139, 183, 77, 132, 176, 23, 41, 82, 147 },
- { 92, 128, 45, 123, 128, 28, 88, 128, 1, 8, 20, 128,
- 85, 94, 39, 95, 83, 33, 81, 61, 4, 5, 17, 25,
- 84, 109, 17, 59, 76, 11, 46, 62, 1, 4, 13, 35,
- 139, 184, 25, 86, 129, 25, 71, 123, 26, 13, 31, 84 } },
- { { 123, 128, 82, 169, 128, 62, 139, 128, 1, 28, 77, 128,
- 139, 167, 92, 170, 146, 76, 149, 255, 19, 68, 160, 73,
- 190, 209, 171, 165, 218, 57, 152, 209, 128, 61, 122, 164,
- 237, 240, 146, 210, 227, 128, 224, 220, 128, 128, 196, 199 },
- { 130, 128, 52, 141, 128, 32, 101, 128, 128, 1, 85, 128,
- 94, 155, 71, 121, 255, 30, 116, 85, 1, 8, 58, 255,
- 105, 169, 110, 101, 132, 1, 77, 142, 128, 1, 54, 96,
- 166, 214, 224, 154, 198, 255, 153, 230, 128, 85, 100, 146 },
- { 103, 128, 26, 83, 128, 20, 47, 128, 128, 128, 1, 128,
- 91, 90, 19, 76, 128, 1, 42, 1, 128, 255, 64, 128,
- 74, 77, 1, 72, 68, 128, 13, 77, 128, 128, 64, 1,
- 71, 147, 37, 99, 171, 1, 104, 151, 128, 1, 1, 96 } } },
- { { { 113, 128, 79, 165, 128, 69, 149, 128, 14, 55, 116, 128,
- 163, 202, 104, 169, 205, 82, 159, 180, 22, 64, 121, 165,
- 207, 216, 113, 177, 215, 95, 166, 195, 35, 77, 132, 179,
- 241, 244, 173, 207, 233, 128, 202, 227, 92, 121, 169, 209 },
- { 114, 128, 67, 136, 128, 54, 132, 128, 6, 26, 62, 128,
- 85, 129, 85, 146, 173, 64, 129, 140, 7, 19, 65, 92,
- 139, 169, 42, 147, 186, 40, 129, 170, 18, 18, 65, 117,
- 213, 230, 74, 172, 213, 69, 165, 196, 1, 40, 103, 170 },
- { 101, 128, 61, 134, 128, 52, 97, 128, 1, 14, 26, 128,
- 79, 72, 71, 135, 152, 56, 114, 117, 1, 10, 24, 58,
- 64, 66, 60, 133, 148, 16, 126, 123, 1, 32, 26, 56,
- 143, 197, 51, 141, 176, 59, 132, 162, 128, 17, 47, 106 } },
- { { 115, 128, 112, 135, 128, 89, 130, 128, 15, 49, 89, 128,
- 143, 238, 154, 203, 255, 138, 172, 255, 1, 98, 196, 255,
- 185, 203, 255, 211, 255, 192, 217, 235, 128, 128, 171, 255,
- 233, 233, 255, 247, 255, 1, 239, 245, 1, 128, 255, 255 },
- { 75, 128, 76, 118, 128, 35, 74, 128, 1, 13, 23, 128,
- 63, 138, 114, 164, 140, 91, 128, 128, 128, 1, 138, 64,
- 96, 128, 255, 175, 236, 85, 166, 209, 128, 1, 128, 146,
- 196, 217, 1, 204, 206, 128, 212, 221, 128, 128, 128, 219 },
- { 49, 128, 36, 62, 128, 37, 56, 128, 128, 1, 1, 128,
- 45, 37, 68, 102, 128, 90, 56, 1, 128, 128, 37, 1,
- 26, 27, 128, 126, 128, 255, 63, 142, 128, 128, 1, 1,
- 125, 159, 128, 173, 212, 128, 85, 189, 128, 128, 255, 171 } } }
- };
-#endif // BR_NODE
-#if CONFIG_CTX1D
-static const aom_prob default_eob_mode[TX_SIZES][PLANE_TYPES][TX_CLASSES] = {
-#if CONFIG_CHROMA_2X2
- { { 128, 176, 157 }, { 128, 222, 198 } },
-#endif
- { { 128, 176, 157 }, { 128, 222, 198 } },
- { { 128, 35, 56 }, { 128, 203, 225 } },
- { { 128, 55, 136 }, { 128, 230, 253 } },
- { { 128, 101, 188 }, { 128, 128, 128 } }
-};
-static const aom_prob default_empty_line[TX_SIZES][PLANE_TYPES][TX_CLASSES]
- [EMPTY_LINE_CONTEXTS] = {
-#if CONFIG_CHROMA_2X2
- { { { 128, 128, 128, 128, 128 },
- { 142, 153, 211, 205, 128 },
- { 162, 142, 203, 197, 128 } },
- { { 128, 128, 128, 128, 128 },
- { 133, 116, 178, 123, 128 },
- { 139, 109, 159, 115, 128 } } },
-#endif
- { { { 128, 128, 128, 128, 128 },
- { 142, 153, 211, 205, 128 },
- { 162, 142, 203, 197, 128 } },
- { { 128, 128, 128, 128, 128 },
- { 133, 116, 178, 123, 128 },
- { 139, 109, 159, 115, 128 } } },
- { { { 128, 128, 128, 128, 128 },
- { 185, 130, 183, 204, 227 },
- { 171, 81, 177, 200, 221 } },
- { { 128, 128, 128, 128, 128 },
- { 180, 127, 175, 189, 213 },
- { 120, 74, 129, 134, 156 } } },
- { { { 128, 128, 128, 128, 128 },
- { 202, 82, 183, 214, 248 },
- { 144, 41, 163, 185, 203 } },
- { { 128, 128, 128, 128, 128 },
- { 151, 93, 171, 224, 160 },
- { 128, 51, 171, 128, 1 } } },
- { { { 128, 128, 128, 128, 128 },
- { 154, 48, 174, 210, 233 },
- { 123, 16, 148, 189, 197 } },
- { { 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128 } } }
- };
-static const aom_prob
- default_hv_eob[TX_SIZES][PLANE_TYPES][TX_CLASSES][HV_EOB_CONTEXTS] = {
-#if CONFIG_CHROMA_2X2
- { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 151, 173, 114, 128, 128, 128, 128, 128, 128, 162, 198, 128,
- 128, 128, 128, 128, 182, 198, 109, 128, 128, 128, 128, 128 },
- { 152, 173, 119, 128, 128, 128, 128, 128, 128, 164, 193, 128,
- 128, 128, 128, 128, 198, 209, 121, 128, 128, 128, 128, 128 } },
- { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 123, 143, 70, 128, 128, 128, 128, 128, 128, 127, 154, 128,
- 128, 128, 128, 128, 176, 148, 36, 128, 128, 128, 128, 128 },
- { 132, 152, 73, 128, 128, 128, 128, 128, 128, 127, 159, 128,
- 128, 128, 128, 128, 186, 181, 48, 128, 128, 128, 128, 128 } } },
-#endif
- { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 151, 173, 114, 128, 128, 128, 128, 128, 128, 162, 198, 128,
- 128, 128, 128, 128, 182, 198, 109, 128, 128, 128, 128, 128 },
- { 152, 173, 119, 128, 128, 128, 128, 128, 128, 164, 193, 128,
- 128, 128, 128, 128, 198, 209, 121, 128, 128, 128, 128, 128 } },
- { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 123, 143, 70, 128, 128, 128, 128, 128, 128, 127, 154, 128,
- 128, 128, 128, 128, 176, 148, 36, 128, 128, 128, 128, 128 },
- { 132, 152, 73, 128, 128, 128, 128, 128, 128, 127, 159, 128,
- 128, 128, 128, 128, 186, 181, 48, 128, 128, 128, 128, 128 } } },
- { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 109, 105, 78, 44, 128, 128, 128, 128, 128, 146, 185, 221,
- 128, 128, 128, 128, 199, 188, 134, 69, 128, 128, 128, 128 },
- { 124, 127, 115, 82, 128, 128, 128, 128, 128, 162, 198, 224,
- 128, 128, 128, 128, 206, 214, 177, 135, 128, 128, 128, 128 } },
- { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 95, 102, 65, 14, 128, 128, 128, 128, 128, 132, 164, 199,
- 128, 128, 128, 128, 162, 163, 66, 27, 128, 128, 128, 128 },
- { 83, 141, 97, 38, 128, 128, 128, 128, 128, 154, 132, 184,
- 128, 128, 128, 128, 194, 218, 112, 63, 128, 128, 128, 128 } } },
- { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 117, 107, 86, 61, 51, 104, 128, 128, 128, 160, 198, 238,
- 252, 251, 128, 128, 221, 223, 209, 186, 99, 81, 128, 128 },
- { 118, 122, 121, 100, 91, 97, 128, 128, 128, 168, 190, 214,
- 233, 235, 128, 128, 197, 216, 177, 165, 147, 126, 128, 128 } },
- { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 109, 102, 63, 51, 255, 85, 128, 128, 128, 163, 131, 175,
- 128, 128, 128, 128, 183, 102, 40, 1, 128, 128, 128, 128 },
- { 255, 255, 1, 1, 128, 1, 128, 128, 128, 1, 128, 128,
- 128, 128, 128, 128, 255, 1, 128, 128, 128, 128, 128, 128 } } },
- { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 114, 108, 83, 61, 53, 28, 77, 177, 128, 161, 187, 218,
- 240, 237, 228, 234, 200, 207, 167, 136, 98, 78, 183, 128 },
- { 117, 138, 116, 77, 75, 85, 26, 1, 128, 197, 162, 200,
- 184, 212, 225, 236, 189, 225, 168, 124, 144, 171, 128, 128 } },
- { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } } }
+static const aom_cdf_prob
+ default_cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)] = {
+ { AOM_CDF16(7637, 20719, 31401, 32481, 32657, 32688, 32692, 32696, 32700,
+ 32704, 32708, 32712, 32716, 32720, 32724) },
+ { AOM_CDF16(14365, 23603, 28135, 31168, 32167, 32395, 32487, 32573, 32620,
+ 32647, 32668, 32672, 32676, 32680, 32684) },
+ { AOM_CDF16(11532, 22380, 28445, 31360, 32349, 32523, 32584, 32649, 32673,
+ 32677, 32681, 32685, 32689, 32693, 32697) },
+ { AOM_CDF16(26990, 31402, 32282, 32571, 32692, 32696, 32700, 32704, 32708,
+ 32712, 32716, 32720, 32724, 32728, 32732) },
+ { AOM_CDF16(17248, 26058, 28904, 30608, 31305, 31877, 32126, 32321, 32394,
+ 32464, 32516, 32560, 32576, 32593, 32622) },
+ { AOM_CDF16(14738, 21678, 25779, 27901, 29024, 30302, 30980, 31843, 32144,
+ 32413, 32520, 32594, 32622, 32656, 32660) }
};
-#endif // CONFIG_CTX1D
-#endif // CONFIG_LV_MAP
-#if CONFIG_EXT_PARTITION_TYPES
-static const aom_prob
- default_partition_probs[PARTITION_CONTEXTS][EXT_PARTITION_TYPES - 1] = {
- // 8x8 -> 4x4
- { 199, 122, 141, 128, 128, 128, 255, 128, 255 }, // a/l both not split
- { 147, 63, 159, 128, 128, 128, 255, 128, 255 }, // a split, l not split
- { 148, 133, 118, 128, 128, 128, 255, 128, 255 }, // l split, a not split
- { 121, 104, 114, 128, 128, 128, 255, 128, 255 }, // a/l both split
- // 16x16 -> 8x8
- { 174, 73, 87, 128, 128, 128, 255, 128, 255 }, // a/l both not split
- { 92, 41, 83, 128, 128, 128, 255, 128, 255 }, // a split, l not split
- { 82, 99, 50, 128, 128, 128, 255, 128, 255 }, // l split, a not split
- { 53, 39, 39, 128, 128, 128, 255, 128, 255 }, // a/l both split
- // 32x32 -> 16x16
- { 177, 58, 59, 128, 128, 85, 128, 85, 128 }, // a/l both not split
- { 68, 26, 63, 128, 128, 85, 128, 85, 128 }, // a split, l not split
- { 52, 79, 25, 128, 128, 85, 128, 85, 128 }, // l split, a not split
- { 17, 14, 12, 128, 128, 85, 128, 85, 128 }, // a/l both split
- // 64x64 -> 32x32
- { 222, 34, 30, 128, 128, 85, 128, 85, 128 }, // a/l both not split
- { 72, 16, 44, 128, 128, 85, 128, 85, 128 }, // a split, l not split
- { 58, 32, 12, 128, 128, 85, 128, 85, 128 }, // l split, a not split
- { 10, 7, 6, 128, 128, 85, 128, 85, 128 }, // a/l both split
-#if CONFIG_EXT_PARTITION
- // 128x128 -> 64x64
- { 222, 34, 30, 128, 128, 128, 255, 128, 255 }, // a/l both not split
- { 72, 16, 44, 128, 128, 128, 255, 128, 255 }, // a split, l not split
- { 58, 32, 12, 128, 128, 128, 255, 128, 255 }, // l split, a not split
- { 10, 7, 6, 128, 128, 128, 255, 128, 255 }, // a/l both split
-#endif // CONFIG_EXT_PARTITION
-#if CONFIG_UNPOISON_PARTITION_CTX
- { 0, 0, 141, 0, 0, 0, 0, 0, 0 }, // 8x8 -> 4x4
- { 0, 0, 87, 0, 0, 0, 0, 0, 0 }, // 16x16 -> 8x8
- { 0, 0, 59, 0, 0, 0, 0, 0, 0 }, // 32x32 -> 16x16
- { 0, 0, 30, 0, 0, 0, 0, 0, 0 }, // 64x64 -> 32x32
-#if CONFIG_EXT_PARTITION
- { 0, 0, 30, 0, 0, 0, 0, 0, 0 }, // 128x128 -> 64x64
-#endif // CONFIG_EXT_PARTITION
- { 0, 122, 0, 0, 0, 0, 0, 0, 0 }, // 8x8 -> 4x4
- { 0, 73, 0, 0, 0, 0, 0, 0, 0 }, // 16x16 -> 8x8
- { 0, 58, 0, 0, 0, 0, 0, 0, 0 }, // 32x32 -> 16x16
- { 0, 34, 0, 0, 0, 0, 0, 0, 0 }, // 64x64 -> 32x32
-#if CONFIG_EXT_PARTITION
- { 0, 34, 0, 0, 0, 0, 0, 0, 0 }, // 128x128 -> 64x64
-#endif // CONFIG_EXT_PARTITION
-#endif // CONFIG_UNPOISON_PARTITION_CTX
- };
-#else
-static const aom_prob
- default_partition_probs[PARTITION_CONTEXTS][PARTITION_TYPES - 1] = {
- // 8x8 -> 4x4
- { 199, 122, 141 }, // a/l both not split
- { 147, 63, 159 }, // a split, l not split
- { 148, 133, 118 }, // l split, a not split
- { 121, 104, 114 }, // a/l both split
- // 16x16 -> 8x8
- { 174, 73, 87 }, // a/l both not split
- { 92, 41, 83 }, // a split, l not split
- { 82, 99, 50 }, // l split, a not split
- { 53, 39, 39 }, // a/l both split
- // 32x32 -> 16x16
- { 177, 58, 59 }, // a/l both not split
- { 68, 26, 63 }, // a split, l not split
- { 52, 79, 25 }, // l split, a not split
- { 17, 14, 12 }, // a/l both split
- // 64x64 -> 32x32
- { 222, 34, 30 }, // a/l both not split
- { 72, 16, 44 }, // a split, l not split
- { 58, 32, 12 }, // l split, a not split
- { 10, 7, 6 }, // a/l both split
-#if CONFIG_EXT_PARTITION
- // 128x128 -> 64x64
- { 222, 34, 30 }, // a/l both not split
- { 72, 16, 44 }, // a split, l not split
- { 58, 32, 12 }, // l split, a not split
- { 10, 7, 6 }, // a/l both split
-#endif // CONFIG_EXT_PARTITION
-#if CONFIG_UNPOISON_PARTITION_CTX
- { 0, 0, 141 }, // 8x8 -> 4x4
- { 0, 0, 87 }, // 16x16 -> 8x8
- { 0, 0, 59 }, // 32x32 -> 16x16
- { 0, 0, 30 }, // 64x64 -> 32x32
-#if CONFIG_EXT_PARTITION
- { 0, 0, 30 }, // 128x128 -> 64x64
-#endif // CONFIG_EXT_PARTITION
- { 0, 122, 0 }, // 8x8 -> 4x4
- { 0, 73, 0 }, // 16x16 -> 8x8
- { 0, 58, 0 }, // 32x32 -> 16x16
- { 0, 34, 0 }, // 64x64 -> 32x32
-#if CONFIG_EXT_PARTITION
- { 0, 34, 0 }, // 128x128 -> 64x64
-#endif // CONFIG_EXT_PARTITION
-#endif // CONFIG_UNPOISON_PARTITION_CTX
+static const aom_cdf_prob
+ default_switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS][CDF_SIZE(
+ SWITCHABLE_FILTERS)] = {
+ { AOM_CDF3(31935, 32720) }, { AOM_CDF3(5568, 32719) },
+ { AOM_CDF3(422, 2938) }, { AOM_CDF3(28244, 32608) },
+ { AOM_CDF3(31206, 31953) }, { AOM_CDF3(4862, 32121) },
+ { AOM_CDF3(770, 1152) }, { AOM_CDF3(20889, 25637) },
+ { AOM_CDF3(31910, 32724) }, { AOM_CDF3(4120, 32712) },
+ { AOM_CDF3(305, 2247) }, { AOM_CDF3(27403, 32636) },
+ { AOM_CDF3(31022, 32009) }, { AOM_CDF3(2963, 32093) },
+ { AOM_CDF3(601, 943) }, { AOM_CDF3(14969, 21398) }
};
-#endif // CONFIG_EXT_PARTITION_TYPES
-static const aom_prob default_newmv_prob[NEWMV_MODE_CONTEXTS] = {
- 155, 116, 94, 32, 96, 56, 30,
-};
-
-static const aom_prob default_zeromv_prob[ZEROMV_MODE_CONTEXTS] = {
- 45, 13,
-};
+static const aom_cdf_prob default_newmv_cdf[NEWMV_MODE_CONTEXTS][CDF_SIZE(2)] =
+ { { AOM_CDF2(24035) }, { AOM_CDF2(16630) }, { AOM_CDF2(15339) },
+ { AOM_CDF2(8386) }, { AOM_CDF2(12222) }, { AOM_CDF2(4676) } };
-static const aom_prob default_refmv_prob[REFMV_MODE_CONTEXTS] = {
- 178, 212, 135, 244, 203, 122, 128, 128, 128,
-};
+static const aom_cdf_prob default_zeromv_cdf[GLOBALMV_MODE_CONTEXTS][CDF_SIZE(
+ 2)] = { { AOM_CDF2(2175) }, { AOM_CDF2(1054) } };
-static const aom_prob default_drl_prob[DRL_MODE_CONTEXTS] = {
- 119, 128, 189, 134, 128,
-};
-#if CONFIG_NEW_MULTISYMBOL
-static const aom_cdf_prob default_newmv_cdf[NEWMV_MODE_CONTEXTS][CDF_SIZE(2)] =
- { { AOM_ICDF(128 * 155), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 116), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 94), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 32), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 96), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 56), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 30), AOM_ICDF(32768), 0 } };
-static const aom_cdf_prob default_zeromv_cdf[ZEROMV_MODE_CONTEXTS][CDF_SIZE(
- 2)] = { { AOM_ICDF(128 * 45), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 13), AOM_ICDF(32768), 0 } };
static const aom_cdf_prob default_refmv_cdf[REFMV_MODE_CONTEXTS][CDF_SIZE(2)] =
- { { AOM_ICDF(128 * 178), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 212), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 135), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 244), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 203), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 122), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 } };
+ { { AOM_CDF2(23974) }, { AOM_CDF2(24188) }, { AOM_CDF2(17848) },
+ { AOM_CDF2(28622) }, { AOM_CDF2(24312) }, { AOM_CDF2(19923) } };
+
static const aom_cdf_prob default_drl_cdf[DRL_MODE_CONTEXTS][CDF_SIZE(2)] = {
- { AOM_ICDF(128 * 119), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 189), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 134), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 }
+ { AOM_CDF2(13104) }, { AOM_CDF2(24560) }, { AOM_CDF2(18945) }
};
-#endif
-
-static const aom_prob default_inter_compound_mode_probs
- [INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES - 1] = {
- { 154, 167, 233, 165, 143, 170, 167 }, // 0 = both zero mv
- { 75, 168, 237, 155, 135, 176, 172 }, // 1 = 1 zero + 1 predicted
- { 7, 173, 227, 128, 153, 188, 189 }, // 2 = two predicted mvs
- { 8, 120, 214, 113, 154, 178, 174 }, // 3 = 1 pred/zero, 1 new
- { 4, 85, 194, 94, 155, 173, 167 }, // 4 = two new mvs
- { 23, 89, 180, 73, 157, 151, 155 }, // 5 = one intra neighbour
- { 27, 49, 152, 91, 134, 153, 142 }, // 6 = two intra neighbours
- };
static const aom_cdf_prob
default_inter_compound_mode_cdf[INTER_MODE_CONTEXTS][CDF_SIZE(
INTER_COMPOUND_MODES)] = {
- { AOM_ICDF(19712), AOM_ICDF(28229), AOM_ICDF(30892), AOM_ICDF(31437),
- AOM_ICDF(31712), AOM_ICDF(32135), AOM_ICDF(32360), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9600), AOM_ICDF(24804), AOM_ICDF(29268), AOM_ICDF(30323),
- AOM_ICDF(30802), AOM_ICDF(31726), AOM_ICDF(32177), AOM_ICDF(32768), 0 },
- { AOM_ICDF(896), AOM_ICDF(22434), AOM_ICDF(27015), AOM_ICDF(29026),
- AOM_ICDF(29753), AOM_ICDF(31114), AOM_ICDF(31597), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(15904), AOM_ICDF(22127), AOM_ICDF(25421),
- AOM_ICDF(26864), AOM_ICDF(28996), AOM_ICDF(30001), AOM_ICDF(32768), 0 },
- { AOM_ICDF(512), AOM_ICDF(11222), AOM_ICDF(17217), AOM_ICDF(21445),
- AOM_ICDF(23473), AOM_ICDF(26133), AOM_ICDF(27550), AOM_ICDF(32768), 0 },
- { AOM_ICDF(2944), AOM_ICDF(13313), AOM_ICDF(17214), AOM_ICDF(20751),
- AOM_ICDF(23211), AOM_ICDF(25500), AOM_ICDF(26992), AOM_ICDF(32768), 0 },
- { AOM_ICDF(3456), AOM_ICDF(9067), AOM_ICDF(14069), AOM_ICDF(16907),
- AOM_ICDF(18817), AOM_ICDF(21214), AOM_ICDF(23139), AOM_ICDF(32768), 0 }
- };
-
-#if CONFIG_COMPOUND_SINGLEREF
-// TODO(zoeliu): Default values to be further adjusted based on the collected
-// stats.
-/*
-static const aom_prob default_inter_singleref_comp_mode_probs
- [INTER_MODE_CONTEXTS][INTER_SINGLEREF_COMP_MODES - 1] = {
- { 2, 173, 68, 180 }, // 0 = both zero mv
- { 7, 145, 160, 180 }, // 1 = 1 zero + 1 predicted
- { 7, 166, 126, 180 }, // 2 = two predicted mvs
- { 7, 94, 132, 180 }, // 3 = 1 pred/zero, 1 new
- { 8, 64, 64, 180 }, // 4 = two new mvs
- { 17, 81, 52, 180 }, // 5 = one intra neighbour
- { 25, 29, 50, 180 }, // 6 = two intra neighbours
- };*/
-static const aom_prob default_inter_singleref_comp_mode_probs
- [INTER_MODE_CONTEXTS][INTER_SINGLEREF_COMP_MODES - 1] = {
- { 2, 173, 68 }, // 0 = both zero mv
- { 7, 145, 160 }, // 1 = 1 zero + 1 predicted
- { 7, 166, 126 }, // 2 = two predicted mvs
- { 7, 94, 132 }, // 3 = 1 pred/zero, 1 new
- { 8, 64, 64 }, // 4 = two new mvs
- { 17, 81, 52 }, // 5 = one intra neighbour
- { 25, 29, 50 }, // 6 = two intra neighbours
- };
-
-static const aom_cdf_prob
- default_inter_singleref_comp_mode_cdf[INTER_MODE_CONTEXTS][CDF_SIZE(
- INTER_SINGLEREF_COMP_MODES)] = {
- { AOM_ICDF(21971), AOM_ICDF(24771), AOM_ICDF(25027), AOM_ICDF(32768), 0 },
- { AOM_ICDF(18053), AOM_ICDF(26690), AOM_ICDF(27586), AOM_ICDF(32768), 0 },
- { AOM_ICDF(20667), AOM_ICDF(26182), AOM_ICDF(27078), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11703), AOM_ICDF(22103), AOM_ICDF(22999), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7936), AOM_ICDF(13888), AOM_ICDF(14912), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9679), AOM_ICDF(13927), AOM_ICDF(16103), AOM_ICDF(32768), 0 },
- { AOM_ICDF(3349), AOM_ICDF(8470), AOM_ICDF(11670), AOM_ICDF(32768), 0 }
+ { AOM_CDF8(7760, 13823, 15808, 17641, 19156, 20666, 26891) },
+ { AOM_CDF8(10730, 19452, 21145, 22749, 24039, 25131, 28724) },
+ { AOM_CDF8(10664, 20221, 21588, 22906, 24295, 25387, 28436) },
+ { AOM_CDF8(13298, 16984, 20471, 24182, 25067, 25736, 26422) },
+ { AOM_CDF8(18904, 23325, 25242, 27432, 27898, 28258, 30758) },
+ { AOM_CDF8(10725, 17454, 20124, 22820, 24195, 25168, 26046) },
+ { AOM_CDF8(17125, 24273, 25814, 27492, 28214, 28704, 30592) },
+ { AOM_CDF8(13046, 23214, 24505, 25942, 27435, 28442, 29330) }
};
-#endif // CONFIG_COMPOUND_SINGLEREF
-#if CONFIG_COMPOUND_SEGMENT && CONFIG_WEDGE
-static const aom_prob
- default_compound_type_probs[BLOCK_SIZES_ALL][COMPOUND_TYPES - 1] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { 128, 128 }, { 128, 128 }, { 128, 128 },
-#endif
- { 128, 128 }, { 255, 128 }, { 255, 128 }, { 66, 51 }, { 72, 35 },
- { 79, 29 }, { 71, 18 }, { 81, 29 }, { 81, 26 }, { 69, 19 },
- { 104, 1 }, { 99, 1 }, { 75, 1 },
-#if CONFIG_EXT_PARTITION
- { 255, 1 }, { 255, 1 }, { 255, 1 },
-#endif // CONFIG_EXT_PARTITION
- { 208, 128 }, { 208, 128 }, { 208, 128 }, { 208, 128 }, { 208, 1 },
- { 208, 1 },
-#if CONFIG_EXT_PARTITION
- { 208, 1 }, { 208, 1 }
-#endif // CONFIG_EXT_PARTITION
- };
-#elif !CONFIG_COMPOUND_SEGMENT && CONFIG_WEDGE
-static const aom_prob
- default_compound_type_probs[BLOCK_SIZES_ALL][COMPOUND_TYPES - 1] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { 255 }, { 255 }, { 255 },
-#endif
- { 208 }, { 208 }, { 208 }, { 208 }, { 208 }, { 208 }, { 216 },
- { 216 }, { 216 }, { 224 }, { 224 }, { 240 }, { 240 },
-#if CONFIG_EXT_PARTITION
- { 255 }, { 255 }, { 255 },
-#endif // CONFIG_EXT_PARTITION
- { 208 }, { 208 }, { 208 }, { 208 }, { 255 }, { 255 },
-#if CONFIG_EXT_PARTITION
- { 255 }, { 255 }
-#endif // CONFIG_EXT_PARTITION
- };
-#elif CONFIG_COMPOUND_SEGMENT && !CONFIG_WEDGE
-static const aom_prob
- default_compound_type_probs[BLOCK_SIZES_ALL][COMPOUND_TYPES - 1] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { 255 }, { 255 }, { 255 },
-#endif
- { 208 }, { 208 }, { 208 }, { 208 }, { 208 }, { 208 }, { 216 },
- { 216 }, { 216 }, { 224 }, { 224 }, { 240 }, { 240 },
-#if CONFIG_EXT_PARTITION
- { 255 }, { 255 }, { 255 },
-#endif // CONFIG_EXT_PARTITION
- { 208 }, { 208 }, { 208 }, { 208 }, { 208 }, { 208 },
-#if CONFIG_EXT_PARTITION
- { 208 }, { 208 }
-#endif // CONFIG_EXT_PARTITION
- };
-#else
-static const aom_prob default_compound_type_probs[BLOCK_SIZES_ALL]
- [COMPOUND_TYPES - 1];
-#endif // CONFIG_COMPOUND_SEGMENT && CONFIG_WEDGE
-
-#if CONFIG_COMPOUND_SEGMENT && CONFIG_WEDGE
-static const aom_cdf_prob
- default_compound_type_cdf[BLOCK_SIZES_ALL][CDF_SIZE(COMPOUND_TYPES)] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32704), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32704), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8448), AOM_ICDF(13293), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9216), AOM_ICDF(12436), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10112), AOM_ICDF(12679), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9088), AOM_ICDF(10753), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10368), AOM_ICDF(12906), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10368), AOM_ICDF(12643), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8832), AOM_ICDF(10609), AOM_ICDF(32768), 0 },
- { AOM_ICDF(13312), AOM_ICDF(13388), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12672), AOM_ICDF(12751), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9600), AOM_ICDF(9691), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(32640), AOM_ICDF(32641), AOM_ICDF(32768), 0 }, // 255, 1
- { AOM_ICDF(32640), AOM_ICDF(32641), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32641), AOM_ICDF(32768), 0 },
-#endif // CONFIG_EXT_PARTITION
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26624), AOM_ICDF(26648), AOM_ICDF(32768), 0 }, // 208, 1
- { AOM_ICDF(26624), AOM_ICDF(26648), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(26624), AOM_ICDF(26648), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26624), AOM_ICDF(26648), AOM_ICDF(32768), 0 },
-#endif
- };
-#elif !CONFIG_COMPOUND_SEGMENT && CONFIG_WEDGE
-static const aom_cdf_prob
- default_compound_type_cdf[BLOCK_SIZES_ALL][CDF_SIZE(COMPOUND_TYPES)] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 }, // 255
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 }, // 208
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
- { AOM_ICDF(27648), AOM_ICDF(32768), 0 }, // 216
- { AOM_ICDF(27648), AOM_ICDF(32768), 0 },
- { AOM_ICDF(27648), AOM_ICDF(32768), 0 },
- { AOM_ICDF(28672), AOM_ICDF(32768), 0 }, // 224
- { AOM_ICDF(28672), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30720), AOM_ICDF(32768), 0 }, // 240
- { AOM_ICDF(30720), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 }, // 255
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
-#endif // CONFIG_EXT_PARTITION
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
-#endif // CONFIG_EXT_PARTITION
- };
-#elif CONFIG_COMPOUND_SEGMENT && !CONFIG_WEDGE
-static const aom_cdf_prob
- default_compound_type_cdf[BLOCK_SIZES_ALL][CDF_SIZE(COMPOUND_TYPES)] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 }, // 255
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 }, // 208
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
- { AOM_ICDF(27648), AOM_ICDF(32768), 0 }, // 216
- { AOM_ICDF(27648), AOM_ICDF(32768), 0 },
- { AOM_ICDF(27648), AOM_ICDF(32768), 0 },
- { AOM_ICDF(28672), AOM_ICDF(32768), 0 }, // 224
- { AOM_ICDF(28672), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30720), AOM_ICDF(32768), 0 }, // 240
- { AOM_ICDF(30720), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 }, // 255
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
-#endif // CONFIG_EXT_PARTITION
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 }, // 208
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
-#endif // CONFIG_EXT_PARTITION
- };
-#endif // CONFIG_COMPOUND_SEGMENT && CONFIG_WEDGE
-
-#if CONFIG_INTERINTRA
-static const aom_prob default_interintra_prob[BLOCK_SIZE_GROUPS] = {
- 128, 226, 244, 254,
-};
-#if CONFIG_NEW_MULTISYMBOL
static const aom_cdf_prob default_interintra_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(
- 2)] = { { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(226 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(244 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(254 * 128), AOM_ICDF(32768), 0 } };
-#endif
+ 2)] = { { AOM_CDF2(16384) },
+ { AOM_CDF2(26887) },
+ { AOM_CDF2(27597) },
+ { AOM_CDF2(30237) } };
-static const aom_prob
- default_interintra_mode_prob[BLOCK_SIZE_GROUPS][INTERINTRA_MODES - 1] = {
- { 128, 128, 128 }, // block_size < 8x8
- { 24, 34, 119 }, // block_size < 16x16
- { 38, 33, 95 }, // block_size < 32x32
- { 51, 21, 110 }, // block_size >= 32x32
- };
static const aom_cdf_prob
- default_interintra_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(
- INTERINTRA_MODES)] = {
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(28672), AOM_ICDF(32768), 0 },
- { AOM_ICDF(3072), AOM_ICDF(7016), AOM_ICDF(18987), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4864), AOM_ICDF(8461), AOM_ICDF(17481), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6528), AOM_ICDF(8681), AOM_ICDF(19031), AOM_ICDF(32768), 0 }
- };
-
-static const aom_prob default_wedge_interintra_prob[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 128, 128, 128,
-#endif
- 128, 128, 128, 194, 213, 217, 222, 224, 226, 220, 128, 128, 128,
-#if CONFIG_EXT_PARTITION
- 255, 255, 255,
-#endif // CONFIG_EXT_PARTITION
- 208, 208, 208, 208, 255, 255,
-#if CONFIG_EXT_PARTITION
- 255, 255
-#endif // CONFIG_EXT_PARTITION
-};
+ default_interintra_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(INTERINTRA_MODES)] =
+ { { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(1875, 11082, 27332) },
+ { AOM_CDF4(2473, 9996, 26388) },
+ { AOM_CDF4(4238, 11537, 25926) } };
-#if CONFIG_NEW_MULTISYMBOL
static const aom_cdf_prob
default_wedge_interintra_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(194 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(213 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(217 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(222 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(224 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(226 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(220 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
-#endif // CONFIG_EXT_PARTITION
- { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
-#endif // CONFIG_EXT_PARTITION
+ { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+ { AOM_CDF2(20036) }, { AOM_CDF2(24957) }, { AOM_CDF2(26704) },
+ { AOM_CDF2(27530) }, { AOM_CDF2(29564) }, { AOM_CDF2(29444) },
+ { AOM_CDF2(26872) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) }
};
-#endif // CONFIG_NEW_MULTISYMBOL
-#endif // CONFIG_INTERINTRA
-
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
-#ifdef TWO_MODE
-const aom_tree_index av1_ncobmc_mode_tree[TREE_SIZE(MAX_NCOBMC_MODES)] = {
- -NCOBMC_MODE_0, -NCOBMC_MODE_1
-};
-#else
-const aom_tree_index av1_ncobmc_mode_tree[TREE_SIZE(MAX_NCOBMC_MODES)] = {
- -NCOBMC_MODE_0, 2,
- -NCOBMC_MODE_1, 4,
- -NCOBMC_MODE_2, 6,
- -NCOBMC_MODE_3, 8,
- -NCOBMC_MODE_4, 10,
- -NCOBMC_MODE_5, 12,
- -NCOBMC_MODE_6, -NCOBMC_MODE_7
-};
-#endif // TWO_MODE
-
-// TODO(weitinglin): find default prob
-// right now setting the first mode with probability 1/255,
-// the last eight modes with equal probabilities
-static const aom_prob
- default_ncobmc_mode_prob[ADAPT_OVERLAP_BLOCKS][MAX_NCOBMC_MODES - 1] = {
-#ifdef TWO_MODE
- { 127 }, { 127 }, { 127 }, { 127 }
-#else
- { 32, 36, 43, 51, 64, 85, 128 }, // 8x8
- { 32, 36, 43, 51, 64, 85, 128 }, // 16X16
- { 32, 36, 43, 51, 64, 85, 128 }, // 32X32
- { 32, 36, 43, 51, 64, 85, 128 } // 64X64
-#endif // TWO_MODE
- };
-static const aom_cdf_prob
- default_ncobmc_mode_cdf[ADAPT_OVERLAP_BLOCKS][CDF_SIZE(MAX_NCOBMC_MODES)] =
-#ifdef TWO_MODE
- { { AOM_ICDF(16256), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16256), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16256), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16256), AOM_ICDF(32768), 0 } };
-#else
- { { AOM_ICDF(4096), AOM_ICDF(8192), AOM_ICDF(12288), AOM_ICDF(16384),
- AOM_ICDF(20480), AOM_ICDF(24576), AOM_ICDF(28672), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(4096), AOM_ICDF(8192), AOM_ICDF(12288), AOM_ICDF(16384),
- AOM_ICDF(20480), AOM_ICDF(24576), AOM_ICDF(28672), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(4096), AOM_ICDF(8192), AOM_ICDF(12288), AOM_ICDF(16384),
- AOM_ICDF(20480), AOM_ICDF(24576), AOM_ICDF(28672), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(4096), AOM_ICDF(8192), AOM_ICDF(12288), AOM_ICDF(16384),
- AOM_ICDF(20480), AOM_ICDF(24576), AOM_ICDF(28672), AOM_ICDF(32768),
- 0 } };
-#endif // TWO_MODEE
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
-// Change this section appropriately once warped motion is supported
-#if CONFIG_MOTION_VAR && !CONFIG_WARPED_MOTION
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
-const aom_tree_index av1_motion_mode_tree[TREE_SIZE(MOTION_MODES)] = {
- -SIMPLE_TRANSLATION, 2, -OBMC_CAUSAL, -NCOBMC_ADAPT_WEIGHT,
-};
-static const aom_prob
- default_motion_mode_prob[BLOCK_SIZES_ALL][MOTION_MODES - 1] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { 255, 255 },
- { 255, 255 },
- { 255, 255 },
-#endif
- { 255, 255 },
- { 255, 255 },
- { 255, 255 },
- /** Only these nine block sizes allow ncobmc_adapt_weight **/
- { 45, 207 },
- { 42, 211 },
- { 34, 207 },
- { 181, 123 },
- { 129, 141 },
- { 15, 209 },
- { 231, 122 },
- { 195, 190 },
- { 168, 190 },
- /** ----------------------------------------------------- **/
- { 244, 255 },
-#if CONFIG_EXT_PARTITION
- { 252, 255 },
- { 252, 255 },
- { 252, 255 },
-#endif // CONFIG_EXT_PARTITION
- { 255, 200 },
- { 255, 200 },
- { 255, 200 },
- { 255, 200 },
-#if CONFIG_EXT_PARTITION
- { 252, 255 },
- { 252, 200 },
- { 252, 200 },
-#endif // CONFIG_EXT_PARTITION
- };
static const aom_cdf_prob
- default_motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(MOTION_MODES)] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0, 0 },
-#endif
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0, 0 },
- /** Only these seven block sizes allow ncobmc_adapt_weight **/
- { AOM_ICDF(5702), AOM_ICDF(27555), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5408), AOM_ICDF(27964), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4330), AOM_ICDF(27298), AOM_ICDF(32768), 0 },
- { AOM_ICDF(23107), AOM_ICDF(27760), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16490), AOM_ICDF(25461), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1959), AOM_ICDF(27153), AOM_ICDF(32768), 0 },
- { AOM_ICDF(29530), AOM_ICDF(31073), AOM_ICDF(32768), 0 },
- { AOM_ICDF(25057), AOM_ICDF(30840), AOM_ICDF(32768), 0 },
- { AOM_ICDF(21588), AOM_ICDF(29940), AOM_ICDF(32768), 0 },
- /** ----------------------------------------------------- **/
- { AOM_ICDF(244 * 128), AOM_ICDF(32768), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(32256), AOM_ICDF(32768), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32256), AOM_ICDF(32768), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32256), AOM_ICDF(32768), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 }
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
-#endif
- };
-#else // CONFIG_NCOBMC_ADAPT_WEIGHT
-const aom_tree_index av1_motion_mode_tree[TREE_SIZE(MOTION_MODES)] = {
- -SIMPLE_TRANSLATION, -OBMC_CAUSAL
-};
-
-static const aom_prob
- default_motion_mode_prob[BLOCK_SIZES_ALL][MOTION_MODES - 1] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { 255 }, { 255 }, { 255 },
-#endif
- { 255 }, { 255 }, { 255 }, { 151 }, { 153 }, { 144 }, { 178 },
- { 165 }, { 160 }, { 207 }, { 195 }, { 168 }, { 244 },
-#if CONFIG_EXT_PARTITION
- { 252 }, { 252 }, { 252 },
-#endif // CONFIG_EXT_PARTITION
- { 208 }, { 208 }, { 208 }, { 208 }, { 208 }, { 208 },
-#if CONFIG_EXT_PARTITION
- { 208 }, { 208 }
-#endif // CONFIG_EXT_PARTITION
- };
+ default_compound_type_cdf[BLOCK_SIZES_ALL][CDF_SIZE(COMPOUND_TYPES - 1)] = {
+ { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+ { AOM_CDF2(23431) }, { AOM_CDF2(13171) }, { AOM_CDF2(11470) },
+ { AOM_CDF2(9770) }, { AOM_CDF2(9100) }, { AOM_CDF2(8233) },
+ { AOM_CDF2(6172) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+ { AOM_CDF2(11820) }, { AOM_CDF2(7701) }, { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) }
+ };
+
+static const aom_cdf_prob default_wedge_idx_cdf[BLOCK_SIZES_ALL][CDF_SIZE(16)] =
+ { { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
+ 20480, 22528, 24576, 26624, 28672, 30720) },
+ { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
+ 20480, 22528, 24576, 26624, 28672, 30720) },
+ { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
+ 20480, 22528, 24576, 26624, 28672, 30720) },
+ { AOM_CDF16(2438, 4440, 6599, 8663, 11005, 12874, 15751, 18094, 20359,
+ 22362, 24127, 25702, 27752, 29450, 31171) },
+ { AOM_CDF16(806, 3266, 6005, 6738, 7218, 7367, 7771, 14588, 16323, 17367,
+ 18452, 19422, 22839, 26127, 29629) },
+ { AOM_CDF16(2779, 3738, 4683, 7213, 7775, 8017, 8655, 14357, 17939, 21332,
+ 24520, 27470, 29456, 30529, 31656) },
+ { AOM_CDF16(1684, 3625, 5675, 7108, 9302, 11274, 14429, 17144, 19163,
+ 20961, 22884, 24471, 26719, 28714, 30877) },
+ { AOM_CDF16(1142, 3491, 6277, 7314, 8089, 8355, 9023, 13624, 15369, 16730,
+ 18114, 19313, 22521, 26012, 29550) },
+ { AOM_CDF16(2742, 4195, 5727, 8035, 8980, 9336, 10146, 14124, 17270,
+ 20533, 23434, 25972, 27944, 29570, 31416) },
+ { AOM_CDF16(1727, 3948, 6101, 7796, 9841, 12344, 15766, 18944, 20638,
+ 22038, 23963, 25311, 26988, 28766, 31012) },
+ { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
+ 20480, 22528, 24576, 26624, 28672, 30720) },
+ { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
+ 20480, 22528, 24576, 26624, 28672, 30720) },
+ { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
+ 20480, 22528, 24576, 26624, 28672, 30720) },
+ { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
+ 20480, 22528, 24576, 26624, 28672, 30720) },
+ { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
+ 20480, 22528, 24576, 26624, 28672, 30720) },
+ { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
+ 20480, 22528, 24576, 26624, 28672, 30720) },
+ { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
+ 20480, 22528, 24576, 26624, 28672, 30720) },
+ { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
+ 20480, 22528, 24576, 26624, 28672, 30720) },
+ { AOM_CDF16(154, 987, 1925, 2051, 2088, 2111, 2151, 23033, 23703, 24284,
+ 24985, 25684, 27259, 28883, 30911) },
+ { AOM_CDF16(1135, 1322, 1493, 2635, 2696, 2737, 2770, 21016, 22935, 25057,
+ 27251, 29173, 30089, 30960, 31933) },
+ { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
+ 20480, 22528, 24576, 26624, 28672, 30720) },
+ { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
+ 20480, 22528, 24576, 26624, 28672, 30720) } };
+
+static const aom_cdf_prob default_motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(
+ MOTION_MODES)] = { { AOM_CDF3(10923, 21845) }, { AOM_CDF3(10923, 21845) },
+ { AOM_CDF3(10923, 21845) }, { AOM_CDF3(7651, 24760) },
+ { AOM_CDF3(4738, 24765) }, { AOM_CDF3(5391, 25528) },
+ { AOM_CDF3(19419, 26810) }, { AOM_CDF3(5123, 23606) },
+ { AOM_CDF3(11606, 24308) }, { AOM_CDF3(26260, 29116) },
+ { AOM_CDF3(20360, 28062) }, { AOM_CDF3(21679, 26830) },
+ { AOM_CDF3(29516, 30701) }, { AOM_CDF3(28898, 30397) },
+ { AOM_CDF3(30878, 31335) }, { AOM_CDF3(32507, 32558) },
+ { AOM_CDF3(10923, 21845) }, { AOM_CDF3(10923, 21845) },
+ { AOM_CDF3(28799, 31390) }, { AOM_CDF3(26431, 30774) },
+ { AOM_CDF3(28973, 31594) }, { AOM_CDF3(29742, 31203) } };
-static const aom_cdf_prob
- default_motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(MOTION_MODES)] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(151 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(153 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(144 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(178 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(165 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(160 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(207 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(195 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(168 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(244 * 128), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(252 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(252 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(252 * 128), AOM_ICDF(32768), 0 },
-#endif // CONFIG_EXT_PARTITION
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
-#endif // CONFIG_EXT_PARTITION
- };
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
-#elif !CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
-
-const aom_tree_index av1_motion_mode_tree[TREE_SIZE(MOTION_MODES)] = {
- -SIMPLE_TRANSLATION, -WARPED_CAUSAL
-};
-
-static const aom_prob
- default_motion_mode_prob[BLOCK_SIZES_ALL][MOTION_MODES - 1] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { 255 }, { 255 }, { 255 },
-#endif
- { 255 }, { 255 }, { 255 }, { 151 }, { 153 }, { 144 }, { 178 },
- { 165 }, { 160 }, { 207 }, { 195 }, { 168 }, { 244 },
-#if CONFIG_EXT_PARTITION
- { 252 }, { 252 }, { 252 },
-#endif // CONFIG_EXT_PARTITION
- { 208 }, { 208 }, { 208 }, { 208 }, { 208 }, { 208 },
-#if CONFIG_EXT_PARTITION
- { 252 }, { 252 }
-#endif // CONFIG_EXT_PARTITION
- };
-
-static const aom_cdf_prob
- default_motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(MOTION_MODES)] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(151 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(153 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(144 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(178 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(165 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(160 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(207 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(195 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(168 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(244 * 128), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(252 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(252 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(252 * 128), AOM_ICDF(32768), 0 },
-#endif // CONFIG_EXT_PARTITION
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
-#endif // CONFIG_EXT_PARTITION
- };
-
-#elif CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
-const aom_tree_index av1_motion_mode_tree[TREE_SIZE(MOTION_MODES)] = {
- -SIMPLE_TRANSLATION, 2, -OBMC_CAUSAL, 4, -NCOBMC_ADAPT_WEIGHT, -WARPED_CAUSAL
-};
-
-static const aom_prob default_motion_mode_prob[BLOCK_SIZES_ALL][MOTION_MODES -
- 1] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { 128, 128, 255 }, { 128, 128, 128 }, { 128, 128, 128 },
-#endif
- { 128, 128, 128 }, { 128, 128, 128 }, { 128, 128, 128 }, { 62, 115, 128 },
- { 39, 131, 128 }, { 39, 132, 128 }, { 118, 94, 128 }, { 77, 125, 128 },
- { 100, 121, 128 }, { 190, 66, 128 }, { 207, 102, 128 }, { 197, 100, 128 },
- { 239, 76, 128 },
-#if CONFIG_EXT_PARTITION
- { 252, 200, 128 }, { 252, 200, 128 }, { 252, 200, 128 },
-#endif // CONFIG_EXT_PARTITION
- { 208, 200, 128 }, { 208, 200, 128 }, { 208, 200, 128 }, { 208, 200, 128 }
-};
-static const aom_cdf_prob
- default_motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(MOTION_MODES)] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- /** Only these nine block sizes allow ncobmc_adapt_weight **/
- { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- /***********************************************************/
- { AOM_ICDF(30592), AOM_ICDF(31238), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32767), AOM_ICDF(32768), 0 }
- };
-
-const aom_tree_index av1_ncobmc_tree[TREE_SIZE(OBMC_FAMILY_MODES)] = {
- -SIMPLE_TRANSLATION, 2, -OBMC_CAUSAL, -NCOBMC_ADAPT_WEIGHT
-};
-
-static const aom_prob
- default_ncobmc_prob[BLOCK_SIZES_ALL][OBMC_FAMILY_MODES - 1] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { 128, 255 }, { 128, 255 }, { 128, 255 },
-#endif
- { 128, 255 }, { 128, 255 }, { 128, 255 }, { 45, 255 }, { 79, 255 },
- { 75, 255 }, { 130, 255 }, { 141, 255 }, { 144, 255 }, { 208, 255 },
- { 201, 255 }, { 186, 255 }, { 231, 255 },
-#if CONFIG_EXT_PARTITION
- { 252, 255 }, { 252, 255 }, { 252, 255 },
-#endif // CONFIG_EXT_PARTITION
- { 208, 255 }, { 208, 255 }, { 208, 255 }, { 208, 255 }
- };
-
-static const aom_cdf_prob
- default_ncobmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(OBMC_FAMILY_MODES)] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { AOM_ICDF(128 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(128 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- /** Only these nine block sizes allow ncobmc_adapt_weight **/
- { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
- /***********************************************************/
- { AOM_ICDF(231 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(252 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(252 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(252 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
-#endif // CONFIG_EXT_PARTITION
- { AOM_ICDF(208 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 }
- };
-#else
-const aom_tree_index av1_motion_mode_tree[TREE_SIZE(MOTION_MODES)] = {
- -SIMPLE_TRANSLATION, 2, -OBMC_CAUSAL, -WARPED_CAUSAL,
-};
-
-static const aom_prob
- default_motion_mode_prob[BLOCK_SIZES_ALL][MOTION_MODES - 1] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { 128, 128 }, { 128, 128 }, { 128, 128 },
-#endif
- { 128, 128 }, { 128, 128 }, { 128, 128 }, { 62, 115 }, { 39, 131 },
- { 39, 132 }, { 118, 94 }, { 77, 125 }, { 100, 121 }, { 190, 66 },
- { 207, 102 }, { 197, 100 }, { 239, 76 },
-#if CONFIG_EXT_PARTITION
- { 252, 200 }, { 252, 200 }, { 252, 200 },
-#endif // CONFIG_EXT_PARTITION
- { 208, 200 }, { 208, 200 }, { 208, 200 }, { 208, 200 }, { 208, 200 },
- { 208, 200 },
-#if CONFIG_EXT_PARTITION
- { 252, 200 }, { 252, 200 }
-#endif // CONFIG_EXT_PARTITION
- };
-static const aom_cdf_prob
- default_motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(MOTION_MODES)] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7936), AOM_ICDF(19091), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4991), AOM_ICDF(19205), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4992), AOM_ICDF(19314), AOM_ICDF(32768), 0 },
- { AOM_ICDF(15104), AOM_ICDF(21590), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9855), AOM_ICDF(21043), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12800), AOM_ICDF(22238), AOM_ICDF(32768), 0 },
- { AOM_ICDF(24320), AOM_ICDF(26498), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26496), AOM_ICDF(28995), AOM_ICDF(32768), 0 },
- { AOM_ICDF(25216), AOM_ICDF(28166), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30592), AOM_ICDF(31238), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32768), 0 },
-#endif
- };
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
-// Probability for the case that only 1 additional motion mode is allowed
-static const aom_prob default_obmc_prob[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 128, 128, 128,
-#endif
- 128, 128, 128, 45, 79, 75, 130, 141, 144, 208, 201, 186, 231,
-#if CONFIG_EXT_PARTITION
- 252, 252, 252,
-#endif // CONFIG_EXT_PARTITION
- 208, 208, 208, 208, 208, 208,
-#if CONFIG_EXT_PARTITION
- 252, 252
-#endif // CONFIG_EXT_PARTITION
-};
-
-#if CONFIG_NEW_MULTISYMBOL || CONFIG_NCOBMC_ADAPT_WEIGHT
static const aom_cdf_prob default_obmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(45 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(79 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(75 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(130 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(141 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(144 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(201 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(186 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(231 * 128), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(252 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(252 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(252 * 128), AOM_ICDF(32768), 0 },
-#endif // CONFIG_EXT_PARTITION
- { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(252 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(252 * 128), AOM_ICDF(32768), 0 },
-#endif // CONFIG_EXT_PARTITION
-};
-#endif // CONFIG_NEW_MULTISYMBOL
-#endif
-
-static const aom_prob default_delta_q_probs[DELTA_Q_PROBS] = { 220, 220, 220 };
-static const aom_cdf_prob default_delta_q_cdf[CDF_SIZE(DELTA_Q_PROBS + 1)] = {
- AOM_ICDF(28160), AOM_ICDF(32120), AOM_ICDF(32677), AOM_ICDF(32768), 0
-};
-#if CONFIG_EXT_DELTA_Q
-#if CONFIG_LOOPFILTER_LEVEL
-static const aom_prob
- default_delta_lf_multi_probs[FRAME_LF_COUNT][DELTA_LF_PROBS] = {
- { 220, 220, 220 }, { 220, 220, 220 }, { 220, 220, 220 }, { 220, 220, 220 }
- };
-static const aom_cdf_prob
- default_delta_lf_multi_cdf[FRAME_LF_COUNT][CDF_SIZE(DELTA_LF_PROBS + 1)] = {
- { AOM_ICDF(28160), AOM_ICDF(32120), AOM_ICDF(32677), AOM_ICDF(32768), 0 },
- { AOM_ICDF(28160), AOM_ICDF(32120), AOM_ICDF(32677), AOM_ICDF(32768), 0 },
- { AOM_ICDF(28160), AOM_ICDF(32120), AOM_ICDF(32677), AOM_ICDF(32768), 0 },
- { AOM_ICDF(28160), AOM_ICDF(32120), AOM_ICDF(32677), AOM_ICDF(32768), 0 }
- };
-#endif // CONFIG_LOOPFILTER_LEVEL
-static const aom_prob default_delta_lf_probs[DELTA_LF_PROBS] = { 220, 220,
- 220 };
-static const aom_cdf_prob default_delta_lf_cdf[CDF_SIZE(DELTA_LF_PROBS + 1)] = {
- AOM_ICDF(28160), AOM_ICDF(32120), AOM_ICDF(32677), AOM_ICDF(32768), 0
-};
-#endif
-
-/* clang-format off */
-#if CONFIG_INTERINTRA
-const aom_tree_index av1_interintra_mode_tree[TREE_SIZE(INTERINTRA_MODES)] = {
- -II_DC_PRED, 2, /* 0 = II_DC_NODE */
- -II_SMOOTH_PRED, 4, /* 1 = II_SMOOTH_PRED */
- -II_V_PRED, -II_H_PRED /* 2 = II_V_NODE */
-};
-#endif // CONFIG_INTERINTRA
-
-const aom_tree_index av1_inter_compound_mode_tree
- [TREE_SIZE(INTER_COMPOUND_MODES)] = {
- -INTER_COMPOUND_OFFSET(ZERO_ZEROMV), 2,
- -INTER_COMPOUND_OFFSET(NEAREST_NEARESTMV), 4,
- 6, -INTER_COMPOUND_OFFSET(NEW_NEWMV),
- -INTER_COMPOUND_OFFSET(NEAR_NEARMV), 8,
- 10, 12,
- -INTER_COMPOUND_OFFSET(NEAREST_NEWMV), -INTER_COMPOUND_OFFSET(NEW_NEARESTMV),
- -INTER_COMPOUND_OFFSET(NEAR_NEWMV), -INTER_COMPOUND_OFFSET(NEW_NEARMV)
-};
-
-#if CONFIG_COMPOUND_SINGLEREF
-// TODO(zoeliu): To redesign the tree structure once the number of mode changes.
-/*
-const aom_tree_index av1_inter_singleref_comp_mode_tree
- [TREE_SIZE(INTER_SINGLEREF_COMP_MODES)] = {
- -INTER_SINGLEREF_COMP_OFFSET(SR_ZERO_NEWMV), 2,
- -INTER_SINGLEREF_COMP_OFFSET(SR_NEAREST_NEARMV), 4,
- 6, -INTER_SINGLEREF_COMP_OFFSET(SR_NEW_NEWMV),
- -INTER_SINGLEREF_COMP_OFFSET(SR_NEAREST_NEWMV),
- -INTER_SINGLEREF_COMP_OFFSET(SR_NEAR_NEWMV)
-};*/
-
-const aom_tree_index av1_inter_singleref_comp_mode_tree
- [TREE_SIZE(INTER_SINGLEREF_COMP_MODES)] = {
- -INTER_SINGLEREF_COMP_OFFSET(SR_ZERO_NEWMV), 2,
- -INTER_SINGLEREF_COMP_OFFSET(SR_NEAREST_NEARMV), 4,
- -INTER_SINGLEREF_COMP_OFFSET(SR_NEAR_NEWMV),
- -INTER_SINGLEREF_COMP_OFFSET(SR_NEW_NEWMV)
-};
-#endif // CONFIG_COMPOUND_SINGLEREF
-
-#if CONFIG_COMPOUND_SEGMENT && CONFIG_WEDGE
-const aom_tree_index av1_compound_type_tree[TREE_SIZE(COMPOUND_TYPES)] = {
- -COMPOUND_AVERAGE, 2, -COMPOUND_WEDGE, -COMPOUND_SEG
-};
-#elif !CONFIG_COMPOUND_SEGMENT && CONFIG_WEDGE
-const aom_tree_index av1_compound_type_tree[TREE_SIZE(COMPOUND_TYPES)] = {
- -COMPOUND_AVERAGE, -COMPOUND_WEDGE
-};
-#elif CONFIG_COMPOUND_SEGMENT && !CONFIG_WEDGE
-const aom_tree_index av1_compound_type_tree[TREE_SIZE(COMPOUND_TYPES)] = {
- -COMPOUND_AVERAGE, -COMPOUND_SEG
-};
-#else
-const aom_tree_index av1_compound_type_tree[TREE_SIZE(COMPOUND_TYPES)] = {};
-#endif // CONFIG_COMPOUND_SEGMENT && CONFIG_WEDGE
-/* clang-format on */
-
-const aom_tree_index av1_partition_tree[TREE_SIZE(PARTITION_TYPES)] = {
- -PARTITION_NONE, 2, -PARTITION_HORZ, 4, -PARTITION_VERT, -PARTITION_SPLIT
-};
-
-#if CONFIG_EXT_PARTITION_TYPES
-/* clang-format off */
-const aom_tree_index av1_ext_partition_tree[TREE_SIZE(EXT_PARTITION_TYPES)] = {
- -PARTITION_NONE, 2,
- 6, 4,
- 8, -PARTITION_SPLIT,
- -PARTITION_HORZ, 10,
- -PARTITION_VERT, 14,
-
- -PARTITION_HORZ_A, 12,
- -PARTITION_HORZ_B, -PARTITION_HORZ_4,
-
- -PARTITION_VERT_A, 16,
- -PARTITION_VERT_B, -PARTITION_VERT_4
-};
-/* clang-format on */
-#endif // CONFIG_EXT_PARTITION_TYPES
-
-static const aom_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = {
- 6, 97, 151, 205,
+ { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+ { AOM_CDF2(10437) }, { AOM_CDF2(9371) }, { AOM_CDF2(9301) },
+ { AOM_CDF2(17432) }, { AOM_CDF2(14423) }, { AOM_CDF2(15142) },
+ { AOM_CDF2(25817) }, { AOM_CDF2(22823) }, { AOM_CDF2(22083) },
+ { AOM_CDF2(30128) }, { AOM_CDF2(31014) }, { AOM_CDF2(31560) },
+ { AOM_CDF2(32638) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+ { AOM_CDF2(23664) }, { AOM_CDF2(20901) }, { AOM_CDF2(24008) },
+ { AOM_CDF2(26879) }
};
-#if CONFIG_NEW_MULTISYMBOL
-static const aom_cdf_prob
- default_intra_inter_cdf[INTRA_INTER_CONTEXTS][CDF_SIZE(2)] = {
- { AOM_ICDF(768), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12416), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19328), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26240), AOM_ICDF(32768), 0 }
- };
-#endif
+static const aom_cdf_prob default_intra_inter_cdf[INTRA_INTER_CONTEXTS]
+ [CDF_SIZE(2)] = {
+ { AOM_CDF2(806) },
+ { AOM_CDF2(16662) },
+ { AOM_CDF2(20186) },
+ { AOM_CDF2(26538) }
+ };
-static const aom_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = {
- 190, 156, 91, 77, 22
-};
-
-#if CONFIG_NEW_MULTISYMBOL
static const aom_cdf_prob default_comp_inter_cdf[COMP_INTER_CONTEXTS][CDF_SIZE(
- 2)] = { { AOM_ICDF(24290), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19956), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11641), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9804), AOM_ICDF(32768), 0 },
- { AOM_ICDF(2842), AOM_ICDF(32768), 0 } };
-#endif // CONFIG_NEW_MULTISYMBOL
-
-#if CONFIG_EXT_COMP_REFS
-static const aom_prob default_comp_ref_type_p[COMP_REF_TYPE_CONTEXTS] = {
- 8, 20, 78, 91, 194
-};
-static const aom_prob
- default_uni_comp_ref_p[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1] = {
- { 88, 30, 28 }, { 218, 97, 105 }, { 254, 180, 196 }
- };
+ 2)] = { { AOM_CDF2(26828) },
+ { AOM_CDF2(24035) },
+ { AOM_CDF2(12031) },
+ { AOM_CDF2(10640) },
+ { AOM_CDF2(2901) } };
+
+static const aom_cdf_prob default_comp_ref_type_cdf[COMP_REF_TYPE_CONTEXTS]
+ [CDF_SIZE(2)] = {
+ { AOM_CDF2(1198) },
+ { AOM_CDF2(2070) },
+ { AOM_CDF2(9166) },
+ { AOM_CDF2(7499) },
+ { AOM_CDF2(22475) }
+ };
-#if CONFIG_NEW_MULTISYMBOL
static const aom_cdf_prob
- default_comp_ref_type_cdf[COMP_REF_TYPE_CONTEXTS][CDF_SIZE(2)] = {
- { AOM_ICDF(8 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(20 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(78 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(91 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(194 * 128), AOM_ICDF(32768), 0 }
- };
-static const aom_cdf_prob
- default_uni_comp_ref_cdf[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1]
- [CDF_SIZE(2)] = {
- { { AOM_ICDF(88 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(28 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(218 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(97 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(105 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(254 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(180 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(196 * 128), AOM_ICDF(32768), 0 } }
- };
-#endif // CONFIG_NEW_MULTISYMBOL
-#endif // CONFIG_EXT_COMP_REFS
-
-#if CONFIG_EXT_REFS
-static const aom_prob default_comp_ref_p[REF_CONTEXTS][FWD_REFS - 1] = {
- { 28, 10, 8 },
- { 77, 27, 26 },
- { 127, 62, 56 },
- { 186, 126, 160 },
- { 236, 143, 172 }
-};
-
-static const aom_prob default_comp_bwdref_p[REF_CONTEXTS][BWD_REFS - 1] = {
- { 22, 13 }, { 140, 124 }, { 241, 239 }, { 128, 128 }, { 128, 128 }
-};
+ default_uni_comp_ref_cdf[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS -
+ 1][CDF_SIZE(2)] = {
+ { { AOM_CDF2(5284) }, { AOM_CDF2(3865) }, { AOM_CDF2(3128) } },
+ { { AOM_CDF2(23152) }, { AOM_CDF2(14173) }, { AOM_CDF2(15270) } },
+ { { AOM_CDF2(31774) }, { AOM_CDF2(25120) }, { AOM_CDF2(26710) } }
+ };
+
+static const aom_cdf_prob default_single_ref_cdf[REF_CONTEXTS][SINGLE_REFS - 1]
+ [CDF_SIZE(2)] = {
+ { { AOM_CDF2(4897) },
+ { AOM_CDF2(1555) },
+ { AOM_CDF2(4236) },
+ { AOM_CDF2(8650) },
+ { AOM_CDF2(904) },
+ { AOM_CDF2(1444) } },
+ { { AOM_CDF2(16973) },
+ { AOM_CDF2(16751) },
+ { AOM_CDF2(19647) },
+ { AOM_CDF2(24773) },
+ { AOM_CDF2(11014) },
+ { AOM_CDF2(15087) } },
+ { { AOM_CDF2(29744) },
+ { AOM_CDF2(30279) },
+ { AOM_CDF2(31194) },
+ { AOM_CDF2(31895) },
+ { AOM_CDF2(26875) },
+ { AOM_CDF2(30304) } }
+ };
-#if CONFIG_NEW_MULTISYMBOL
static const aom_cdf_prob
default_comp_ref_cdf[REF_CONTEXTS][FWD_REFS - 1][CDF_SIZE(2)] = {
- { { AOM_ICDF(3556), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1217), AOM_ICDF(32768), 0 },
- { AOM_ICDF(988), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(9857), AOM_ICDF(32768), 0 },
- { AOM_ICDF(3394), AOM_ICDF(32768), 0 },
- { AOM_ICDF(3303), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(16237), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7946), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7195), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(23826), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16124), AOM_ICDF(32768), 0 },
- { AOM_ICDF(20536), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(30195), AOM_ICDF(32768), 0 },
- { AOM_ICDF(18344), AOM_ICDF(32768), 0 },
- { AOM_ICDF(21980), AOM_ICDF(32768), 0 } }
+ { { AOM_CDF2(4946) }, { AOM_CDF2(9468) }, { AOM_CDF2(1503) } },
+ { { AOM_CDF2(19891) }, { AOM_CDF2(22441) }, { AOM_CDF2(15160) } },
+ { { AOM_CDF2(30731) }, { AOM_CDF2(31059) }, { AOM_CDF2(27544) } }
};
static const aom_cdf_prob
default_comp_bwdref_cdf[REF_CONTEXTS][BWD_REFS - 1][CDF_SIZE(2)] = {
- { { AOM_ICDF(2762), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1614), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(17976), AOM_ICDF(32768), 0 },
- { AOM_ICDF(15912), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(30894), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30639), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(32768), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32768), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(32768), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32768), AOM_ICDF(32768), 0 } }
+ { { AOM_CDF2(2235) }, { AOM_CDF2(1423) } },
+ { { AOM_CDF2(17182) }, { AOM_CDF2(15175) } },
+ { { AOM_CDF2(30606) }, { AOM_CDF2(30489) } }
};
-#endif // CONFIG_NEW_MULTISYMBOL
-#else // !CONFIG_EXT_REFS
-
-static const aom_prob default_comp_ref_p[REF_CONTEXTS][COMP_REFS - 1] = {
- { 43 }, { 100 }, { 137 }, { 212 }, { 229 },
-};
-#if CONFIG_NEW_MULTISYMBOL
static const aom_cdf_prob
- default_comp_ref_cdf[REF_CONTEXTS][COMP_REFS - 1][CDF_SIZE(2)] = {
- { { AOM_ICDF(43 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(100 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(137 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(212 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(229 * 128), AOM_ICDF(32768), 0 } }
+ default_palette_y_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)] = {
+ { AOM_CDF7(7952, 13000, 18149, 21478, 25527, 29241) },
+ { AOM_CDF7(7139, 11421, 16195, 19544, 23666, 28073) },
+ { AOM_CDF7(7788, 12741, 17325, 20500, 24315, 28530) },
+ { AOM_CDF7(8271, 14064, 18246, 21564, 25071, 28533) },
+ { AOM_CDF7(12725, 19180, 21863, 24839, 27535, 30120) },
+ { AOM_CDF7(9711, 14888, 16923, 21052, 25661, 27875) },
+ { AOM_CDF7(14940, 20797, 21678, 24186, 27033, 28999) }
};
-#endif // CONFIG_NEW_MULTISYMBOL
-#endif // CONFIG_EXT_REFS
-static const aom_prob default_single_ref_p[REF_CONTEXTS][SINGLE_REFS - 1] = {
-#if CONFIG_EXT_REFS
- { 36, 16, 32, 57, 11, 14 },
- { 68, 128, 73, 128, 49, 124 },
- { 136, 236, 127, 170, 81, 238 },
- { 128, 128, 191, 211, 115, 128 },
- { 224, 128, 230, 242, 208, 128 }
-#else // !CONFIG_EXT_REFS
- { 31, 25 }, { 72, 80 }, { 147, 148 }, { 197, 191 }, { 235, 247 },
-#endif // CONFIG_EXT_REFS
-};
-
-#if CONFIG_NEW_MULTISYMBOL
static const aom_cdf_prob
- default_single_ref_cdf[REF_CONTEXTS][SINGLE_REFS - 1][CDF_SIZE(2)] = {
-#if CONFIG_EXT_REFS
- { { AOM_ICDF(4623), AOM_ICDF(32768), 0 },
- { AOM_ICDF(2110), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4132), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7309), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1392), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1781), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(8659), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16372), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9371), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16322), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6216), AOM_ICDF(32768), 0 },
- { AOM_ICDF(15834), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(17353), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30182), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16300), AOM_ICDF(32768), 0 },
- { AOM_ICDF(21702), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10365), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30486), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(32768), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32768), AOM_ICDF(32768), 0 },
- { AOM_ICDF(24426), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26972), AOM_ICDF(32768), 0 },
- { AOM_ICDF(14760), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32768), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(28634), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32768), AOM_ICDF(32768), 0 },
- { AOM_ICDF(29425), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30969), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26676), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32768), AOM_ICDF(32768), 0 } }
-#else // !CONFIG_EXT_REFS
- { { AOM_ICDF(31 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(25 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(72 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(80 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(147 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(148 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(197 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(191 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(235 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(247 * 128), AOM_ICDF(32768), 0 } }
-#endif // CONFIG_EXT_REFS
+ default_palette_uv_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)] = {
+ { AOM_CDF7(8713, 19979, 27128, 29609, 31331, 32272) },
+ { AOM_CDF7(5839, 15573, 23581, 26947, 29848, 31700) },
+ { AOM_CDF7(4426, 11260, 17999, 21483, 25863, 29430) },
+ { AOM_CDF7(3228, 9464, 14993, 18089, 22523, 27420) },
+ { AOM_CDF7(3768, 8886, 13091, 17852, 22495, 27207) },
+ { AOM_CDF7(2464, 8451, 12861, 21632, 25525, 28555) },
+ { AOM_CDF7(1269, 5435, 10433, 18963, 21700, 25865) }
+ };
+
+static const aom_cdf_prob default_palette_y_mode_cdf
+ [PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS][CDF_SIZE(2)] = {
+ { { AOM_CDF2(31676) }, { AOM_CDF2(3419) }, { AOM_CDF2(1261) } },
+ { { AOM_CDF2(31912) }, { AOM_CDF2(2859) }, { AOM_CDF2(980) } },
+ { { AOM_CDF2(31823) }, { AOM_CDF2(3400) }, { AOM_CDF2(781) } },
+ { { AOM_CDF2(32030) }, { AOM_CDF2(3561) }, { AOM_CDF2(904) } },
+ { { AOM_CDF2(32309) }, { AOM_CDF2(7337) }, { AOM_CDF2(1462) } },
+ { { AOM_CDF2(32265) }, { AOM_CDF2(4015) }, { AOM_CDF2(1521) } },
+ { { AOM_CDF2(32450) }, { AOM_CDF2(7946) }, { AOM_CDF2(129) } }
};
-#endif // CONFIG_NEW_MULTISYMBOL
-
-#if CONFIG_COMPOUND_SINGLEREF
-// TODO(zoeliu): Default values to be further adjusted based on the collected
-// stats.
-static const aom_prob default_comp_inter_mode_p[COMP_INTER_MODE_CONTEXTS] = {
- 40, 110, 160, 220
-};
-#endif // CONFIG_COMPOUND_SINGLEREF
-
-// TODO(huisu): tune these cdfs
-const aom_cdf_prob
- default_palette_y_size_cdf[PALETTE_BLOCK_SIZES][CDF_SIZE(PALETTE_SIZES)] = {
- { AOM_ICDF(12288), AOM_ICDF(19408), AOM_ICDF(24627), AOM_ICDF(26662),
- AOM_ICDF(28499), AOM_ICDF(30667), AOM_ICDF(32768), 0 },
- { AOM_ICDF(2815), AOM_ICDF(4570), AOM_ICDF(9416), AOM_ICDF(10875),
- AOM_ICDF(13782), AOM_ICDF(19863), AOM_ICDF(32768), 0 },
- { AOM_ICDF(3839), AOM_ICDF(5986), AOM_ICDF(11949), AOM_ICDF(13413),
- AOM_ICDF(16286), AOM_ICDF(21823), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12032), AOM_ICDF(14948), AOM_ICDF(22187), AOM_ICDF(23138),
- AOM_ICDF(24756), AOM_ICDF(27635), AOM_ICDF(32768), 0 },
- { AOM_ICDF(14847), AOM_ICDF(20167), AOM_ICDF(25433), AOM_ICDF(26751),
- AOM_ICDF(28278), AOM_ICDF(30119), AOM_ICDF(32768), 0 },
- { AOM_ICDF(14336), AOM_ICDF(20240), AOM_ICDF(24840), AOM_ICDF(26079),
- AOM_ICDF(27908), AOM_ICDF(30034), AOM_ICDF(32768), 0 },
- { AOM_ICDF(18816), AOM_ICDF(25574), AOM_ICDF(29030), AOM_ICDF(29877),
- AOM_ICDF(30656), AOM_ICDF(31506), AOM_ICDF(32768), 0 },
- { AOM_ICDF(23039), AOM_ICDF(27333), AOM_ICDF(30220), AOM_ICDF(30708),
- AOM_ICDF(31070), AOM_ICDF(31826), AOM_ICDF(32768), 0 },
- { AOM_ICDF(13696), AOM_ICDF(18911), AOM_ICDF(23620), AOM_ICDF(25371),
- AOM_ICDF(29821), AOM_ICDF(31617), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12543), AOM_ICDF(20838), AOM_ICDF(27455), AOM_ICDF(28762),
- AOM_ICDF(29763), AOM_ICDF(31546), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(12543), AOM_ICDF(20838), AOM_ICDF(27455), AOM_ICDF(28762),
- AOM_ICDF(29763), AOM_ICDF(31546), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12543), AOM_ICDF(20838), AOM_ICDF(27455), AOM_ICDF(28762),
- AOM_ICDF(29763), AOM_ICDF(31546), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12543), AOM_ICDF(20838), AOM_ICDF(27455), AOM_ICDF(28762),
- AOM_ICDF(29763), AOM_ICDF(31546), AOM_ICDF(32768), 0 },
-#endif
- };
-
-const aom_cdf_prob default_palette_uv_size_cdf[PALETTE_BLOCK_SIZES][CDF_SIZE(
- PALETTE_SIZES)] = {
- { AOM_ICDF(20480), AOM_ICDF(29888), AOM_ICDF(32453), AOM_ICDF(32715),
- AOM_ICDF(32751), AOM_ICDF(32766), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11135), AOM_ICDF(23641), AOM_ICDF(31056), AOM_ICDF(31998),
- AOM_ICDF(32496), AOM_ICDF(32668), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9216), AOM_ICDF(23108), AOM_ICDF(30806), AOM_ICDF(31871),
- AOM_ICDF(32414), AOM_ICDF(32637), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9984), AOM_ICDF(21999), AOM_ICDF(29192), AOM_ICDF(30645),
- AOM_ICDF(31640), AOM_ICDF(32402), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7552), AOM_ICDF(16614), AOM_ICDF(24880), AOM_ICDF(27283),
- AOM_ICDF(29254), AOM_ICDF(31203), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9600), AOM_ICDF(20279), AOM_ICDF(27548), AOM_ICDF(29261),
- AOM_ICDF(30494), AOM_ICDF(31631), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11391), AOM_ICDF(18656), AOM_ICDF(23727), AOM_ICDF(26058),
- AOM_ICDF(27788), AOM_ICDF(30278), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8576), AOM_ICDF(13585), AOM_ICDF(17632), AOM_ICDF(20884),
- AOM_ICDF(23948), AOM_ICDF(27152), AOM_ICDF(32768), 0 },
- { AOM_ICDF(15360), AOM_ICDF(24200), AOM_ICDF(26978), AOM_ICDF(30846),
- AOM_ICDF(31409), AOM_ICDF(32545), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9216), AOM_ICDF(14276), AOM_ICDF(19043), AOM_ICDF(22689),
- AOM_ICDF(25799), AOM_ICDF(28712), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(9216), AOM_ICDF(14276), AOM_ICDF(19043), AOM_ICDF(22689),
- AOM_ICDF(25799), AOM_ICDF(28712), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9216), AOM_ICDF(14276), AOM_ICDF(19043), AOM_ICDF(22689),
- AOM_ICDF(25799), AOM_ICDF(28712), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9216), AOM_ICDF(14276), AOM_ICDF(19043), AOM_ICDF(22689),
- AOM_ICDF(25799), AOM_ICDF(28712), AOM_ICDF(32768), 0 },
-#endif
-};
-
-// When palette mode is enabled, following probability tables indicate the
-// probabilities to code the "is_palette" bit (i.e. the bit that indicates
-// if this block uses palette mode or DC_PRED mode).
-const aom_prob av1_default_palette_y_mode_prob
- [PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS] = {
- { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 },
- { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 },
- { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 },
- { 240, 180, 100 },
-#if CONFIG_EXT_PARTITION
- { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 },
-#endif // CONFIG_EXT_PARTITION
- };
-
-const aom_prob av1_default_palette_uv_mode_prob[PALETTE_UV_MODE_CONTEXTS] = {
- 253, 229
-};
-
-#if CONFIG_NEW_MULTISYMBOL
-const aom_cdf_prob
- default_palette_y_mode_cdf[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS]
- [CDF_SIZE(2)] = {
- { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
-#if CONFIG_EXT_PARTITION
- { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
-#endif // CONFIG_EXT_PARTITION
- };
-const aom_cdf_prob
+static const aom_cdf_prob
default_palette_uv_mode_cdf[PALETTE_UV_MODE_CONTEXTS][CDF_SIZE(2)] = {
- { AOM_ICDF(128 * 253), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 229), AOM_ICDF(32768), 0 }
+ { AOM_CDF2(32461) }, { AOM_CDF2(21488) }
};
-#endif
-
-const aom_cdf_prob default_palette_y_color_index_cdf
+static const aom_cdf_prob default_palette_y_color_index_cdf
[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] = {
{
- { AOM_ICDF(29568), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(16384), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(8832), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(28672), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(31872), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
+ { AOM_CDF2(28710) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(10553) },
+ { AOM_CDF2(27036) },
+ { AOM_CDF2(31603) },
},
{
- { AOM_ICDF(28032), AOM_ICDF(30326), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(11647), AOM_ICDF(27405), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(4352), AOM_ICDF(30659), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(23552), AOM_ICDF(27800), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(32256), AOM_ICDF(32504), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
+ { AOM_CDF3(27877, 30490) },
+ { AOM_CDF3(11532, 25697) },
+ { AOM_CDF3(6544, 30234) },
+ { AOM_CDF3(23018, 28072) },
+ { AOM_CDF3(31915, 32385) },
},
{
- { AOM_ICDF(26112), AOM_ICDF(28374), AOM_ICDF(30039), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(9472), AOM_ICDF(22576), AOM_ICDF(27712), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(6656), AOM_ICDF(26138), AOM_ICDF(29608), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(19328), AOM_ICDF(23791), AOM_ICDF(28946), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(31744), AOM_ICDF(31984), AOM_ICDF(32336), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
+ { AOM_CDF4(25572, 28046, 30045) },
+ { AOM_CDF4(9478, 21590, 27256) },
+ { AOM_CDF4(7248, 26837, 29824) },
+ { AOM_CDF4(19167, 24486, 28349) },
+ { AOM_CDF4(31400, 31825, 32250) },
},
{
- { AOM_ICDF(27904), AOM_ICDF(29215), AOM_ICDF(30075), AOM_ICDF(31190),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(9728), AOM_ICDF(22598), AOM_ICDF(26134), AOM_ICDF(29425),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(2688), AOM_ICDF(30066), AOM_ICDF(31058), AOM_ICDF(31933),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(22015), AOM_ICDF(25039), AOM_ICDF(27726), AOM_ICDF(29932),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(32383), AOM_ICDF(32482), AOM_ICDF(32554), AOM_ICDF(32660),
- AOM_ICDF(32768), 0, 0, 0, 0 },
+ { AOM_CDF5(24779, 26955, 28576, 30282) },
+ { AOM_CDF5(8669, 20364, 24073, 28093) },
+ { AOM_CDF5(4255, 27565, 29377, 31067) },
+ { AOM_CDF5(19864, 23674, 26716, 29530) },
+ { AOM_CDF5(31646, 31893, 32147, 32426) },
},
{
- { AOM_ICDF(24319), AOM_ICDF(26299), AOM_ICDF(27486), AOM_ICDF(28600),
- AOM_ICDF(29804), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(7935), AOM_ICDF(18217), AOM_ICDF(21116), AOM_ICDF(25440),
- AOM_ICDF(28589), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(6656), AOM_ICDF(25016), AOM_ICDF(27105), AOM_ICDF(28698),
- AOM_ICDF(30399), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(19967), AOM_ICDF(24117), AOM_ICDF(26550), AOM_ICDF(28566),
- AOM_ICDF(30224), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(31359), AOM_ICDF(31607), AOM_ICDF(31775), AOM_ICDF(31977),
- AOM_ICDF(32258), AOM_ICDF(32768), 0, 0, 0 },
+ { AOM_CDF6(23132, 25407, 26970, 28435, 30073) },
+ { AOM_CDF6(7443, 17242, 20717, 24762, 27982) },
+ { AOM_CDF6(6300, 24862, 26944, 28784, 30671) },
+ { AOM_CDF6(18916, 22895, 25267, 27435, 29652) },
+ { AOM_CDF6(31270, 31550, 31808, 32059, 32353) },
},
{
- { AOM_ICDF(26368), AOM_ICDF(27768), AOM_ICDF(28588), AOM_ICDF(29274),
- AOM_ICDF(29997), AOM_ICDF(30917), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(8960), AOM_ICDF(18260), AOM_ICDF(20810), AOM_ICDF(23986),
- AOM_ICDF(26627), AOM_ICDF(28882), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(7295), AOM_ICDF(24111), AOM_ICDF(25836), AOM_ICDF(27515),
- AOM_ICDF(29033), AOM_ICDF(30769), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(22016), AOM_ICDF(25208), AOM_ICDF(27305), AOM_ICDF(28159),
- AOM_ICDF(29221), AOM_ICDF(30274), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(31744), AOM_ICDF(31932), AOM_ICDF(32050), AOM_ICDF(32199),
- AOM_ICDF(32335), AOM_ICDF(32521), AOM_ICDF(32768), 0, 0 },
+ { AOM_CDF7(23105, 25199, 26464, 27684, 28931, 30318) },
+ { AOM_CDF7(6950, 15447, 18952, 22681, 25567, 28563) },
+ { AOM_CDF7(7560, 23474, 25490, 27203, 28921, 30708) },
+ { AOM_CDF7(18544, 22373, 24457, 26195, 28119, 30045) },
+ { AOM_CDF7(31198, 31451, 31670, 31882, 32123, 32391) },
},
{
- { AOM_ICDF(26624), AOM_ICDF(27872), AOM_ICDF(28599), AOM_ICDF(29153),
- AOM_ICDF(29633), AOM_ICDF(30172), AOM_ICDF(30841), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(6655), AOM_ICDF(17569), AOM_ICDF(19587), AOM_ICDF(23345),
- AOM_ICDF(25884), AOM_ICDF(28088), AOM_ICDF(29678), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(3584), AOM_ICDF(27296), AOM_ICDF(28429), AOM_ICDF(29158),
- AOM_ICDF(30032), AOM_ICDF(30780), AOM_ICDF(31572), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(23551), AOM_ICDF(25855), AOM_ICDF(27070), AOM_ICDF(27893),
- AOM_ICDF(28597), AOM_ICDF(29721), AOM_ICDF(30970), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(32128), AOM_ICDF(32173), AOM_ICDF(32245), AOM_ICDF(32337),
- AOM_ICDF(32416), AOM_ICDF(32500), AOM_ICDF(32609), AOM_ICDF(32768),
- 0 },
+ { AOM_CDF8(21689, 23883, 25163, 26352, 27506, 28827, 30195) },
+ { AOM_CDF8(6892, 15385, 17840, 21606, 24287, 26753, 29204) },
+ { AOM_CDF8(5651, 23182, 25042, 26518, 27982, 29392, 30900) },
+ { AOM_CDF8(19349, 22578, 24418, 25994, 27524, 29031, 30448) },
+ { AOM_CDF8(31028, 31270, 31504, 31705, 31927, 32153, 32392) },
},
};
-const aom_cdf_prob default_palette_uv_color_index_cdf
+static const aom_cdf_prob default_palette_uv_color_index_cdf
[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] = {
{
- { AOM_ICDF(29824), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(16384), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(8832), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(30720), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(31744), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
+ { AOM_CDF2(29089) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(8713) },
+ { AOM_CDF2(29257) },
+ { AOM_CDF2(31610) },
},
{
- { AOM_ICDF(27648), AOM_ICDF(30208), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(14080), AOM_ICDF(26563), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(5120), AOM_ICDF(30932), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(24448), AOM_ICDF(27828), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(31616), AOM_ICDF(32219), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
+ { AOM_CDF3(25257, 29145) },
+ { AOM_CDF3(12287, 27293) },
+ { AOM_CDF3(7033, 27960) },
+ { AOM_CDF3(20145, 25405) },
+ { AOM_CDF3(30608, 31639) },
},
{
- { AOM_ICDF(25856), AOM_ICDF(28259), AOM_ICDF(30584), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(11520), AOM_ICDF(22476), AOM_ICDF(27944), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(8064), AOM_ICDF(26882), AOM_ICDF(30308), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(19455), AOM_ICDF(23823), AOM_ICDF(29134), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(30848), AOM_ICDF(31501), AOM_ICDF(32174), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
+ { AOM_CDF4(24210, 27175, 29903) },
+ { AOM_CDF4(9888, 22386, 27214) },
+ { AOM_CDF4(5901, 26053, 29293) },
+ { AOM_CDF4(18318, 22152, 28333) },
+ { AOM_CDF4(30459, 31136, 31926) },
},
{
- { AOM_ICDF(26751), AOM_ICDF(28020), AOM_ICDF(29541), AOM_ICDF(31230),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(12032), AOM_ICDF(26045), AOM_ICDF(30772), AOM_ICDF(31497),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(1280), AOM_ICDF(32153), AOM_ICDF(32458), AOM_ICDF(32560),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(23424), AOM_ICDF(24154), AOM_ICDF(29201), AOM_ICDF(29856),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(32256), AOM_ICDF(32402), AOM_ICDF(32561), AOM_ICDF(32682),
- AOM_ICDF(32768), 0, 0, 0, 0 },
+ { AOM_CDF5(22980, 25479, 27781, 29986) },
+ { AOM_CDF5(8413, 21408, 24859, 28874) },
+ { AOM_CDF5(2257, 29449, 30594, 31598) },
+ { AOM_CDF5(19189, 21202, 25915, 28620) },
+ { AOM_CDF5(31844, 32044, 32281, 32518) },
},
{
- { AOM_ICDF(24576), AOM_ICDF(26720), AOM_ICDF(28114), AOM_ICDF(28950),
- AOM_ICDF(31694), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(7551), AOM_ICDF(16613), AOM_ICDF(20462), AOM_ICDF(25269),
- AOM_ICDF(29077), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(6272), AOM_ICDF(23039), AOM_ICDF(25623), AOM_ICDF(28163),
- AOM_ICDF(30861), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(17024), AOM_ICDF(18808), AOM_ICDF(20771), AOM_ICDF(27941),
- AOM_ICDF(29845), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(31616), AOM_ICDF(31936), AOM_ICDF(32079), AOM_ICDF(32321),
- AOM_ICDF(32546), AOM_ICDF(32768), 0, 0, 0 },
+ { AOM_CDF6(22217, 24567, 26637, 28683, 30548) },
+ { AOM_CDF6(7307, 16406, 19636, 24632, 28424) },
+ { AOM_CDF6(4441, 25064, 26879, 28942, 30919) },
+ { AOM_CDF6(17210, 20528, 23319, 26750, 29582) },
+ { AOM_CDF6(30674, 30953, 31396, 31735, 32207) },
},
{
- { AOM_ICDF(23296), AOM_ICDF(25590), AOM_ICDF(27833), AOM_ICDF(29337),
- AOM_ICDF(29954), AOM_ICDF(31229), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(7552), AOM_ICDF(13659), AOM_ICDF(16570), AOM_ICDF(21695),
- AOM_ICDF(24506), AOM_ICDF(27701), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(6911), AOM_ICDF(24788), AOM_ICDF(26284), AOM_ICDF(27753),
- AOM_ICDF(29575), AOM_ICDF(30872), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(17535), AOM_ICDF(22236), AOM_ICDF(24457), AOM_ICDF(26242),
- AOM_ICDF(27363), AOM_ICDF(30191), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(30592), AOM_ICDF(31289), AOM_ICDF(31745), AOM_ICDF(31921),
- AOM_ICDF(32149), AOM_ICDF(32321), AOM_ICDF(32768), 0, 0 },
+ { AOM_CDF7(21239, 23168, 25044, 26962, 28705, 30506) },
+ { AOM_CDF7(6545, 15012, 18004, 21817, 25503, 28701) },
+ { AOM_CDF7(3448, 26295, 27437, 28704, 30126, 31442) },
+ { AOM_CDF7(15889, 18323, 21704, 24698, 26976, 29690) },
+ { AOM_CDF7(30988, 31204, 31479, 31734, 31983, 32325) },
},
{
- { AOM_ICDF(22016), AOM_ICDF(24242), AOM_ICDF(25141), AOM_ICDF(27137),
- AOM_ICDF(27797), AOM_ICDF(29331), AOM_ICDF(30848), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(8063), AOM_ICDF(13564), AOM_ICDF(16940), AOM_ICDF(21948),
- AOM_ICDF(24568), AOM_ICDF(25689), AOM_ICDF(26989), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(6528), AOM_ICDF(27028), AOM_ICDF(27835), AOM_ICDF(28741),
- AOM_ICDF(30031), AOM_ICDF(31795), AOM_ICDF(32285), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(18047), AOM_ICDF(23797), AOM_ICDF(25444), AOM_ICDF(26274),
- AOM_ICDF(27111), AOM_ICDF(27929), AOM_ICDF(30367), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(30208), AOM_ICDF(30628), AOM_ICDF(31046), AOM_ICDF(31658),
- AOM_ICDF(31762), AOM_ICDF(32367), AOM_ICDF(32469), AOM_ICDF(32768),
- 0 },
- }
- };
-#if CONFIG_MRC_TX
-// TODO(sarahparker) Tune these cdfs
-const aom_cdf_prob default_mrc_mask_intra_cdf
- [PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] = {
- {
- { AOM_ICDF(29568), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(16384), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(8832), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(28672), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(31872), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- },
- {
- { AOM_ICDF(28032), AOM_ICDF(30326), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(11647), AOM_ICDF(27405), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(4352), AOM_ICDF(30659), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(23552), AOM_ICDF(27800), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(32256), AOM_ICDF(32504), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- },
- {
- { AOM_ICDF(26112), AOM_ICDF(28374), AOM_ICDF(30039), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(9472), AOM_ICDF(22576), AOM_ICDF(27712), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(6656), AOM_ICDF(26138), AOM_ICDF(29608), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(19328), AOM_ICDF(23791), AOM_ICDF(28946), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(31744), AOM_ICDF(31984), AOM_ICDF(32336), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- },
- {
- { AOM_ICDF(27904), AOM_ICDF(29215), AOM_ICDF(30075), AOM_ICDF(31190),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(9728), AOM_ICDF(22598), AOM_ICDF(26134), AOM_ICDF(29425),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(2688), AOM_ICDF(30066), AOM_ICDF(31058), AOM_ICDF(31933),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(22015), AOM_ICDF(25039), AOM_ICDF(27726), AOM_ICDF(29932),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(32383), AOM_ICDF(32482), AOM_ICDF(32554), AOM_ICDF(32660),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- },
- {
- { AOM_ICDF(24319), AOM_ICDF(26299), AOM_ICDF(27486), AOM_ICDF(28600),
- AOM_ICDF(29804), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(7935), AOM_ICDF(18217), AOM_ICDF(21116), AOM_ICDF(25440),
- AOM_ICDF(28589), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(6656), AOM_ICDF(25016), AOM_ICDF(27105), AOM_ICDF(28698),
- AOM_ICDF(30399), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(19967), AOM_ICDF(24117), AOM_ICDF(26550), AOM_ICDF(28566),
- AOM_ICDF(30224), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(31359), AOM_ICDF(31607), AOM_ICDF(31775), AOM_ICDF(31977),
- AOM_ICDF(32258), AOM_ICDF(32768), 0, 0, 0 },
- },
- {
- { AOM_ICDF(26368), AOM_ICDF(27768), AOM_ICDF(28588), AOM_ICDF(29274),
- AOM_ICDF(29997), AOM_ICDF(30917), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(8960), AOM_ICDF(18260), AOM_ICDF(20810), AOM_ICDF(23986),
- AOM_ICDF(26627), AOM_ICDF(28882), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(7295), AOM_ICDF(24111), AOM_ICDF(25836), AOM_ICDF(27515),
- AOM_ICDF(29033), AOM_ICDF(30769), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(22016), AOM_ICDF(25208), AOM_ICDF(27305), AOM_ICDF(28159),
- AOM_ICDF(29221), AOM_ICDF(30274), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(31744), AOM_ICDF(31932), AOM_ICDF(32050), AOM_ICDF(32199),
- AOM_ICDF(32335), AOM_ICDF(32521), AOM_ICDF(32768), 0, 0 },
- },
- {
- { AOM_ICDF(26624), AOM_ICDF(27872), AOM_ICDF(28599), AOM_ICDF(29153),
- AOM_ICDF(29633), AOM_ICDF(30172), AOM_ICDF(30841), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(6655), AOM_ICDF(17569), AOM_ICDF(19587), AOM_ICDF(23345),
- AOM_ICDF(25884), AOM_ICDF(28088), AOM_ICDF(29678), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(3584), AOM_ICDF(27296), AOM_ICDF(28429), AOM_ICDF(29158),
- AOM_ICDF(30032), AOM_ICDF(30780), AOM_ICDF(31572), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(23551), AOM_ICDF(25855), AOM_ICDF(27070), AOM_ICDF(27893),
- AOM_ICDF(28597), AOM_ICDF(29721), AOM_ICDF(30970), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(32128), AOM_ICDF(32173), AOM_ICDF(32245), AOM_ICDF(32337),
- AOM_ICDF(32416), AOM_ICDF(32500), AOM_ICDF(32609), AOM_ICDF(32768),
- 0 },
+ { AOM_CDF8(21442, 23288, 24758, 26246, 27649, 28980, 30563) },
+ { AOM_CDF8(5863, 14933, 17552, 20668, 23683, 26411, 29273) },
+ { AOM_CDF8(3415, 25810, 26877, 27990, 29223, 30394, 31618) },
+ { AOM_CDF8(17965, 20084, 22232, 23974, 26274, 28402, 30390) },
+ { AOM_CDF8(31190, 31329, 31516, 31679, 31825, 32026, 32322) },
},
};
-const aom_cdf_prob default_mrc_mask_inter_cdf
- [PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] = {
- {
- { AOM_ICDF(29568), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(16384), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(8832), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(28672), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(31872), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
- },
- {
- { AOM_ICDF(28032), AOM_ICDF(30326), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(11647), AOM_ICDF(27405), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(4352), AOM_ICDF(30659), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(23552), AOM_ICDF(27800), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- { AOM_ICDF(32256), AOM_ICDF(32504), AOM_ICDF(32768), 0, 0, 0, 0, 0,
- 0 },
- },
- {
- { AOM_ICDF(26112), AOM_ICDF(28374), AOM_ICDF(30039), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(9472), AOM_ICDF(22576), AOM_ICDF(27712), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(6656), AOM_ICDF(26138), AOM_ICDF(29608), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(19328), AOM_ICDF(23791), AOM_ICDF(28946), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- { AOM_ICDF(31744), AOM_ICDF(31984), AOM_ICDF(32336), AOM_ICDF(32768),
- 0, 0, 0, 0, 0 },
- },
- {
- { AOM_ICDF(27904), AOM_ICDF(29215), AOM_ICDF(30075), AOM_ICDF(31190),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(9728), AOM_ICDF(22598), AOM_ICDF(26134), AOM_ICDF(29425),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(2688), AOM_ICDF(30066), AOM_ICDF(31058), AOM_ICDF(31933),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(22015), AOM_ICDF(25039), AOM_ICDF(27726), AOM_ICDF(29932),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- { AOM_ICDF(32383), AOM_ICDF(32482), AOM_ICDF(32554), AOM_ICDF(32660),
- AOM_ICDF(32768), 0, 0, 0, 0 },
- },
+static const aom_cdf_prob
+ default_txfm_partition_cdf[TXFM_PARTITION_CONTEXTS][CDF_SIZE(2)] = {
+ { AOM_CDF2(28581) }, { AOM_CDF2(23846) }, { AOM_CDF2(20847) },
+ { AOM_CDF2(24315) }, { AOM_CDF2(18196) }, { AOM_CDF2(12133) },
+ { AOM_CDF2(18791) }, { AOM_CDF2(10887) }, { AOM_CDF2(11005) },
+ { AOM_CDF2(27179) }, { AOM_CDF2(20004) }, { AOM_CDF2(11281) },
+ { AOM_CDF2(26549) }, { AOM_CDF2(19308) }, { AOM_CDF2(14224) },
+ { AOM_CDF2(28015) }, { AOM_CDF2(21546) }, { AOM_CDF2(14400) },
+ { AOM_CDF2(28165) }, { AOM_CDF2(22401) }, { AOM_CDF2(16088) }
+ };
+
+static const aom_cdf_prob default_skip_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)] = {
+ { AOM_CDF2(31671) }, { AOM_CDF2(16515) }, { AOM_CDF2(4576) }
+};
+
+static const aom_cdf_prob default_skip_mode_cdfs[SKIP_MODE_CONTEXTS][CDF_SIZE(
+ 2)] = { { AOM_CDF2(32621) }, { AOM_CDF2(20708) }, { AOM_CDF2(8127) } };
+
+static const aom_cdf_prob
+ default_compound_idx_cdfs[COMP_INDEX_CONTEXTS][CDF_SIZE(2)] = {
+ { AOM_CDF2(18244) }, { AOM_CDF2(12865) }, { AOM_CDF2(7053) },
+ { AOM_CDF2(13259) }, { AOM_CDF2(9334) }, { AOM_CDF2(4644) }
+ };
+
+static const aom_cdf_prob
+ default_comp_group_idx_cdfs[COMP_GROUP_IDX_CONTEXTS][CDF_SIZE(2)] = {
+ { AOM_CDF2(26607) }, { AOM_CDF2(22891) }, { AOM_CDF2(18840) },
+ { AOM_CDF2(24594) }, { AOM_CDF2(19934) }, { AOM_CDF2(22674) }
+ };
+
+static const aom_cdf_prob default_intrabc_cdf[CDF_SIZE(2)] = { AOM_CDF2(
+ 30531) };
+
+static const aom_cdf_prob default_filter_intra_mode_cdf[CDF_SIZE(
+ FILTER_INTRA_MODES)] = { AOM_CDF5(8949, 12776, 17211, 29558) };
+
+static const aom_cdf_prob default_filter_intra_cdfs[BLOCK_SIZES_ALL][CDF_SIZE(
+ 2)] = { { AOM_CDF2(4621) }, { AOM_CDF2(6743) }, { AOM_CDF2(5893) },
+ { AOM_CDF2(7866) }, { AOM_CDF2(12551) }, { AOM_CDF2(9394) },
+ { AOM_CDF2(12408) }, { AOM_CDF2(14301) }, { AOM_CDF2(12756) },
+ { AOM_CDF2(22343) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) }, { AOM_CDF2(12770) }, { AOM_CDF2(10368) },
+ { AOM_CDF2(20229) }, { AOM_CDF2(18101) }, { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } };
+
+static const aom_cdf_prob default_switchable_restore_cdf[CDF_SIZE(
+ RESTORE_SWITCHABLE_TYPES)] = { AOM_CDF3(9413, 22581) };
+
+static const aom_cdf_prob default_wiener_restore_cdf[CDF_SIZE(2)] = { AOM_CDF2(
+ 11570) };
+
+static const aom_cdf_prob default_sgrproj_restore_cdf[CDF_SIZE(2)] = { AOM_CDF2(
+ 16855) };
+
+static const aom_cdf_prob default_delta_q_cdf[CDF_SIZE(DELTA_Q_PROBS + 1)] = {
+ AOM_CDF4(28160, 32120, 32677)
+};
+
+static const aom_cdf_prob default_delta_lf_multi_cdf[FRAME_LF_COUNT][CDF_SIZE(
+ DELTA_LF_PROBS + 1)] = { { AOM_CDF4(28160, 32120, 32677) },
+ { AOM_CDF4(28160, 32120, 32677) },
+ { AOM_CDF4(28160, 32120, 32677) },
+ { AOM_CDF4(28160, 32120, 32677) } };
+static const aom_cdf_prob default_delta_lf_cdf[CDF_SIZE(DELTA_LF_PROBS + 1)] = {
+ AOM_CDF4(28160, 32120, 32677)
+};
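+
+/* A rough illustration, assuming the AOM_CDFn(...) arguments are cumulative
+ * probabilities scaled so that 32768 (1 << 15) represents 1.0: the default
+ * delta-LF CDF above, AOM_CDF4(28160, 32120, 32677), then corresponds to
+ * per-symbol probabilities of roughly 0.859, 0.121, 0.017 and 0.003.  The
+ * helper below is a hypothetical sketch of that arithmetic only, not an aom
+ * API.
+ */
+static inline double example_cdf_symbol_prob(const int *cum, int nsymbs,
+                                             int k) {
+  /* cum holds the nsymbs - 1 macro arguments; the implicit last entry is
+   * 32768, and symbol k owns the probability mass between cum[k - 1] and
+   * cum[k]. */
+  const int hi = (k == nsymbs - 1) ? 32768 : cum[k];
+  const int lo = (k == 0) ? 0 : cum[k - 1];
+  return (double)(hi - lo) / 32768.0;
+}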
+
+// FIXME(someone) need real defaults here
+static const aom_cdf_prob default_seg_tree_cdf[CDF_SIZE(MAX_SEGMENTS)] = {
+ AOM_CDF8(4096, 8192, 12288, 16384, 20480, 24576, 28672)
+};
+
+static const aom_cdf_prob
+ default_segment_pred_cdf[SEG_TEMPORAL_PRED_CTXS][CDF_SIZE(2)] = {
+ { AOM_CDF2(128 * 128) }, { AOM_CDF2(128 * 128) }, { AOM_CDF2(128 * 128) }
+ };
+
+static const aom_cdf_prob
+ default_spatial_pred_seg_tree_cdf[SPATIAL_PREDICTION_PROBS][CDF_SIZE(
+ MAX_SEGMENTS)] = {
{
- { AOM_ICDF(24319), AOM_ICDF(26299), AOM_ICDF(27486), AOM_ICDF(28600),
- AOM_ICDF(29804), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(7935), AOM_ICDF(18217), AOM_ICDF(21116), AOM_ICDF(25440),
- AOM_ICDF(28589), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(6656), AOM_ICDF(25016), AOM_ICDF(27105), AOM_ICDF(28698),
- AOM_ICDF(30399), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(19967), AOM_ICDF(24117), AOM_ICDF(26550), AOM_ICDF(28566),
- AOM_ICDF(30224), AOM_ICDF(32768), 0, 0, 0 },
- { AOM_ICDF(31359), AOM_ICDF(31607), AOM_ICDF(31775), AOM_ICDF(31977),
- AOM_ICDF(32258), AOM_ICDF(32768), 0, 0, 0 },
+ AOM_CDF8(5622, 7893, 16093, 18233, 27809, 28373, 32533),
},
{
- { AOM_ICDF(26368), AOM_ICDF(27768), AOM_ICDF(28588), AOM_ICDF(29274),
- AOM_ICDF(29997), AOM_ICDF(30917), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(8960), AOM_ICDF(18260), AOM_ICDF(20810), AOM_ICDF(23986),
- AOM_ICDF(26627), AOM_ICDF(28882), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(7295), AOM_ICDF(24111), AOM_ICDF(25836), AOM_ICDF(27515),
- AOM_ICDF(29033), AOM_ICDF(30769), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(22016), AOM_ICDF(25208), AOM_ICDF(27305), AOM_ICDF(28159),
- AOM_ICDF(29221), AOM_ICDF(30274), AOM_ICDF(32768), 0, 0 },
- { AOM_ICDF(31744), AOM_ICDF(31932), AOM_ICDF(32050), AOM_ICDF(32199),
- AOM_ICDF(32335), AOM_ICDF(32521), AOM_ICDF(32768), 0, 0 },
+ AOM_CDF8(14274, 18230, 22557, 24935, 29980, 30851, 32344),
},
{
- { AOM_ICDF(26624), AOM_ICDF(27872), AOM_ICDF(28599), AOM_ICDF(29153),
- AOM_ICDF(29633), AOM_ICDF(30172), AOM_ICDF(30841), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(6655), AOM_ICDF(17569), AOM_ICDF(19587), AOM_ICDF(23345),
- AOM_ICDF(25884), AOM_ICDF(28088), AOM_ICDF(29678), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(3584), AOM_ICDF(27296), AOM_ICDF(28429), AOM_ICDF(29158),
- AOM_ICDF(30032), AOM_ICDF(30780), AOM_ICDF(31572), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(23551), AOM_ICDF(25855), AOM_ICDF(27070), AOM_ICDF(27893),
- AOM_ICDF(28597), AOM_ICDF(29721), AOM_ICDF(30970), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(32128), AOM_ICDF(32173), AOM_ICDF(32245), AOM_ICDF(32337),
- AOM_ICDF(32416), AOM_ICDF(32500), AOM_ICDF(32609), AOM_ICDF(32768),
- 0 },
+ AOM_CDF8(27527, 28487, 28723, 28890, 32397, 32647, 32679),
},
};
-#endif // CONFIG_MRC_TX
-#if CONFIG_INTRABC
-static const aom_cdf_prob default_intrabc_cdf[CDF_SIZE(2)] = {
- AOM_ICDF(192 * 128), AOM_ICDF(32768), 0,
-};
-#endif // CONFIG_INTRABC
+static const aom_cdf_prob default_tx_size_cdf[MAX_TX_CATS][TX_SIZE_CONTEXTS]
+ [CDF_SIZE(MAX_TX_DEPTH + 1)] = {
+ { { AOM_CDF2(19968) },
+ { AOM_CDF2(19968) },
+ { AOM_CDF2(24320) } },
+ { { AOM_CDF3(12272, 30172) },
+ { AOM_CDF3(12272, 30172) },
+ { AOM_CDF3(18677, 30848) } },
+ { { AOM_CDF3(12986, 15180) },
+ { AOM_CDF3(12986, 15180) },
+ { AOM_CDF3(24302, 25602) } },
+ { { AOM_CDF3(5782, 11475) },
+ { AOM_CDF3(5782, 11475) },
+ { AOM_CDF3(16803, 22759) } },
+ };
#define MAX_COLOR_CONTEXT_HASH 8
// Negative values are invalid
@@ -2482,68 +896,43 @@ static const int palette_color_index_context_lookup[MAX_COLOR_CONTEXT_HASH +
1] = { -1, -1, 0, -1, -1,
4, 3, 2, 1 };
-#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
-static const aom_prob default_quarter_tx_size_prob = 192;
-#if CONFIG_NEW_MULTISYMBOL
-static const aom_cdf_prob default_quarter_tx_size_cdf[CDF_SIZE(2)] = {
- AOM_ICDF(192 * 128), AOM_ICDF(32768), 0
-};
-#endif
-#endif
-
-#if CONFIG_LOOP_RESTORATION
-const aom_tree_index
- av1_switchable_restore_tree[TREE_SIZE(RESTORE_SWITCHABLE_TYPES)] = {
- -RESTORE_NONE, 2, -RESTORE_WIENER, -RESTORE_SGRPROJ,
- };
-
-static const aom_prob
- default_switchable_restore_prob[RESTORE_SWITCHABLE_TYPES - 1] = {
- 32, 128,
- };
-#endif // CONFIG_LOOP_RESTORATION
-
#define NUM_PALETTE_NEIGHBORS 3 // left, top-left and top.
int av1_get_palette_color_index_context(const uint8_t *color_map, int stride,
int r, int c, int palette_size,
uint8_t *color_order, int *color_idx) {
- int i;
- // The +10 below should not be needed. But we get a warning "array subscript
- // is above array bounds [-Werror=array-bounds]" without it, possibly due to
- // this (or similar) bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
- int scores[PALETTE_MAX_SIZE + 10];
- const int weights[NUM_PALETTE_NEIGHBORS] = { 2, 1, 2 };
- const int hash_multipliers[NUM_PALETTE_NEIGHBORS] = { 1, 2, 2 };
- int color_index_ctx_hash;
- int color_index_ctx;
- int color_neighbors[NUM_PALETTE_NEIGHBORS];
- int inverse_color_order[PALETTE_MAX_SIZE];
assert(palette_size <= PALETTE_MAX_SIZE);
assert(r > 0 || c > 0);
// Get color indices of neighbors.
+ int color_neighbors[NUM_PALETTE_NEIGHBORS];
color_neighbors[0] = (c - 1 >= 0) ? color_map[r * stride + c - 1] : -1;
color_neighbors[1] =
(c - 1 >= 0 && r - 1 >= 0) ? color_map[(r - 1) * stride + c - 1] : -1;
color_neighbors[2] = (r - 1 >= 0) ? color_map[(r - 1) * stride + c] : -1;
- for (i = 0; i < PALETTE_MAX_SIZE; ++i) {
- color_order[i] = i;
- inverse_color_order[i] = i;
- }
- memset(scores, 0, PALETTE_MAX_SIZE * sizeof(scores[0]));
+ // The +10 below should not be needed. But we get a warning "array subscript
+ // is above array bounds [-Werror=array-bounds]" without it, possibly due to
+ // this (or similar) bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
+ int scores[PALETTE_MAX_SIZE + 10] = { 0 };
+ int i;
+ static const int weights[NUM_PALETTE_NEIGHBORS] = { 2, 1, 2 };
for (i = 0; i < NUM_PALETTE_NEIGHBORS; ++i) {
if (color_neighbors[i] >= 0) {
scores[color_neighbors[i]] += weights[i];
}
}
+ int inverse_color_order[PALETTE_MAX_SIZE];
+ for (i = 0; i < PALETTE_MAX_SIZE; ++i) {
+ color_order[i] = i;
+ inverse_color_order[i] = i;
+ }
+
// Get the top NUM_PALETTE_NEIGHBORS scores (sorted from large to small).
for (i = 0; i < NUM_PALETTE_NEIGHBORS; ++i) {
int max = scores[i];
int max_idx = i;
- int j;
- for (j = i + 1; j < palette_size; ++j) {
+ for (int j = i + 1; j < palette_size; ++j) {
if (scores[j] > max) {
max = scores[j];
max_idx = j;
@@ -2554,8 +943,7 @@ int av1_get_palette_color_index_context(const uint8_t *color_map, int stride,
// from 'i' to 'max_idx - 1' by 1.
const int max_score = scores[max_idx];
const uint8_t max_color_order = color_order[max_idx];
- int k;
- for (k = max_idx; k > i; --k) {
+ for (int k = max_idx; k > i; --k) {
scores[k] = scores[k - 1];
color_order[k] = color_order[k - 1];
inverse_color_order[color_order[k]] = k;
@@ -2566,8 +954,12 @@ int av1_get_palette_color_index_context(const uint8_t *color_map, int stride,
}
}
+ if (color_idx != NULL)
+ *color_idx = inverse_color_order[color_map[r * stride + c]];
+
// Get hash value of context.
- color_index_ctx_hash = 0;
+ int color_index_ctx_hash = 0;
+ static const int hash_multipliers[NUM_PALETTE_NEIGHBORS] = { 1, 2, 2 };
for (i = 0; i < NUM_PALETTE_NEIGHBORS; ++i) {
color_index_ctx_hash += scores[i] * hash_multipliers[i];
}
@@ -2575,3643 +967,137 @@ int av1_get_palette_color_index_context(const uint8_t *color_map, int stride,
assert(color_index_ctx_hash <= MAX_COLOR_CONTEXT_HASH);
// Lookup context from hash.
- color_index_ctx = palette_color_index_context_lookup[color_index_ctx_hash];
+ const int color_index_ctx =
+ palette_color_index_context_lookup[color_index_ctx_hash];
assert(color_index_ctx >= 0);
assert(color_index_ctx < PALETTE_COLOR_INDEX_CONTEXTS);
-
- if (color_idx != NULL) {
- *color_idx = inverse_color_order[color_map[r * stride + c]];
- }
return color_index_ctx;
}
#undef NUM_PALETTE_NEIGHBORS
#undef MAX_COLOR_CONTEXT_HASH
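
The context derivation in av1_get_palette_color_index_context() above can be hard to follow in diff form. The short standalone program below is a simplified sketch of the same neighbor-scoring/hash idea; it is not part of the patch and not libaom API. The weight, hash-multiplier and lookup constants are copied from this file, while every sketch_-prefixed name and the tiny 2x2 color map are made up for illustration.

/* Standalone sketch (assumes nothing from libaom headers). */
#include <assert.h>
#include <stdio.h>

#define SKETCH_NUM_NEIGHBORS 3 /* left, top-left and top, as in the patch */
#define SKETCH_PALETTE_MAX 8   /* assumed cap on palette_size for this demo */

static int sketch_palette_ctx(const unsigned char *map, int stride, int r,
                              int c, int palette_size) {
  /* Weight, hash-multiplier and lookup constants copied from the hunk above. */
  static const int weights[SKETCH_NUM_NEIGHBORS] = { 2, 1, 2 };
  static const int multipliers[SKETCH_NUM_NEIGHBORS] = { 1, 2, 2 };
  static const int lookup[9] = { -1, -1, 0, -1, -1, 4, 3, 2, 1 };

  /* Score each palette color by the weighted count of neighbors using it. */
  int scores[SKETCH_PALETTE_MAX] = { 0 };
  const int neighbors[SKETCH_NUM_NEIGHBORS] = {
    (c > 0) ? map[r * stride + c - 1] : -1,                /* left */
    (c > 0 && r > 0) ? map[(r - 1) * stride + c - 1] : -1, /* top-left */
    (r > 0) ? map[(r - 1) * stride + c] : -1,              /* top */
  };
  for (int i = 0; i < SKETCH_NUM_NEIGHBORS; ++i)
    if (neighbors[i] >= 0) scores[neighbors[i]] += weights[i];

  /* Partial selection sort: the three largest scores end up in front. */
  for (int i = 0; i < SKETCH_NUM_NEIGHBORS; ++i) {
    int max_idx = i;
    for (int j = i + 1; j < palette_size; ++j)
      if (scores[j] > scores[max_idx]) max_idx = j;
    const int tmp = scores[i];
    scores[i] = scores[max_idx];
    scores[max_idx] = tmp;
  }

  /* Hash the sorted top scores and map the hash to a context index. */
  int hash = 0;
  for (int i = 0; i < SKETCH_NUM_NEIGHBORS; ++i)
    hash += scores[i] * multipliers[i];
  assert(hash >= 0 && hash < 9);
  return lookup[hash];
}

int main(void) {
  /* 2x2 color map using palette entries 0 and 1; query the bottom-right
     sample, whose left and top neighbors are both color 1. */
  const unsigned char map[4] = { 0, 1, 1, 0 };
  printf("context = %d\n", sketch_palette_ctx(map, 2, 1, 1, 2));
  return 0;
}

Compiled on its own, the sketch prints context = 3 for the bottom-right sample: its left and top neighbors share a color, giving the sorted score pattern {4, 1, 0}, hash 4*1 + 1*2 + 0*2 = 6, and lookup entry 3.
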
-#if CONFIG_VAR_TX
-static const aom_prob default_txfm_partition_probs[TXFM_PARTITION_CONTEXTS] = {
-#if CONFIG_TX64X64
- 249, 240, 223, 249, 229, 177, 250, 243, 208, 226, 187,
- 145, 236, 204, 150, 183, 149, 125, 181, 146, 113, 128
-#else
- 250, 231, 212, 241, 166, 66, 241, 230, 135, 243, 154, 64, 248, 161, 63, 128
-#endif // CONFIG_TX64X64
-};
-#if CONFIG_NEW_MULTISYMBOL
-static const aom_cdf_prob
- default_txfm_partition_cdf[TXFM_PARTITION_CONTEXTS][CDF_SIZE(2)] = {
-#if CONFIG_TX64X64
- { AOM_ICDF(249 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(240 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(223 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(249 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(229 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(177 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(250 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(243 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(226 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(187 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(145 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(236 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(204 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(150 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(183 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(149 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(125 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(181 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(146 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(113 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 }
-#else
- { AOM_ICDF(250 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(231 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(212 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(241 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(166 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(66 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(241 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(230 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(135 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(243 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(154 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(64 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(248 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(161 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(63 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
-#endif // CONFIG_TX64X64
- };
-#endif // CONFIG_NEW_MULTISYMBOL
-#endif // CONFIG_VAR_TX
-
-static const aom_prob default_skip_probs[SKIP_CONTEXTS] = { 192, 128, 64 };
-#if CONFIG_NEW_MULTISYMBOL
-static const aom_cdf_prob default_skip_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)] = {
- { AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8192), AOM_ICDF(32768), 0 }
-};
-#endif
-
-#if CONFIG_LGT_FROM_PRED
-static const aom_prob default_intra_lgt_prob[LGT_SIZES][INTRA_MODES] = {
- { 255, 208, 208, 180, 230, 208, 194, 214, 220, 255,
-#if CONFIG_SMOOTH_HV
- 220, 220,
-#endif
- 230 },
- { 255, 192, 216, 180, 180, 180, 180, 200, 200, 255,
-#if CONFIG_SMOOTH_HV
- 220, 220,
-#endif
- 222 },
-};
-
-static const aom_prob default_inter_lgt_prob[LGT_SIZES] = { 230, 230 };
-#endif // CONFIG_LGT_FROM_PRED
-
-#if CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
-static const aom_prob
- default_intra_filter_probs[INTRA_FILTERS + 1][INTRA_FILTERS - 1] = {
- { 98, 63, 60 }, { 98, 82, 80 }, { 94, 65, 103 },
- { 49, 25, 24 }, { 72, 38, 50 },
- };
-const aom_tree_index av1_intra_filter_tree[TREE_SIZE(INTRA_FILTERS)] = {
- -INTRA_FILTER_LINEAR, 2, -INTRA_FILTER_8TAP, 4, -INTRA_FILTER_8TAP_SHARP,
- -INTRA_FILTER_8TAP_SMOOTH,
-};
-int av1_intra_filter_ind[INTRA_FILTERS];
-int av1_intra_filter_inv[INTRA_FILTERS];
-#endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
-
-#if CONFIG_FILTER_INTRA
-static const aom_prob default_filter_intra_probs[2] = { 230, 230 };
-#endif // CONFIG_FILTER_INTRA
-
-#if CONFIG_SUPERTX
-static const aom_prob
- default_supertx_prob[PARTITION_SUPERTX_CONTEXTS][TX_SIZES] = {
-#if CONFIG_CHROMA_2X2
-#if CONFIG_TX64X64
- { 1, 1, 160, 160, 170, 180 }, { 1, 1, 200, 200, 210, 220 },
-#else
- { 1, 1, 160, 160, 170 }, { 1, 1, 200, 200, 210 },
-#endif // CONFIG_TX64X64
-#else
-#if CONFIG_TX64X64
- { 1, 160, 160, 170, 180 }, { 1, 200, 200, 210, 220 },
-#else
- { 1, 160, 160, 170 }, { 1, 200, 200, 210 },
-#endif // CONFIG_TX64X64
-#endif // CONFIG_CHROMA_2X2
- };
-#endif // CONFIG_SUPERTX
-
-// FIXME(someone) need real defaults here
-static const aom_prob default_segment_tree_probs[SEG_TREE_PROBS] = {
- 128, 128, 128, 128, 128, 128, 128
-};
-// clang-format off
-static const aom_prob default_segment_pred_probs[PREDICTION_PROBS] = {
- 128, 128, 128
-};
-#if CONFIG_NEW_MULTISYMBOL
-static const aom_cdf_prob
- default_segment_pred_cdf[PREDICTION_PROBS][CDF_SIZE(2)] = {
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0},
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0},
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0}
-};
-#endif
-// clang-format on
-#if CONFIG_DUAL_FILTER
-#if USE_EXTRA_FILTER
-static const aom_cdf_prob
- default_switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS][CDF_SIZE(
- SWITCHABLE_FILTERS)] = {
- { AOM_ICDF(30080), AOM_ICDF(31088), AOM_ICDF(31760), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(9620), AOM_ICDF(11050), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19072), AOM_ICDF(23352), AOM_ICDF(28488), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30080), AOM_ICDF(31088), AOM_ICDF(31760), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(9620), AOM_ICDF(11050), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19072), AOM_ICDF(23352), AOM_ICDF(28488), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30080), AOM_ICDF(31088), AOM_ICDF(31760), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(9620), AOM_ICDF(11050), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19072), AOM_ICDF(23352), AOM_ICDF(28488), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30080), AOM_ICDF(31088), AOM_ICDF(31760), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(9620), AOM_ICDF(11050), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19072), AOM_ICDF(23352), AOM_ICDF(28488), AOM_ICDF(32768), 0 },
- };
-#else // USE_EXTRA_FILTER
-static const aom_cdf_prob
- default_switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS][CDF_SIZE(
- SWITCHABLE_FILTERS)] = {
- { AOM_ICDF(32256), AOM_ICDF(32654), AOM_ICDF(32768), 0 },
- { AOM_ICDF(2816), AOM_ICDF(32651), AOM_ICDF(32768), 0 },
- { AOM_ICDF(512), AOM_ICDF(764), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30464), AOM_ICDF(31778), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32384), AOM_ICDF(32483), AOM_ICDF(32768), 0 },
- { AOM_ICDF(3072), AOM_ICDF(32652), AOM_ICDF(32768), 0 },
- { AOM_ICDF(256), AOM_ICDF(383), AOM_ICDF(32768), 0 },
- { AOM_ICDF(25344), AOM_ICDF(26533), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32000), AOM_ICDF(32531), AOM_ICDF(32768), 0 },
- { AOM_ICDF(2048), AOM_ICDF(32648), AOM_ICDF(32768), 0 },
- { AOM_ICDF(384), AOM_ICDF(890), AOM_ICDF(32768), 0 },
- { AOM_ICDF(28928), AOM_ICDF(31358), AOM_ICDF(32768), 0 },
- { AOM_ICDF(31616), AOM_ICDF(31787), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4224), AOM_ICDF(32433), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128), AOM_ICDF(256), AOM_ICDF(32768), 0 },
- { AOM_ICDF(17408), AOM_ICDF(18248), AOM_ICDF(32768), 0 }
- };
-#endif // USE_EXTRA_FILTER
-#else // CONFIG_DUAL_FILTER
-static const aom_cdf_prob
- default_switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS][CDF_SIZE(
- SWITCHABLE_FILTERS)] = {
- { AOM_ICDF(30080), AOM_ICDF(31781), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(32658), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(4685), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19072), AOM_ICDF(26776), AOM_ICDF(32768), 0 },
- };
-#endif // CONFIG_DUAL_FILTER
-
-static const aom_cdf_prob default_seg_tree_cdf[CDF_SIZE(MAX_SEGMENTS)] = {
- AOM_ICDF(4096), AOM_ICDF(8192), AOM_ICDF(12288),
- AOM_ICDF(16384), AOM_ICDF(20480), AOM_ICDF(24576),
- AOM_ICDF(28672), AOM_ICDF(32768), 0
-};
-
-static const aom_cdf_prob
- default_tx_size_cdf[MAX_TX_DEPTH][TX_SIZE_CONTEXTS][CDF_SIZE(MAX_TX_DEPTH +
- 1)] = {
- { { AOM_ICDF(12800), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8448), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(2560), AOM_ICDF(20496), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1920), AOM_ICDF(14091), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(384), AOM_ICDF(17588), AOM_ICDF(19782), AOM_ICDF(32768), 0 },
- { AOM_ICDF(640), AOM_ICDF(7166), AOM_ICDF(8466), AOM_ICDF(32768), 0 } },
-#if CONFIG_TX64X64
- { { AOM_ICDF(128), AOM_ICDF(8288), AOM_ICDF(21293), AOM_ICDF(26986),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(128), AOM_ICDF(4208), AOM_ICDF(10009), AOM_ICDF(15965),
- AOM_ICDF(32768), 0 } },
-#endif
- };
-
-#if CONFIG_SMOOTH_HV
-static const aom_cdf_prob
- default_if_y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(INTRA_MODES)] = {
- {
- AOM_ICDF(7168), AOM_ICDF(10680), AOM_ICDF(13913), AOM_ICDF(16928),
- AOM_ICDF(20294), AOM_ICDF(22790), AOM_ICDF(24706), AOM_ICDF(26275),
- AOM_ICDF(28139), AOM_ICDF(29751), AOM_ICDF(30563), AOM_ICDF(31468),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(11776), AOM_ICDF(13823), AOM_ICDF(15307), AOM_ICDF(15725),
- AOM_ICDF(16638), AOM_ICDF(17406), AOM_ICDF(17994), AOM_ICDF(18814),
- AOM_ICDF(19634), AOM_ICDF(21513), AOM_ICDF(22198), AOM_ICDF(22928),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(14720), AOM_ICDF(16459), AOM_ICDF(18091), AOM_ICDF(18299),
- AOM_ICDF(18757), AOM_ICDF(19125), AOM_ICDF(19423), AOM_ICDF(19924),
- AOM_ICDF(20504), AOM_ICDF(22922), AOM_ICDF(24063), AOM_ICDF(25577),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(18944), AOM_ICDF(19925), AOM_ICDF(20908), AOM_ICDF(20998),
- AOM_ICDF(21017), AOM_ICDF(21072), AOM_ICDF(21084), AOM_ICDF(21121),
- AOM_ICDF(21159), AOM_ICDF(22064), AOM_ICDF(22820), AOM_ICDF(24290),
- AOM_ICDF(32768), 0,
- },
- };
-
-#if CONFIG_CFL
-static const aom_cdf_prob
- default_uv_mode_cdf[INTRA_MODES][CDF_SIZE(UV_INTRA_MODES)] = {
- { AOM_ICDF(18377), AOM_ICDF(18815), AOM_ICDF(19743), AOM_ICDF(20178),
- AOM_ICDF(20560), AOM_ICDF(20889), AOM_ICDF(21359), AOM_ICDF(22098),
- AOM_ICDF(22481), AOM_ICDF(24563), AOM_ICDF(25781), AOM_ICDF(26662),
- AOM_ICDF(28396), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5350), AOM_ICDF(16837), AOM_ICDF(17066), AOM_ICDF(17360),
- AOM_ICDF(17692), AOM_ICDF(18778), AOM_ICDF(18969), AOM_ICDF(19206),
- AOM_ICDF(20291), AOM_ICDF(22367), AOM_ICDF(23212), AOM_ICDF(24670),
- AOM_ICDF(27912), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6671), AOM_ICDF(6759), AOM_ICDF(17812), AOM_ICDF(17998),
- AOM_ICDF(18260), AOM_ICDF(18384), AOM_ICDF(19408), AOM_ICDF(20667),
- AOM_ICDF(20806), AOM_ICDF(22760), AOM_ICDF(24142), AOM_ICDF(24875),
- AOM_ICDF(28072), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7461), AOM_ICDF(8082), AOM_ICDF(8515), AOM_ICDF(15013),
- AOM_ICDF(15583), AOM_ICDF(16098), AOM_ICDF(16522), AOM_ICDF(18519),
- AOM_ICDF(20348), AOM_ICDF(22954), AOM_ICDF(24130), AOM_ICDF(25342),
- AOM_ICDF(26548), AOM_ICDF(32768), 0 },
- { AOM_ICDF(3694), AOM_ICDF(4403), AOM_ICDF(5370), AOM_ICDF(5854),
- AOM_ICDF(17841), AOM_ICDF(19639), AOM_ICDF(21625), AOM_ICDF(22224),
- AOM_ICDF(22651), AOM_ICDF(24613), AOM_ICDF(25399), AOM_ICDF(26143),
- AOM_ICDF(26599), AOM_ICDF(32768), 0 },
- { AOM_ICDF(3700), AOM_ICDF(5651), AOM_ICDF(6112), AOM_ICDF(6541),
- AOM_ICDF(8929), AOM_ICDF(20623), AOM_ICDF(21213), AOM_ICDF(21640),
- AOM_ICDF(22214), AOM_ICDF(24306), AOM_ICDF(25412), AOM_ICDF(26406),
- AOM_ICDF(27249), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4649), AOM_ICDF(4947), AOM_ICDF(7128), AOM_ICDF(7432),
- AOM_ICDF(9439), AOM_ICDF(9903), AOM_ICDF(21163), AOM_ICDF(21774),
- AOM_ICDF(22056), AOM_ICDF(24426), AOM_ICDF(25403), AOM_ICDF(26324),
- AOM_ICDF(27128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7208), AOM_ICDF(7375), AOM_ICDF(8779), AOM_ICDF(9683),
- AOM_ICDF(10072), AOM_ICDF(10284), AOM_ICDF(10796), AOM_ICDF(19786),
- AOM_ICDF(20152), AOM_ICDF(22955), AOM_ICDF(24246), AOM_ICDF(25165),
- AOM_ICDF(26589), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5897), AOM_ICDF(7283), AOM_ICDF(7555), AOM_ICDF(8910),
- AOM_ICDF(9391), AOM_ICDF(9937), AOM_ICDF(10276), AOM_ICDF(11044),
- AOM_ICDF(19841), AOM_ICDF(22620), AOM_ICDF(23784), AOM_ICDF(25060),
- AOM_ICDF(26418), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12171), AOM_ICDF(12718), AOM_ICDF(13885), AOM_ICDF(14348),
- AOM_ICDF(14925), AOM_ICDF(15394), AOM_ICDF(16108), AOM_ICDF(17075),
- AOM_ICDF(17583), AOM_ICDF(21996), AOM_ICDF(23614), AOM_ICDF(25048),
- AOM_ICDF(27011), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10192), AOM_ICDF(11222), AOM_ICDF(12318), AOM_ICDF(12877),
- AOM_ICDF(13533), AOM_ICDF(14184), AOM_ICDF(14866), AOM_ICDF(15879),
- AOM_ICDF(16650), AOM_ICDF(20419), AOM_ICDF(23265), AOM_ICDF(24295),
- AOM_ICDF(26596), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10776), AOM_ICDF(11387), AOM_ICDF(12899), AOM_ICDF(13471),
- AOM_ICDF(14088), AOM_ICDF(14575), AOM_ICDF(15366), AOM_ICDF(16456),
- AOM_ICDF(17040), AOM_ICDF(20815), AOM_ICDF(22009), AOM_ICDF(24448),
- AOM_ICDF(26492), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4015), AOM_ICDF(6473), AOM_ICDF(9853), AOM_ICDF(10285),
- AOM_ICDF(10655), AOM_ICDF(11032), AOM_ICDF(11431), AOM_ICDF(12199),
- AOM_ICDF(12738), AOM_ICDF(14760), AOM_ICDF(16121), AOM_ICDF(17263),
- AOM_ICDF(28612), AOM_ICDF(32768), 0 },
- };
-#else
-static const aom_cdf_prob
- default_uv_mode_cdf[INTRA_MODES][CDF_SIZE(UV_INTRA_MODES)] = {
- {
- AOM_ICDF(23552), AOM_ICDF(25936), AOM_ICDF(28623), AOM_ICDF(29033),
- AOM_ICDF(29395), AOM_ICDF(29892), AOM_ICDF(30252), AOM_ICDF(30905),
- AOM_ICDF(31370), AOM_ICDF(31980), AOM_ICDF(32293), AOM_ICDF(32660),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(2944), AOM_ICDF(26431), AOM_ICDF(27553), AOM_ICDF(27746),
- AOM_ICDF(28022), AOM_ICDF(29080), AOM_ICDF(29204), AOM_ICDF(29377),
- AOM_ICDF(30264), AOM_ICDF(31206), AOM_ICDF(31613), AOM_ICDF(32418),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4352), AOM_ICDF(5120), AOM_ICDF(27952), AOM_ICDF(28117),
- AOM_ICDF(28473), AOM_ICDF(28759), AOM_ICDF(29563), AOM_ICDF(30864),
- AOM_ICDF(31051), AOM_ICDF(31694), AOM_ICDF(32073), AOM_ICDF(32435),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(17664), AOM_ICDF(20288), AOM_ICDF(21839), AOM_ICDF(26072),
- AOM_ICDF(26420), AOM_ICDF(26972), AOM_ICDF(27240), AOM_ICDF(28565),
- AOM_ICDF(30914), AOM_ICDF(31694), AOM_ICDF(32083), AOM_ICDF(32591),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(16640), AOM_ICDF(18390), AOM_ICDF(20233), AOM_ICDF(20557),
- AOM_ICDF(25162), AOM_ICDF(27789), AOM_ICDF(29397), AOM_ICDF(29895),
- AOM_ICDF(30369), AOM_ICDF(31497), AOM_ICDF(32025), AOM_ICDF(32642),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(13952), AOM_ICDF(17947), AOM_ICDF(18918), AOM_ICDF(19206),
- AOM_ICDF(21131), AOM_ICDF(30668), AOM_ICDF(31061), AOM_ICDF(31317),
- AOM_ICDF(31838), AOM_ICDF(32137), AOM_ICDF(32342), AOM_ICDF(32547),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(15872), AOM_ICDF(16990), AOM_ICDF(21479), AOM_ICDF(21732),
- AOM_ICDF(24134), AOM_ICDF(24854), AOM_ICDF(30296), AOM_ICDF(30887),
- AOM_ICDF(31163), AOM_ICDF(31902), AOM_ICDF(32218), AOM_ICDF(32702),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(16256), AOM_ICDF(17280), AOM_ICDF(23081), AOM_ICDF(24039),
- AOM_ICDF(24457), AOM_ICDF(24838), AOM_ICDF(25346), AOM_ICDF(30329),
- AOM_ICDF(30908), AOM_ICDF(31746), AOM_ICDF(32206), AOM_ICDF(32639),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(14720), AOM_ICDF(19249), AOM_ICDF(20501), AOM_ICDF(22079),
- AOM_ICDF(22439), AOM_ICDF(23218), AOM_ICDF(23463), AOM_ICDF(24107),
- AOM_ICDF(30308), AOM_ICDF(31379), AOM_ICDF(31866), AOM_ICDF(32556),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(16768), AOM_ICDF(19967), AOM_ICDF(22374), AOM_ICDF(22976),
- AOM_ICDF(23836), AOM_ICDF(24050), AOM_ICDF(24642), AOM_ICDF(25760),
- AOM_ICDF(26653), AOM_ICDF(29585), AOM_ICDF(30937), AOM_ICDF(32518),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(16768), AOM_ICDF(20751), AOM_ICDF(23026), AOM_ICDF(23591),
- AOM_ICDF(24299), AOM_ICDF(24516), AOM_ICDF(24981), AOM_ICDF(25876),
- AOM_ICDF(26806), AOM_ICDF(29520), AOM_ICDF(31286), AOM_ICDF(32455),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(17536), AOM_ICDF(20055), AOM_ICDF(22965), AOM_ICDF(23507),
- AOM_ICDF(24210), AOM_ICDF(24398), AOM_ICDF(25098), AOM_ICDF(26366),
- AOM_ICDF(27033), AOM_ICDF(29674), AOM_ICDF(30689), AOM_ICDF(32530),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(17536), AOM_ICDF(22753), AOM_ICDF(27126), AOM_ICDF(27353),
- AOM_ICDF(27571), AOM_ICDF(28139), AOM_ICDF(28505), AOM_ICDF(29198),
- AOM_ICDF(29886), AOM_ICDF(30801), AOM_ICDF(31335), AOM_ICDF(32054),
- AOM_ICDF(32768), 0,
- },
- };
-#endif // CONFIG_CFL
-#else // !CONFIG_SMOOTH_HV
-static const aom_cdf_prob
- default_if_y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(INTRA_MODES)] = {
- {
- AOM_ICDF(11264), AOM_ICDF(14965), AOM_ICDF(19742), AOM_ICDF(21904),
- AOM_ICDF(24115), AOM_ICDF(25678), AOM_ICDF(27210), AOM_ICDF(28705),
- AOM_ICDF(29782), AOM_ICDF(31424), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9600), AOM_ICDF(13747), AOM_ICDF(18569), AOM_ICDF(20091),
- AOM_ICDF(21925), AOM_ICDF(23082), AOM_ICDF(24404), AOM_ICDF(26285),
- AOM_ICDF(27574), AOM_ICDF(30415), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9344), AOM_ICDF(14319), AOM_ICDF(19567), AOM_ICDF(20476),
- AOM_ICDF(21791), AOM_ICDF(22529), AOM_ICDF(23393), AOM_ICDF(24881),
- AOM_ICDF(26012), AOM_ICDF(30572), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(12288), AOM_ICDF(15866), AOM_ICDF(21186), AOM_ICDF(21722),
- AOM_ICDF(22209), AOM_ICDF(22564), AOM_ICDF(22966), AOM_ICDF(24025),
- AOM_ICDF(24716), AOM_ICDF(30608), AOM_ICDF(32768), 0,
- },
- };
-
-static const aom_cdf_prob
- default_uv_mode_cdf[INTRA_MODES][CDF_SIZE(UV_INTRA_MODES)] = {
- {
- AOM_ICDF(25472), AOM_ICDF(27697), AOM_ICDF(30693), AOM_ICDF(30916),
- AOM_ICDF(31125), AOM_ICDF(31406), AOM_ICDF(31679), AOM_ICDF(32085),
- AOM_ICDF(32334), AOM_ICDF(32682), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(2176), AOM_ICDF(28142), AOM_ICDF(29335), AOM_ICDF(29504),
- AOM_ICDF(29762), AOM_ICDF(30642), AOM_ICDF(30741), AOM_ICDF(30902),
- AOM_ICDF(31683), AOM_ICDF(32529), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3328), AOM_ICDF(3901), AOM_ICDF(30984), AOM_ICDF(31068),
- AOM_ICDF(31241), AOM_ICDF(31389), AOM_ICDF(31697), AOM_ICDF(32379),
- AOM_ICDF(32483), AOM_ICDF(32653), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(23424), AOM_ICDF(25805), AOM_ICDF(27721), AOM_ICDF(29432),
- AOM_ICDF(29659), AOM_ICDF(30032), AOM_ICDF(30282), AOM_ICDF(31192),
- AOM_ICDF(32259), AOM_ICDF(32658), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(22784), AOM_ICDF(24177), AOM_ICDF(26209), AOM_ICDF(26476),
- AOM_ICDF(28679), AOM_ICDF(29698), AOM_ICDF(30786), AOM_ICDF(31257),
- AOM_ICDF(31596), AOM_ICDF(32690), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(19712), AOM_ICDF(22988), AOM_ICDF(24275), AOM_ICDF(24520),
- AOM_ICDF(25823), AOM_ICDF(31469), AOM_ICDF(31880), AOM_ICDF(32189),
- AOM_ICDF(32614), AOM_ICDF(32615), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(21376), AOM_ICDF(22085), AOM_ICDF(27643), AOM_ICDF(27799),
- AOM_ICDF(28596), AOM_ICDF(28890), AOM_ICDF(31767), AOM_ICDF(32255),
- AOM_ICDF(32405), AOM_ICDF(32723), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(19712), AOM_ICDF(20623), AOM_ICDF(28408), AOM_ICDF(28766),
- AOM_ICDF(29070), AOM_ICDF(29355), AOM_ICDF(29759), AOM_ICDF(32034),
- AOM_ICDF(32306), AOM_ICDF(32666), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(19584), AOM_ICDF(23437), AOM_ICDF(25295), AOM_ICDF(26200),
- AOM_ICDF(26612), AOM_ICDF(27372), AOM_ICDF(27704), AOM_ICDF(28319),
- AOM_ICDF(31664), AOM_ICDF(32562), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(20864), AOM_ICDF(23989), AOM_ICDF(26168), AOM_ICDF(26591),
- AOM_ICDF(27345), AOM_ICDF(27348), AOM_ICDF(27809), AOM_ICDF(28575),
- AOM_ICDF(29132), AOM_ICDF(32628), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(23680), AOM_ICDF(27582), AOM_ICDF(30197), AOM_ICDF(30312),
- AOM_ICDF(30464), AOM_ICDF(30995), AOM_ICDF(31208), AOM_ICDF(31574),
- AOM_ICDF(31985), AOM_ICDF(32519), AOM_ICDF(32768), 0,
- },
- };
-#endif // CONFIG_SMOOTH_HV
-
-#if CONFIG_EXT_PARTITION_TYPES
-static const aom_cdf_prob
- default_partition_cdf[PARTITION_CONTEXTS][CDF_SIZE(EXT_PARTITION_TYPES)] = {
- // 8x8 -> 4x4 only supports the four legacy partition types
- { AOM_ICDF(25472), AOM_ICDF(28949), AOM_ICDF(31052), AOM_ICDF(32768), 0,
- 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(18816), AOM_ICDF(22250), AOM_ICDF(28783), AOM_ICDF(32768), 0,
- 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(18944), AOM_ICDF(26126), AOM_ICDF(29188), AOM_ICDF(32768), 0,
- 0, 0, 0, 0, 0, 0 },
- { AOM_ICDF(15488), AOM_ICDF(22508), AOM_ICDF(27077), AOM_ICDF(32768), 0,
- 0, 0, 0, 0, 0, 0 },
- // 16x16 -> 8x8
- { AOM_ICDF(22272), AOM_ICDF(23768), AOM_ICDF(25043), AOM_ICDF(29996),
- AOM_ICDF(30495), AOM_ICDF(30994), AOM_ICDF(31419), AOM_ICDF(31844),
- AOM_ICDF(32343), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11776), AOM_ICDF(13457), AOM_ICDF(16315), AOM_ICDF(28229),
- AOM_ICDF(28789), AOM_ICDF(29349), AOM_ICDF(30302), AOM_ICDF(31255),
- AOM_ICDF(31816), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10496), AOM_ICDF(14802), AOM_ICDF(16136), AOM_ICDF(27127),
- AOM_ICDF(28563), AOM_ICDF(29999), AOM_ICDF(30444), AOM_ICDF(30889),
- AOM_ICDF(32324), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6784), AOM_ICDF(8763), AOM_ICDF(10440), AOM_ICDF(29110),
- AOM_ICDF(29770), AOM_ICDF(30430), AOM_ICDF(30989), AOM_ICDF(31548),
- AOM_ICDF(32208), AOM_ICDF(32768), 0 },
- // 32x32 -> 16x16
- { AOM_ICDF(22656), AOM_ICDF(23801), AOM_ICDF(24702), AOM_ICDF(30721),
- AOM_ICDF(31103), AOM_ICDF(31485), AOM_ICDF(31785), AOM_ICDF(32085),
- AOM_ICDF(32467), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8704), AOM_ICDF(9926), AOM_ICDF(12586), AOM_ICDF(28885),
- AOM_ICDF(29292), AOM_ICDF(29699), AOM_ICDF(30586), AOM_ICDF(31473),
- AOM_ICDF(31881), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6656), AOM_ICDF(10685), AOM_ICDF(11566), AOM_ICDF(27857),
- AOM_ICDF(29200), AOM_ICDF(30543), AOM_ICDF(30837), AOM_ICDF(31131),
- AOM_ICDF(32474), AOM_ICDF(32768), 0 },
- { AOM_ICDF(2176), AOM_ICDF(3012), AOM_ICDF(3690), AOM_ICDF(31253),
- AOM_ICDF(31532), AOM_ICDF(31811), AOM_ICDF(32037), AOM_ICDF(32263),
- AOM_ICDF(32542), AOM_ICDF(32768), 0 },
- // 64x64 -> 32x32
- { AOM_ICDF(28416), AOM_ICDF(28705), AOM_ICDF(28926), AOM_ICDF(32258),
- AOM_ICDF(32354), AOM_ICDF(32450), AOM_ICDF(32523), AOM_ICDF(32596),
- AOM_ICDF(32693), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9216), AOM_ICDF(9952), AOM_ICDF(11849), AOM_ICDF(30134),
- AOM_ICDF(30379), AOM_ICDF(30624), AOM_ICDF(31256), AOM_ICDF(31888),
- AOM_ICDF(32134), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7424), AOM_ICDF(9008), AOM_ICDF(9528), AOM_ICDF(30664),
- AOM_ICDF(31192), AOM_ICDF(31720), AOM_ICDF(31893), AOM_ICDF(32066),
- AOM_ICDF(32594), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(1710), AOM_ICDF(2069), AOM_ICDF(31978),
- AOM_ICDF(32121), AOM_ICDF(32264), AOM_ICDF(32383), AOM_ICDF(32502),
- AOM_ICDF(32647), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- // 128x128 -> 64x64
- { AOM_ICDF(28416), AOM_ICDF(28705), AOM_ICDF(28926), AOM_ICDF(32258),
- AOM_ICDF(32354), AOM_ICDF(32450), AOM_ICDF(32523), AOM_ICDF(32596),
- AOM_ICDF(32693), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9216), AOM_ICDF(9952), AOM_ICDF(11849), AOM_ICDF(30134),
- AOM_ICDF(30379), AOM_ICDF(30624), AOM_ICDF(31256), AOM_ICDF(31888),
- AOM_ICDF(32134), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7424), AOM_ICDF(9008), AOM_ICDF(9528), AOM_ICDF(30664),
- AOM_ICDF(31192), AOM_ICDF(31720), AOM_ICDF(31893), AOM_ICDF(32066),
- AOM_ICDF(32594), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(1710), AOM_ICDF(2069), AOM_ICDF(31978),
- AOM_ICDF(32121), AOM_ICDF(32264), AOM_ICDF(32383), AOM_ICDF(32502),
- AOM_ICDF(32647), AOM_ICDF(32768), 0 },
-#endif
- };
-#else
-static const aom_cdf_prob
- default_partition_cdf[PARTITION_CONTEXTS][CDF_SIZE(PARTITION_TYPES)] = {
- { AOM_ICDF(25472), AOM_ICDF(28949), AOM_ICDF(31052), AOM_ICDF(32768), 0 },
- { AOM_ICDF(18816), AOM_ICDF(22250), AOM_ICDF(28783), AOM_ICDF(32768), 0 },
- { AOM_ICDF(18944), AOM_ICDF(26126), AOM_ICDF(29188), AOM_ICDF(32768), 0 },
- { AOM_ICDF(15488), AOM_ICDF(22508), AOM_ICDF(27077), AOM_ICDF(32768), 0 },
- { AOM_ICDF(22272), AOM_ICDF(25265), AOM_ICDF(27815), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11776), AOM_ICDF(15138), AOM_ICDF(20854), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10496), AOM_ICDF(19109), AOM_ICDF(21777), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6784), AOM_ICDF(10743), AOM_ICDF(14098), AOM_ICDF(32768), 0 },
- { AOM_ICDF(22656), AOM_ICDF(24947), AOM_ICDF(26749), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8704), AOM_ICDF(11148), AOM_ICDF(16469), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6656), AOM_ICDF(14714), AOM_ICDF(16477), AOM_ICDF(32768), 0 },
- { AOM_ICDF(2176), AOM_ICDF(3849), AOM_ICDF(5205), AOM_ICDF(32768), 0 },
- { AOM_ICDF(28416), AOM_ICDF(28994), AOM_ICDF(29436), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9216), AOM_ICDF(10688), AOM_ICDF(14483), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7424), AOM_ICDF(10592), AOM_ICDF(11632), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(2141), AOM_ICDF(2859), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(28416), AOM_ICDF(28994), AOM_ICDF(29436), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9216), AOM_ICDF(10688), AOM_ICDF(14483), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7424), AOM_ICDF(10592), AOM_ICDF(11632), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(2141), AOM_ICDF(2859), AOM_ICDF(32768), 0 },
-#endif
- };
-#endif
-
-#if CONFIG_EXT_TX
-static const aom_cdf_prob default_intra_ext_tx_cdf
- [EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES][CDF_SIZE(TX_TYPES)] = {
- {
-// FIXME: unused zero positions, from uncoded trivial transform set
-#if CONFIG_CHROMA_2X2
- {
- { 0 },
- },
-#endif
- {
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
-#if CONFIG_SMOOTH_HV
- { 0 },
- { 0 },
-#endif // CONFIG_SMOOTH_HV
- { 0 },
- },
- {
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
-#if CONFIG_SMOOTH_HV
- { 0 },
- { 0 },
-#endif // CONFIG_SMOOTH_HV
- { 0 },
- },
- {
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
-#if CONFIG_SMOOTH_HV
- { 0 },
- { 0 },
-#endif // CONFIG_SMOOTH_HV
- { 0 },
- },
- {
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
-#if CONFIG_SMOOTH_HV
- { 0 },
- { 0 },
-#endif // CONFIG_SMOOTH_HV
- { 0 },
- },
- },
- {
- {
- { AOM_ICDF(1024), AOM_ICDF(28800), AOM_ICDF(29048),
- AOM_ICDF(29296), AOM_ICDF(30164), AOM_ICDF(31466),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(27118), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1152), AOM_ICDF(25852), AOM_ICDF(26284),
- AOM_ICDF(26717), AOM_ICDF(28230), AOM_ICDF(30499),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(2016), AOM_ICDF(3938), AOM_ICDF(5860),
- AOM_ICDF(29404), AOM_ICDF(31086), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(27118), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(5900), AOM_ICDF(7691),
- AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(5900), AOM_ICDF(7691),
- AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
-#if CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
-#endif // CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(1024), AOM_ICDF(28800), AOM_ICDF(29048),
- AOM_ICDF(29296), AOM_ICDF(30164), AOM_ICDF(31466),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(27118), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1152), AOM_ICDF(25852), AOM_ICDF(26284),
- AOM_ICDF(26717), AOM_ICDF(28230), AOM_ICDF(30499),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(2016), AOM_ICDF(3938), AOM_ICDF(5860),
- AOM_ICDF(29404), AOM_ICDF(31086), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(27118), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(5900), AOM_ICDF(7691),
- AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(5900), AOM_ICDF(7691),
- AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
-#if CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
-#endif // CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(1024), AOM_ICDF(28800), AOM_ICDF(29048),
- AOM_ICDF(29296), AOM_ICDF(30164), AOM_ICDF(31466),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(27118), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1152), AOM_ICDF(25852), AOM_ICDF(26284),
- AOM_ICDF(26717), AOM_ICDF(28230), AOM_ICDF(30499),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(2016), AOM_ICDF(3938), AOM_ICDF(5860),
- AOM_ICDF(29404), AOM_ICDF(31086), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(27118), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(5900), AOM_ICDF(7691),
- AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(5900), AOM_ICDF(7691),
- AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
-#if CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
-#endif // CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(1024), AOM_ICDF(28800), AOM_ICDF(29048),
- AOM_ICDF(29296), AOM_ICDF(30164), AOM_ICDF(31466),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(27118), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1152), AOM_ICDF(25852), AOM_ICDF(26284),
- AOM_ICDF(26717), AOM_ICDF(28230), AOM_ICDF(30499),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(2016), AOM_ICDF(3938), AOM_ICDF(5860),
- AOM_ICDF(29404), AOM_ICDF(31086), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(27118), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(5900), AOM_ICDF(7691),
- AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(5900), AOM_ICDF(7691),
- AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
-#if CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
-#endif // CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
- AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
- },
- },
- {
- {
- { AOM_ICDF(1024), AOM_ICDF(28800), AOM_ICDF(29792),
- AOM_ICDF(31280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(26310),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1152), AOM_ICDF(25852), AOM_ICDF(27581),
- AOM_ICDF(30174), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(2016), AOM_ICDF(28924),
- AOM_ICDF(30846), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(26310),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(13065),
- AOM_ICDF(26611), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(13065),
- AOM_ICDF(26611), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
-#if CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
-#endif // CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(1024), AOM_ICDF(28800), AOM_ICDF(29792),
- AOM_ICDF(31280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(26310),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1152), AOM_ICDF(25852), AOM_ICDF(27581),
- AOM_ICDF(30174), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(2016), AOM_ICDF(28924),
- AOM_ICDF(30846), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(26310),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(13065),
- AOM_ICDF(26611), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(13065),
- AOM_ICDF(26611), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
-#if CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
-#endif // CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(1024), AOM_ICDF(28800), AOM_ICDF(29792),
- AOM_ICDF(31280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(26310),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1152), AOM_ICDF(25852), AOM_ICDF(27581),
- AOM_ICDF(30174), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(2016), AOM_ICDF(28924),
- AOM_ICDF(30846), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(26310),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(13065),
- AOM_ICDF(26611), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(13065),
- AOM_ICDF(26611), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
-#if CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
-#endif // CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(1024), AOM_ICDF(28800), AOM_ICDF(29792),
- AOM_ICDF(31280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(26310),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1152), AOM_ICDF(25852), AOM_ICDF(27581),
- AOM_ICDF(30174), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(2016), AOM_ICDF(28924),
- AOM_ICDF(30846), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(26310),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(13065),
- AOM_ICDF(26611), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(13065),
- AOM_ICDF(26611), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
-#if CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
-#endif // CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
- AOM_ICDF(32768), 0 },
- },
- },
-#if CONFIG_MRC_TX
- {
- {
- { AOM_ICDF(1024), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1152), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
-#if CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
-#endif // CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(1024), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1152), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
-#if CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
-#endif // CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(1024), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1152), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
-#if CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
-#endif // CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(1024), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1152), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
-#if CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
-#endif // CONFIG_SMOOTH_HV
- { AOM_ICDF(1280), AOM_ICDF(32768), 0 },
- },
- }
-#endif // CONFIG_MRC_TX
- };
-static const aom_cdf_prob
- default_inter_ext_tx_cdf[EXT_TX_SETS_INTER][EXT_TX_SIZES][CDF_SIZE(
- TX_TYPES)] = {
- {
-#if CONFIG_CHROMA_2X2
- { 0 },
-#endif
- { 0 },
- { 0 },
- { 0 },
- { 0 } },
- {
-#if CONFIG_CHROMA_2X2
- { 0 },
-#endif
- { AOM_ICDF(1280), AOM_ICDF(1453), AOM_ICDF(1626), AOM_ICDF(2277),
- AOM_ICDF(2929), AOM_ICDF(3580), AOM_ICDF(4232), AOM_ICDF(16717),
- AOM_ICDF(19225), AOM_ICDF(21733), AOM_ICDF(24241), AOM_ICDF(26749),
- AOM_ICDF(28253), AOM_ICDF(29758), AOM_ICDF(31263), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(1280), AOM_ICDF(1453), AOM_ICDF(1626), AOM_ICDF(2277),
- AOM_ICDF(2929), AOM_ICDF(3580), AOM_ICDF(4232), AOM_ICDF(16717),
- AOM_ICDF(19225), AOM_ICDF(21733), AOM_ICDF(24241), AOM_ICDF(26749),
- AOM_ICDF(28253), AOM_ICDF(29758), AOM_ICDF(31263), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(1280), AOM_ICDF(1453), AOM_ICDF(1626), AOM_ICDF(2277),
- AOM_ICDF(2929), AOM_ICDF(3580), AOM_ICDF(4232), AOM_ICDF(16717),
- AOM_ICDF(19225), AOM_ICDF(21733), AOM_ICDF(24241), AOM_ICDF(26749),
- AOM_ICDF(28253), AOM_ICDF(29758), AOM_ICDF(31263), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(1280), AOM_ICDF(1453), AOM_ICDF(1626), AOM_ICDF(2277),
- AOM_ICDF(2929), AOM_ICDF(3580), AOM_ICDF(4232), AOM_ICDF(16717),
- AOM_ICDF(19225), AOM_ICDF(21733), AOM_ICDF(24241), AOM_ICDF(26749),
- AOM_ICDF(28253), AOM_ICDF(29758), AOM_ICDF(31263), AOM_ICDF(32768),
- 0 } },
- {
-#if CONFIG_CHROMA_2X2
- { 0 },
-#endif
- { AOM_ICDF(1280), AOM_ICDF(3125), AOM_ICDF(4970), AOM_ICDF(17132),
- AOM_ICDF(19575), AOM_ICDF(22018), AOM_ICDF(24461), AOM_ICDF(26904),
- AOM_ICDF(28370), AOM_ICDF(29836), AOM_ICDF(31302), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(1280), AOM_ICDF(3125), AOM_ICDF(4970), AOM_ICDF(17132),
- AOM_ICDF(19575), AOM_ICDF(22018), AOM_ICDF(24461), AOM_ICDF(26904),
- AOM_ICDF(28370), AOM_ICDF(29836), AOM_ICDF(31302), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(1280), AOM_ICDF(3125), AOM_ICDF(4970), AOM_ICDF(17132),
- AOM_ICDF(19575), AOM_ICDF(22018), AOM_ICDF(24461), AOM_ICDF(26904),
- AOM_ICDF(28370), AOM_ICDF(29836), AOM_ICDF(31302), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(1280), AOM_ICDF(3125), AOM_ICDF(4970), AOM_ICDF(17132),
- AOM_ICDF(19575), AOM_ICDF(22018), AOM_ICDF(24461), AOM_ICDF(26904),
- AOM_ICDF(28370), AOM_ICDF(29836), AOM_ICDF(31302), AOM_ICDF(32768),
- 0 } },
- {
-#if CONFIG_CHROMA_2X2
- { 0 },
-#endif
- { AOM_ICDF(1536), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1536), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1536), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1536), AOM_ICDF(32768), 0 } },
-#if CONFIG_MRC_TX
- {
-#if CONFIG_CHROMA_2X2
- { 0 },
-#endif
- { AOM_ICDF(30080), AOM_ICDF(31781), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(32658), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(4685), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19072), AOM_ICDF(26776), AOM_ICDF(32768), 0 } },
-#endif // CONFIG_MRC_TX
- };
-#else
-#if CONFIG_MRC_TX
-static const aom_cdf_prob
- default_intra_ext_tx_cdf[EXT_TX_SIZES][TX_TYPES][CDF_SIZE(TX_TYPES)] = {
-#if CONFIG_CHROMA_2X2
- { { AOM_ICDF(30720), AOM_ICDF(31104), AOM_ICDF(31400), AOM_ICDF(32084),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(512), AOM_ICDF(582), AOM_ICDF(638), AOM_ICDF(31764),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(512), AOM_ICDF(582), AOM_ICDF(638), AOM_ICDF(1642),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(512), AOM_ICDF(582), AOM_ICDF(638), AOM_ICDF(1642),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(512), AOM_ICDF(1280), AOM_ICDF(31760), AOM_ICDF(32264),
- AOM_ICDF(32768), 0 } },
-#endif
- { { AOM_ICDF(30720), AOM_ICDF(31104), AOM_ICDF(31400), AOM_ICDF(32084),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(512), AOM_ICDF(582), AOM_ICDF(638), AOM_ICDF(31764),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(512), AOM_ICDF(582), AOM_ICDF(638), AOM_ICDF(1642),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(512), AOM_ICDF(582), AOM_ICDF(638), AOM_ICDF(1642),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(512), AOM_ICDF(1280), AOM_ICDF(31760), AOM_ICDF(32264),
- AOM_ICDF(32768), 0 } },
-
- { { AOM_ICDF(31232), AOM_ICDF(31488), AOM_ICDF(31742), AOM_ICDF(32255),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(1152), AOM_ICDF(1272), AOM_ICDF(31784),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(1152), AOM_ICDF(1272), AOM_ICDF(2256),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(1052), AOM_ICDF(1272), AOM_ICDF(2256),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(1792), AOM_ICDF(31776), AOM_ICDF(32272),
- AOM_ICDF(32768), 0 } },
-
- { { AOM_ICDF(31744), AOM_ICDF(31940), AOM_ICDF(32084), AOM_ICDF(32426),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(2048), AOM_ICDF(2176), AOM_ICDF(2528), AOM_ICDF(31823),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(2048), AOM_ICDF(2176), AOM_ICDF(2528), AOM_ICDF(3473),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(2048), AOM_ICDF(2176), AOM_ICDF(2528), AOM_ICDF(3473),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(2048), AOM_ICDF(28160), AOM_ICDF(31808), AOM_ICDF(32288),
- AOM_ICDF(32768), 0 } },
- };
-
-static const aom_cdf_prob
- default_inter_ext_tx_cdf[EXT_TX_SIZES][CDF_SIZE(TX_TYPES)] = {
-#if CONFIG_CHROMA_2X2
- { AOM_ICDF(20480), AOM_ICDF(23040), AOM_ICDF(24560), AOM_ICDF(28664),
- AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(20480), AOM_ICDF(23040), AOM_ICDF(24560), AOM_ICDF(28664),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(22528), AOM_ICDF(24320), AOM_ICDF(25928), AOM_ICDF(29348),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(24576), AOM_ICDF(25600), AOM_ICDF(27296), AOM_ICDF(30032),
- AOM_ICDF(32768), 0 },
- };
-#else // CONFIG_MRC_TX
-static const aom_cdf_prob
- default_intra_ext_tx_cdf[EXT_TX_SIZES][TX_TYPES][CDF_SIZE(TX_TYPES)] = {
-#if CONFIG_CHROMA_2X2
- { { AOM_ICDF(30720), AOM_ICDF(31400), AOM_ICDF(32084), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(512), AOM_ICDF(638), AOM_ICDF(31764), AOM_ICDF(32768), 0 },
- { AOM_ICDF(512), AOM_ICDF(638), AOM_ICDF(1642), AOM_ICDF(32768), 0 },
- { AOM_ICDF(512), AOM_ICDF(31760), AOM_ICDF(32264), AOM_ICDF(32768),
- 0 } },
-#endif
- { { AOM_ICDF(30720), AOM_ICDF(31400), AOM_ICDF(32084), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(512), AOM_ICDF(638), AOM_ICDF(31764), AOM_ICDF(32768), 0 },
- { AOM_ICDF(512), AOM_ICDF(638), AOM_ICDF(1642), AOM_ICDF(32768), 0 },
- { AOM_ICDF(512), AOM_ICDF(31760), AOM_ICDF(32264), AOM_ICDF(32768),
- 0 } },
-
- { { AOM_ICDF(31232), AOM_ICDF(31742), AOM_ICDF(32255), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(1024), AOM_ICDF(1272), AOM_ICDF(31784), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(1272), AOM_ICDF(2256), AOM_ICDF(32768), 0 },
- { AOM_ICDF(1024), AOM_ICDF(31776), AOM_ICDF(32272), AOM_ICDF(32768),
- 0 } },
- { { AOM_ICDF(31744), AOM_ICDF(32084), AOM_ICDF(32426), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(2048), AOM_ICDF(2528), AOM_ICDF(31823), AOM_ICDF(32768), 0 },
- { AOM_ICDF(2048), AOM_ICDF(2528), AOM_ICDF(3473), AOM_ICDF(32768), 0 },
- { AOM_ICDF(2048), AOM_ICDF(31808), AOM_ICDF(32288), AOM_ICDF(32768),
- 0 } },
- };
-
-static const aom_cdf_prob
- default_inter_ext_tx_cdf[EXT_TX_SIZES][CDF_SIZE(TX_TYPES)] = {
-#if CONFIG_CHROMA_2X2
- { AOM_ICDF(20480), AOM_ICDF(24560), AOM_ICDF(28664), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(20480), AOM_ICDF(24560), AOM_ICDF(28664), AOM_ICDF(32768), 0 },
- { AOM_ICDF(22528), AOM_ICDF(25928), AOM_ICDF(29348), AOM_ICDF(32768), 0 },
- { AOM_ICDF(24576), AOM_ICDF(27296), AOM_ICDF(30032), AOM_ICDF(32768), 0 },
- };
-#endif // CONFIG_MRC_TX
-#endif // !CONFIG_EXT_TX
-
-#if CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
-static const aom_cdf_prob
- default_intra_filter_cdf[INTRA_FILTERS + 1][CDF_SIZE(INTRA_FILTERS)] = {
- { AOM_ICDF(12544), AOM_ICDF(17521), AOM_ICDF(21095), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12544), AOM_ICDF(19022), AOM_ICDF(23318), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12032), AOM_ICDF(17297), AOM_ICDF(23522), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6272), AOM_ICDF(8860), AOM_ICDF(11101), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9216), AOM_ICDF(12712), AOM_ICDF(16629), AOM_ICDF(32768), 0 },
- };
-#endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
-
-#if CONFIG_CFL
-static const aom_cdf_prob default_cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)] = {
- AOM_ICDF(1892), AOM_ICDF(2229), AOM_ICDF(11464),
- AOM_ICDF(14116), AOM_ICDF(25661), AOM_ICDF(26409),
- AOM_ICDF(32508), AOM_ICDF(32768), 0
-};
-
-static const aom_cdf_prob
- default_cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)] = {
- { AOM_ICDF(16215), AOM_ICDF(27740), AOM_ICDF(31726), AOM_ICDF(32606),
- AOM_ICDF(32736), AOM_ICDF(32751), AOM_ICDF(32757), AOM_ICDF(32759),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(15213), AOM_ICDF(24615), AOM_ICDF(29704), AOM_ICDF(31974),
- AOM_ICDF(32545), AOM_ICDF(32673), AOM_ICDF(32713), AOM_ICDF(32746),
- AOM_ICDF(32753), AOM_ICDF(32756), AOM_ICDF(32758), AOM_ICDF(32761),
- AOM_ICDF(32763), AOM_ICDF(32764), AOM_ICDF(32766), AOM_ICDF(32768), 0 },
- { AOM_ICDF(13250), AOM_ICDF(24677), AOM_ICDF(29113), AOM_ICDF(31666),
- AOM_ICDF(32408), AOM_ICDF(32578), AOM_ICDF(32628), AOM_ICDF(32711),
- AOM_ICDF(32730), AOM_ICDF(32738), AOM_ICDF(32744), AOM_ICDF(32749),
- AOM_ICDF(32752), AOM_ICDF(32756), AOM_ICDF(32759), AOM_ICDF(32768), 0 },
- { AOM_ICDF(24593), AOM_ICDF(30787), AOM_ICDF(32062), AOM_ICDF(32495),
- AOM_ICDF(32656), AOM_ICDF(32707), AOM_ICDF(32735), AOM_ICDF(32747),
- AOM_ICDF(32752), AOM_ICDF(32757), AOM_ICDF(32760), AOM_ICDF(32763),
- AOM_ICDF(32764), AOM_ICDF(32765), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19883), AOM_ICDF(27419), AOM_ICDF(30100), AOM_ICDF(31392),
- AOM_ICDF(31896), AOM_ICDF(32184), AOM_ICDF(32299), AOM_ICDF(32511),
- AOM_ICDF(32568), AOM_ICDF(32602), AOM_ICDF(32628), AOM_ICDF(32664),
- AOM_ICDF(32680), AOM_ICDF(32691), AOM_ICDF(32708), AOM_ICDF(32768), 0 },
- { AOM_ICDF(15939), AOM_ICDF(24151), AOM_ICDF(27754), AOM_ICDF(29680),
- AOM_ICDF(30651), AOM_ICDF(31267), AOM_ICDF(31527), AOM_ICDF(31868),
- AOM_ICDF(32001), AOM_ICDF(32090), AOM_ICDF(32181), AOM_ICDF(32284),
- AOM_ICDF(32314), AOM_ICDF(32366), AOM_ICDF(32486), AOM_ICDF(32768), 0 }
- };
-#endif
-
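-// Layout note for the CDF tables above and below: each row is a cumulative
-// distribution over the symbol alphabet in units of 1/32768, the last
-// cumulative entry is always 32768, and the extra trailing 0 is the slot the
-// adaptive update step uses as a counter (CDF_SIZE(N) == N + 1). The entries
-// are wrapped in AOM_ICDF(), which converts them to the form the entropy
-// coder consumes internally. As a rough illustration only -- cdf_lookup below
-// is a hypothetical helper, not a libaom API, and it works on plain
-// cumulative values rather than the AOM_ICDF() form -- a symbol could be
-// recovered from such a row like this:
-//
-//   #include <stdint.h>
-//
-//   /* Map a uniform value u in [0, 32768) to the symbol whose cumulative
-//    * interval contains it. cdf holds nsymb cumulative values in 1/32768
-//    * units, with cdf[nsymb - 1] == 32768. */
-//   static int cdf_lookup(const uint16_t *cdf, int nsymb, unsigned u) {
-//     int s = 0;
-//     while (s < nsymb - 1 && u >= cdf[s]) ++s;
-//     return s;
-//   }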
-#if CONFIG_KF_CTX
-// TODO(jingning): These initial models are copied directly from the entries
-// of the original table. The copied indexes are (0, 0), (0, 1), ..., (4, 4).
-// It is possible to re-train this model and recover the 0.14% loss in CIF
-// set key frame coding. This reduction in context model does not change the
-// key frame coding stats for mid and high resolution sets.
-const aom_cdf_prob
- default_kf_y_mode_cdf[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS][CDF_SIZE(
- INTRA_MODES)] = {
- {
- {
- AOM_ICDF(14208), AOM_ICDF(17049), AOM_ICDF(20482),
- AOM_ICDF(21400), AOM_ICDF(22520), AOM_ICDF(23261),
- AOM_ICDF(23963), AOM_ICDF(25010), AOM_ICDF(25828),
- AOM_ICDF(28398), AOM_ICDF(29394), AOM_ICDF(30738),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10496), AOM_ICDF(18295), AOM_ICDF(19872),
- AOM_ICDF(20945), AOM_ICDF(21933), AOM_ICDF(22818),
- AOM_ICDF(23334), AOM_ICDF(24033), AOM_ICDF(24996),
- AOM_ICDF(27652), AOM_ICDF(29060), AOM_ICDF(30071),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5120), AOM_ICDF(6461), AOM_ICDF(19840), AOM_ICDF(20310),
- AOM_ICDF(21151), AOM_ICDF(21506), AOM_ICDF(22535),
- AOM_ICDF(23900), AOM_ICDF(24281), AOM_ICDF(26958),
- AOM_ICDF(27680), AOM_ICDF(29636), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(12544), AOM_ICDF(15177), AOM_ICDF(17666),
- AOM_ICDF(19855), AOM_ICDF(21147), AOM_ICDF(22017),
- AOM_ICDF(22797), AOM_ICDF(24514), AOM_ICDF(25779),
- AOM_ICDF(28716), AOM_ICDF(29772), AOM_ICDF(31267),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7552), AOM_ICDF(9909), AOM_ICDF(11908), AOM_ICDF(13141),
- AOM_ICDF(18765), AOM_ICDF(22029), AOM_ICDF(23872),
- AOM_ICDF(24920), AOM_ICDF(25674), AOM_ICDF(29031),
- AOM_ICDF(30244), AOM_ICDF(31684), AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(3968), AOM_ICDF(17613), AOM_ICDF(19125), AOM_ICDF(19550),
- AOM_ICDF(20305), AOM_ICDF(21908), AOM_ICDF(22274),
- AOM_ICDF(22719), AOM_ICDF(23959), AOM_ICDF(26970),
- AOM_ICDF(29013), AOM_ICDF(29843), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3072), AOM_ICDF(21231), AOM_ICDF(21863), AOM_ICDF(22306),
- AOM_ICDF(22674), AOM_ICDF(23414), AOM_ICDF(23517),
- AOM_ICDF(23798), AOM_ICDF(24770), AOM_ICDF(27032),
- AOM_ICDF(29016), AOM_ICDF(29636), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(2560), AOM_ICDF(9825), AOM_ICDF(15681), AOM_ICDF(16370),
- AOM_ICDF(17054), AOM_ICDF(17687), AOM_ICDF(18236),
- AOM_ICDF(19273), AOM_ICDF(20311), AOM_ICDF(24863),
- AOM_ICDF(26825), AOM_ICDF(28756), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6912), AOM_ICDF(15140), AOM_ICDF(16485), AOM_ICDF(18364),
- AOM_ICDF(19181), AOM_ICDF(20394), AOM_ICDF(20663),
- AOM_ICDF(22098), AOM_ICDF(23936), AOM_ICDF(27555),
- AOM_ICDF(29704), AOM_ICDF(30849), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(2944), AOM_ICDF(13101), AOM_ICDF(14006), AOM_ICDF(14974),
- AOM_ICDF(17818), AOM_ICDF(21093), AOM_ICDF(21930),
- AOM_ICDF(22566), AOM_ICDF(24137), AOM_ICDF(27732),
- AOM_ICDF(29814), AOM_ICDF(30904), AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(11392), AOM_ICDF(12961), AOM_ICDF(20901),
- AOM_ICDF(21544), AOM_ICDF(22490), AOM_ICDF(22928),
- AOM_ICDF(23888), AOM_ICDF(25214), AOM_ICDF(25777),
- AOM_ICDF(28256), AOM_ICDF(29102), AOM_ICDF(30513),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8064), AOM_ICDF(13595), AOM_ICDF(18888), AOM_ICDF(19616),
- AOM_ICDF(20765), AOM_ICDF(21454), AOM_ICDF(21990),
- AOM_ICDF(23103), AOM_ICDF(23980), AOM_ICDF(26772),
- AOM_ICDF(28070), AOM_ICDF(29197), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4352), AOM_ICDF(5059), AOM_ICDF(21705), AOM_ICDF(22099),
- AOM_ICDF(22703), AOM_ICDF(22846), AOM_ICDF(23679),
- AOM_ICDF(25469), AOM_ICDF(25728), AOM_ICDF(27919),
- AOM_ICDF(28484), AOM_ICDF(30215), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10752), AOM_ICDF(12277), AOM_ICDF(16471),
- AOM_ICDF(18276), AOM_ICDF(19443), AOM_ICDF(19917),
- AOM_ICDF(21158), AOM_ICDF(23881), AOM_ICDF(24892),
- AOM_ICDF(27709), AOM_ICDF(28771), AOM_ICDF(30274),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8320), AOM_ICDF(10000), AOM_ICDF(14147), AOM_ICDF(15330),
- AOM_ICDF(19197), AOM_ICDF(20923), AOM_ICDF(22954),
- AOM_ICDF(24541), AOM_ICDF(25285), AOM_ICDF(28407),
- AOM_ICDF(29431), AOM_ICDF(30953), AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(10240), AOM_ICDF(12819), AOM_ICDF(15545),
- AOM_ICDF(18248), AOM_ICDF(19779), AOM_ICDF(20932),
- AOM_ICDF(21899), AOM_ICDF(23377), AOM_ICDF(25448),
- AOM_ICDF(28730), AOM_ICDF(29936), AOM_ICDF(31536),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7552), AOM_ICDF(15309), AOM_ICDF(16645), AOM_ICDF(19760),
- AOM_ICDF(20653), AOM_ICDF(21650), AOM_ICDF(22221),
- AOM_ICDF(23273), AOM_ICDF(25509), AOM_ICDF(28683),
- AOM_ICDF(30153), AOM_ICDF(31192), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5248), AOM_ICDF(6840), AOM_ICDF(16129), AOM_ICDF(17940),
- AOM_ICDF(19069), AOM_ICDF(19660), AOM_ICDF(20588),
- AOM_ICDF(22760), AOM_ICDF(23927), AOM_ICDF(27538),
- AOM_ICDF(28397), AOM_ICDF(30725), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(11008), AOM_ICDF(11903), AOM_ICDF(13794),
- AOM_ICDF(21320), AOM_ICDF(21931), AOM_ICDF(22310),
- AOM_ICDF(22546), AOM_ICDF(25375), AOM_ICDF(27347),
- AOM_ICDF(29800), AOM_ICDF(30761), AOM_ICDF(31833),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6272), AOM_ICDF(8678), AOM_ICDF(10313), AOM_ICDF(13073),
- AOM_ICDF(16823), AOM_ICDF(19980), AOM_ICDF(21520),
- AOM_ICDF(23242), AOM_ICDF(25344), AOM_ICDF(28797),
- AOM_ICDF(30405), AOM_ICDF(31940), AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(7296), AOM_ICDF(9304), AOM_ICDF(11772), AOM_ICDF(12529),
- AOM_ICDF(18014), AOM_ICDF(20418), AOM_ICDF(23076),
- AOM_ICDF(24662), AOM_ICDF(25549), AOM_ICDF(29074),
- AOM_ICDF(30392), AOM_ICDF(31773), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7168), AOM_ICDF(11687), AOM_ICDF(13541), AOM_ICDF(14431),
- AOM_ICDF(18214), AOM_ICDF(20761), AOM_ICDF(22269),
- AOM_ICDF(23320), AOM_ICDF(24633), AOM_ICDF(28339),
- AOM_ICDF(30193), AOM_ICDF(31268), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3584), AOM_ICDF(4428), AOM_ICDF(13496), AOM_ICDF(14189),
- AOM_ICDF(17372), AOM_ICDF(18617), AOM_ICDF(20609),
- AOM_ICDF(22615), AOM_ICDF(23270), AOM_ICDF(27280),
- AOM_ICDF(28305), AOM_ICDF(30602), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7424), AOM_ICDF(8834), AOM_ICDF(10499), AOM_ICDF(14357),
- AOM_ICDF(17671), AOM_ICDF(19150), AOM_ICDF(20460),
- AOM_ICDF(23235), AOM_ICDF(24391), AOM_ICDF(28351),
- AOM_ICDF(29843), AOM_ICDF(31481), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4480), AOM_ICDF(5888), AOM_ICDF(7093), AOM_ICDF(7902),
- AOM_ICDF(18290), AOM_ICDF(22123), AOM_ICDF(24511),
- AOM_ICDF(25532), AOM_ICDF(26360), AOM_ICDF(29653),
- AOM_ICDF(30954), AOM_ICDF(32215), AOM_ICDF(32768), 0,
- },
- },
- };
-#else
-const aom_cdf_prob default_kf_y_mode_cdf[INTRA_MODES][INTRA_MODES][CDF_SIZE(
- INTRA_MODES)] = {
-#if CONFIG_SMOOTH_HV
- {
- {
- AOM_ICDF(14208), AOM_ICDF(17049), AOM_ICDF(20482), AOM_ICDF(21400),
- AOM_ICDF(22520), AOM_ICDF(23261), AOM_ICDF(23963), AOM_ICDF(25010),
- AOM_ICDF(25828), AOM_ICDF(28398), AOM_ICDF(29394), AOM_ICDF(30738),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10496), AOM_ICDF(18295), AOM_ICDF(19872), AOM_ICDF(20945),
- AOM_ICDF(21933), AOM_ICDF(22818), AOM_ICDF(23334), AOM_ICDF(24033),
- AOM_ICDF(24996), AOM_ICDF(27652), AOM_ICDF(29060), AOM_ICDF(30071),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5120), AOM_ICDF(6461), AOM_ICDF(19840), AOM_ICDF(20310),
- AOM_ICDF(21151), AOM_ICDF(21506), AOM_ICDF(22535), AOM_ICDF(23900),
- AOM_ICDF(24281), AOM_ICDF(26958), AOM_ICDF(27680), AOM_ICDF(29636),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(12544), AOM_ICDF(15177), AOM_ICDF(17666), AOM_ICDF(19855),
- AOM_ICDF(21147), AOM_ICDF(22017), AOM_ICDF(22797), AOM_ICDF(24514),
- AOM_ICDF(25779), AOM_ICDF(28716), AOM_ICDF(29772), AOM_ICDF(31267),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7552), AOM_ICDF(9909), AOM_ICDF(11908), AOM_ICDF(13141),
- AOM_ICDF(18765), AOM_ICDF(22029), AOM_ICDF(23872), AOM_ICDF(24920),
- AOM_ICDF(25674), AOM_ICDF(29031), AOM_ICDF(30244), AOM_ICDF(31684),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(11008), AOM_ICDF(15004), AOM_ICDF(16534), AOM_ICDF(18158),
- AOM_ICDF(21515), AOM_ICDF(26668), AOM_ICDF(27834), AOM_ICDF(28735),
- AOM_ICDF(30471), AOM_ICDF(30839), AOM_ICDF(30969), AOM_ICDF(31068),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6272), AOM_ICDF(7963), AOM_ICDF(11944), AOM_ICDF(12780),
- AOM_ICDF(17944), AOM_ICDF(19198), AOM_ICDF(24071), AOM_ICDF(25295),
- AOM_ICDF(25834), AOM_ICDF(29014), AOM_ICDF(29949), AOM_ICDF(31733),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8192), AOM_ICDF(10189), AOM_ICDF(14596), AOM_ICDF(15680),
- AOM_ICDF(17143), AOM_ICDF(17909), AOM_ICDF(19201), AOM_ICDF(23711),
- AOM_ICDF(24503), AOM_ICDF(28207), AOM_ICDF(29338), AOM_ICDF(31424),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10752), AOM_ICDF(13199), AOM_ICDF(15048), AOM_ICDF(17151),
- AOM_ICDF(18445), AOM_ICDF(19604), AOM_ICDF(20363), AOM_ICDF(21782),
- AOM_ICDF(24311), AOM_ICDF(28026), AOM_ICDF(29517), AOM_ICDF(30962),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7424), AOM_ICDF(10301), AOM_ICDF(13245), AOM_ICDF(14307),
- AOM_ICDF(16021), AOM_ICDF(16257), AOM_ICDF(17265), AOM_ICDF(18739),
- AOM_ICDF(20080), AOM_ICDF(26066), AOM_ICDF(28325), AOM_ICDF(31184),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6528), AOM_ICDF(10893), AOM_ICDF(13773), AOM_ICDF(14824),
- AOM_ICDF(16540), AOM_ICDF(16926), AOM_ICDF(17748), AOM_ICDF(18876),
- AOM_ICDF(20396), AOM_ICDF(25974), AOM_ICDF(28795), AOM_ICDF(30820),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8704), AOM_ICDF(11005), AOM_ICDF(14320), AOM_ICDF(15349),
- AOM_ICDF(16746), AOM_ICDF(16884), AOM_ICDF(17887), AOM_ICDF(19304),
- AOM_ICDF(20265), AOM_ICDF(26115), AOM_ICDF(27672), AOM_ICDF(31358),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6272), AOM_ICDF(9504), AOM_ICDF(15437), AOM_ICDF(16399),
- AOM_ICDF(17355), AOM_ICDF(17948), AOM_ICDF(18814), AOM_ICDF(20270),
- AOM_ICDF(21134), AOM_ICDF(23690), AOM_ICDF(24759), AOM_ICDF(26454),
- AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(3968), AOM_ICDF(17613), AOM_ICDF(19125), AOM_ICDF(19550),
- AOM_ICDF(20305), AOM_ICDF(21908), AOM_ICDF(22274), AOM_ICDF(22719),
- AOM_ICDF(23959), AOM_ICDF(26970), AOM_ICDF(29013), AOM_ICDF(29843),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3072), AOM_ICDF(21231), AOM_ICDF(21863), AOM_ICDF(22306),
- AOM_ICDF(22674), AOM_ICDF(23414), AOM_ICDF(23517), AOM_ICDF(23798),
- AOM_ICDF(24770), AOM_ICDF(27032), AOM_ICDF(29016), AOM_ICDF(29636),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(2560), AOM_ICDF(9825), AOM_ICDF(15681), AOM_ICDF(16370),
- AOM_ICDF(17054), AOM_ICDF(17687), AOM_ICDF(18236), AOM_ICDF(19273),
- AOM_ICDF(20311), AOM_ICDF(24863), AOM_ICDF(26825), AOM_ICDF(28756),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6912), AOM_ICDF(15140), AOM_ICDF(16485), AOM_ICDF(18364),
- AOM_ICDF(19181), AOM_ICDF(20394), AOM_ICDF(20663), AOM_ICDF(22098),
- AOM_ICDF(23936), AOM_ICDF(27555), AOM_ICDF(29704), AOM_ICDF(30849),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(2944), AOM_ICDF(13101), AOM_ICDF(14006), AOM_ICDF(14974),
- AOM_ICDF(17818), AOM_ICDF(21093), AOM_ICDF(21930), AOM_ICDF(22566),
- AOM_ICDF(24137), AOM_ICDF(27732), AOM_ICDF(29814), AOM_ICDF(30904),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4352), AOM_ICDF(17824), AOM_ICDF(18715), AOM_ICDF(19632),
- AOM_ICDF(21519), AOM_ICDF(26341), AOM_ICDF(26922), AOM_ICDF(27575),
- AOM_ICDF(29863), AOM_ICDF(30432), AOM_ICDF(30769), AOM_ICDF(30881),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(2944), AOM_ICDF(11971), AOM_ICDF(13509), AOM_ICDF(14295),
- AOM_ICDF(17202), AOM_ICDF(19005), AOM_ICDF(21605), AOM_ICDF(22458),
- AOM_ICDF(23839), AOM_ICDF(27774), AOM_ICDF(29492), AOM_ICDF(30787),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4224), AOM_ICDF(13072), AOM_ICDF(15288), AOM_ICDF(16406),
- AOM_ICDF(17285), AOM_ICDF(18362), AOM_ICDF(19003), AOM_ICDF(21378),
- AOM_ICDF(22942), AOM_ICDF(27093), AOM_ICDF(29381), AOM_ICDF(30872),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5120), AOM_ICDF(15375), AOM_ICDF(16188), AOM_ICDF(17415),
- AOM_ICDF(18183), AOM_ICDF(19756), AOM_ICDF(20030), AOM_ICDF(20883),
- AOM_ICDF(23935), AOM_ICDF(27428), AOM_ICDF(29627), AOM_ICDF(30608),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(2816), AOM_ICDF(14999), AOM_ICDF(16352), AOM_ICDF(16969),
- AOM_ICDF(17836), AOM_ICDF(18125), AOM_ICDF(18514), AOM_ICDF(19181),
- AOM_ICDF(20650), AOM_ICDF(25773), AOM_ICDF(29172), AOM_ICDF(30662),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(2560), AOM_ICDF(16158), AOM_ICDF(17320), AOM_ICDF(17839),
- AOM_ICDF(18545), AOM_ICDF(18848), AOM_ICDF(19130), AOM_ICDF(19599),
- AOM_ICDF(20863), AOM_ICDF(25449), AOM_ICDF(29304), AOM_ICDF(30408),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3328), AOM_ICDF(15146), AOM_ICDF(16880), AOM_ICDF(17523),
- AOM_ICDF(18340), AOM_ICDF(18563), AOM_ICDF(18896), AOM_ICDF(19582),
- AOM_ICDF(20944), AOM_ICDF(25914), AOM_ICDF(28759), AOM_ICDF(30583),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(2560), AOM_ICDF(16618), AOM_ICDF(18460), AOM_ICDF(19207),
- AOM_ICDF(19654), AOM_ICDF(20276), AOM_ICDF(20529), AOM_ICDF(21179),
- AOM_ICDF(22355), AOM_ICDF(25423), AOM_ICDF(27696), AOM_ICDF(28638),
- AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(11392), AOM_ICDF(12961), AOM_ICDF(20901), AOM_ICDF(21544),
- AOM_ICDF(22490), AOM_ICDF(22928), AOM_ICDF(23888), AOM_ICDF(25214),
- AOM_ICDF(25777), AOM_ICDF(28256), AOM_ICDF(29102), AOM_ICDF(30513),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8064), AOM_ICDF(13595), AOM_ICDF(18888), AOM_ICDF(19616),
- AOM_ICDF(20765), AOM_ICDF(21454), AOM_ICDF(21990), AOM_ICDF(23103),
- AOM_ICDF(23980), AOM_ICDF(26772), AOM_ICDF(28070), AOM_ICDF(29197),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4352), AOM_ICDF(5059), AOM_ICDF(21705), AOM_ICDF(22099),
- AOM_ICDF(22703), AOM_ICDF(22846), AOM_ICDF(23679), AOM_ICDF(25469),
- AOM_ICDF(25728), AOM_ICDF(27919), AOM_ICDF(28484), AOM_ICDF(30215),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10752), AOM_ICDF(12277), AOM_ICDF(16471), AOM_ICDF(18276),
- AOM_ICDF(19443), AOM_ICDF(19917), AOM_ICDF(21158), AOM_ICDF(23881),
- AOM_ICDF(24892), AOM_ICDF(27709), AOM_ICDF(28771), AOM_ICDF(30274),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8320), AOM_ICDF(10000), AOM_ICDF(14147), AOM_ICDF(15330),
- AOM_ICDF(19197), AOM_ICDF(20923), AOM_ICDF(22954), AOM_ICDF(24541),
- AOM_ICDF(25285), AOM_ICDF(28407), AOM_ICDF(29431), AOM_ICDF(30953),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(11264), AOM_ICDF(14751), AOM_ICDF(18517), AOM_ICDF(20285),
- AOM_ICDF(23172), AOM_ICDF(25970), AOM_ICDF(27312), AOM_ICDF(28684),
- AOM_ICDF(29803), AOM_ICDF(30242), AOM_ICDF(30412), AOM_ICDF(30668),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6528), AOM_ICDF(7509), AOM_ICDF(14190), AOM_ICDF(14953),
- AOM_ICDF(17905), AOM_ICDF(18452), AOM_ICDF(23074), AOM_ICDF(24910),
- AOM_ICDF(25374), AOM_ICDF(28605), AOM_ICDF(29542), AOM_ICDF(31640),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6784), AOM_ICDF(7644), AOM_ICDF(15953), AOM_ICDF(17055),
- AOM_ICDF(17945), AOM_ICDF(18242), AOM_ICDF(19351), AOM_ICDF(24705),
- AOM_ICDF(25365), AOM_ICDF(28466), AOM_ICDF(29334), AOM_ICDF(31245),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8192), AOM_ICDF(9802), AOM_ICDF(14519), AOM_ICDF(15740),
- AOM_ICDF(17351), AOM_ICDF(18084), AOM_ICDF(18962), AOM_ICDF(20908),
- AOM_ICDF(22937), AOM_ICDF(26847), AOM_ICDF(28284), AOM_ICDF(29888),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5888), AOM_ICDF(7534), AOM_ICDF(14635), AOM_ICDF(15436),
- AOM_ICDF(16710), AOM_ICDF(16830), AOM_ICDF(18000), AOM_ICDF(19760),
- AOM_ICDF(20571), AOM_ICDF(25777), AOM_ICDF(27649), AOM_ICDF(30668),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5248), AOM_ICDF(7364), AOM_ICDF(14858), AOM_ICDF(15545),
- AOM_ICDF(16861), AOM_ICDF(17016), AOM_ICDF(17859), AOM_ICDF(19384),
- AOM_ICDF(20237), AOM_ICDF(25239), AOM_ICDF(27715), AOM_ICDF(29865),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6656), AOM_ICDF(7989), AOM_ICDF(15472), AOM_ICDF(16265),
- AOM_ICDF(17271), AOM_ICDF(17334), AOM_ICDF(18563), AOM_ICDF(20327),
- AOM_ICDF(20916), AOM_ICDF(26173), AOM_ICDF(27350), AOM_ICDF(31034),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4480), AOM_ICDF(6411), AOM_ICDF(17828), AOM_ICDF(18611),
- AOM_ICDF(19399), AOM_ICDF(19684), AOM_ICDF(20504), AOM_ICDF(21782),
- AOM_ICDF(22335), AOM_ICDF(25286), AOM_ICDF(26352), AOM_ICDF(28016),
- AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(10240), AOM_ICDF(12819), AOM_ICDF(15545), AOM_ICDF(18248),
- AOM_ICDF(19779), AOM_ICDF(20932), AOM_ICDF(21899), AOM_ICDF(23377),
- AOM_ICDF(25448), AOM_ICDF(28730), AOM_ICDF(29936), AOM_ICDF(31536),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7552), AOM_ICDF(15309), AOM_ICDF(16645), AOM_ICDF(19760),
- AOM_ICDF(20653), AOM_ICDF(21650), AOM_ICDF(22221), AOM_ICDF(23273),
- AOM_ICDF(25509), AOM_ICDF(28683), AOM_ICDF(30153), AOM_ICDF(31192),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5248), AOM_ICDF(6840), AOM_ICDF(16129), AOM_ICDF(17940),
- AOM_ICDF(19069), AOM_ICDF(19660), AOM_ICDF(20588), AOM_ICDF(22760),
- AOM_ICDF(23927), AOM_ICDF(27538), AOM_ICDF(28397), AOM_ICDF(30725),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(11008), AOM_ICDF(11903), AOM_ICDF(13794), AOM_ICDF(21320),
- AOM_ICDF(21931), AOM_ICDF(22310), AOM_ICDF(22546), AOM_ICDF(25375),
- AOM_ICDF(27347), AOM_ICDF(29800), AOM_ICDF(30761), AOM_ICDF(31833),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6272), AOM_ICDF(8678), AOM_ICDF(10313), AOM_ICDF(13073),
- AOM_ICDF(16823), AOM_ICDF(19980), AOM_ICDF(21520), AOM_ICDF(23242),
- AOM_ICDF(25344), AOM_ICDF(28797), AOM_ICDF(30405), AOM_ICDF(31940),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7424), AOM_ICDF(10835), AOM_ICDF(12653), AOM_ICDF(16345),
- AOM_ICDF(19574), AOM_ICDF(24868), AOM_ICDF(25937), AOM_ICDF(27299),
- AOM_ICDF(31104), AOM_ICDF(31448), AOM_ICDF(31580), AOM_ICDF(31679),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4992), AOM_ICDF(6458), AOM_ICDF(9945), AOM_ICDF(11961),
- AOM_ICDF(16136), AOM_ICDF(17677), AOM_ICDF(20946), AOM_ICDF(23042),
- AOM_ICDF(24475), AOM_ICDF(28304), AOM_ICDF(29748), AOM_ICDF(31791),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9600), AOM_ICDF(11879), AOM_ICDF(14703), AOM_ICDF(17653),
- AOM_ICDF(19176), AOM_ICDF(20185), AOM_ICDF(20880), AOM_ICDF(25194),
- AOM_ICDF(26584), AOM_ICDF(29655), AOM_ICDF(30430), AOM_ICDF(32044),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9856), AOM_ICDF(11385), AOM_ICDF(13457), AOM_ICDF(18705),
- AOM_ICDF(19577), AOM_ICDF(20266), AOM_ICDF(20746), AOM_ICDF(22207),
- AOM_ICDF(26724), AOM_ICDF(29431), AOM_ICDF(30645), AOM_ICDF(31604),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6272), AOM_ICDF(9318), AOM_ICDF(11569), AOM_ICDF(14812),
- AOM_ICDF(16351), AOM_ICDF(16619), AOM_ICDF(17537), AOM_ICDF(19596),
- AOM_ICDF(22025), AOM_ICDF(27384), AOM_ICDF(29277), AOM_ICDF(31422),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5888), AOM_ICDF(9348), AOM_ICDF(11416), AOM_ICDF(14690),
- AOM_ICDF(16254), AOM_ICDF(16633), AOM_ICDF(17457), AOM_ICDF(19031),
- AOM_ICDF(21875), AOM_ICDF(27080), AOM_ICDF(29442), AOM_ICDF(31193),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6912), AOM_ICDF(9329), AOM_ICDF(12218), AOM_ICDF(15177),
- AOM_ICDF(16806), AOM_ICDF(16998), AOM_ICDF(17991), AOM_ICDF(20005),
- AOM_ICDF(21952), AOM_ICDF(27108), AOM_ICDF(28867), AOM_ICDF(31657),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5120), AOM_ICDF(9098), AOM_ICDF(13132), AOM_ICDF(17701),
- AOM_ICDF(18739), AOM_ICDF(19534), AOM_ICDF(20415), AOM_ICDF(22136),
- AOM_ICDF(24213), AOM_ICDF(27199), AOM_ICDF(28504), AOM_ICDF(29960),
- AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(7296), AOM_ICDF(9304), AOM_ICDF(11772), AOM_ICDF(12529),
- AOM_ICDF(18014), AOM_ICDF(20418), AOM_ICDF(23076), AOM_ICDF(24662),
- AOM_ICDF(25549), AOM_ICDF(29074), AOM_ICDF(30392), AOM_ICDF(31773),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7168), AOM_ICDF(11687), AOM_ICDF(13541), AOM_ICDF(14431),
- AOM_ICDF(18214), AOM_ICDF(20761), AOM_ICDF(22269), AOM_ICDF(23320),
- AOM_ICDF(24633), AOM_ICDF(28339), AOM_ICDF(30193), AOM_ICDF(31268),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3584), AOM_ICDF(4428), AOM_ICDF(13496), AOM_ICDF(14189),
- AOM_ICDF(17372), AOM_ICDF(18617), AOM_ICDF(20609), AOM_ICDF(22615),
- AOM_ICDF(23270), AOM_ICDF(27280), AOM_ICDF(28305), AOM_ICDF(30602),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7424), AOM_ICDF(8834), AOM_ICDF(10499), AOM_ICDF(14357),
- AOM_ICDF(17671), AOM_ICDF(19150), AOM_ICDF(20460), AOM_ICDF(23235),
- AOM_ICDF(24391), AOM_ICDF(28351), AOM_ICDF(29843), AOM_ICDF(31481),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4480), AOM_ICDF(5888), AOM_ICDF(7093), AOM_ICDF(7902),
- AOM_ICDF(18290), AOM_ICDF(22123), AOM_ICDF(24511), AOM_ICDF(25532),
- AOM_ICDF(26360), AOM_ICDF(29653), AOM_ICDF(30954), AOM_ICDF(32215),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7296), AOM_ICDF(10176), AOM_ICDF(11780), AOM_ICDF(12824),
- AOM_ICDF(19608), AOM_ICDF(25882), AOM_ICDF(28520), AOM_ICDF(29445),
- AOM_ICDF(31106), AOM_ICDF(31573), AOM_ICDF(31775), AOM_ICDF(31872),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3840), AOM_ICDF(4833), AOM_ICDF(7551), AOM_ICDF(8449),
- AOM_ICDF(16668), AOM_ICDF(18614), AOM_ICDF(23952), AOM_ICDF(25668),
- AOM_ICDF(26721), AOM_ICDF(29888), AOM_ICDF(30697), AOM_ICDF(32090),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6528), AOM_ICDF(8011), AOM_ICDF(11083), AOM_ICDF(12427),
- AOM_ICDF(16188), AOM_ICDF(17548), AOM_ICDF(19625), AOM_ICDF(23787),
- AOM_ICDF(24792), AOM_ICDF(28649), AOM_ICDF(29872), AOM_ICDF(31845),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7168), AOM_ICDF(9170), AOM_ICDF(10655), AOM_ICDF(12439),
- AOM_ICDF(15550), AOM_ICDF(18128), AOM_ICDF(19565), AOM_ICDF(21412),
- AOM_ICDF(23355), AOM_ICDF(28007), AOM_ICDF(30080), AOM_ICDF(31568),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5504), AOM_ICDF(7636), AOM_ICDF(10174), AOM_ICDF(11056),
- AOM_ICDF(15562), AOM_ICDF(16252), AOM_ICDF(17931), AOM_ICDF(19598),
- AOM_ICDF(20967), AOM_ICDF(26845), AOM_ICDF(29149), AOM_ICDF(31490),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5248), AOM_ICDF(7810), AOM_ICDF(10004), AOM_ICDF(11015),
- AOM_ICDF(15359), AOM_ICDF(16310), AOM_ICDF(17834), AOM_ICDF(19185),
- AOM_ICDF(20903), AOM_ICDF(26728), AOM_ICDF(29585), AOM_ICDF(31478),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5376), AOM_ICDF(7322), AOM_ICDF(10592), AOM_ICDF(11694),
- AOM_ICDF(15586), AOM_ICDF(16103), AOM_ICDF(17999), AOM_ICDF(19740),
- AOM_ICDF(20950), AOM_ICDF(26894), AOM_ICDF(28912), AOM_ICDF(31591),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4608), AOM_ICDF(7276), AOM_ICDF(12153), AOM_ICDF(13388),
- AOM_ICDF(16091), AOM_ICDF(17970), AOM_ICDF(19548), AOM_ICDF(21175),
- AOM_ICDF(22481), AOM_ICDF(26543), AOM_ICDF(28212), AOM_ICDF(29908),
- AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(6656), AOM_ICDF(12225), AOM_ICDF(14441), AOM_ICDF(15158),
- AOM_ICDF(19600), AOM_ICDF(27127), AOM_ICDF(28221), AOM_ICDF(29186),
- AOM_ICDF(30439), AOM_ICDF(30913), AOM_ICDF(31135), AOM_ICDF(31238),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6400), AOM_ICDF(14608), AOM_ICDF(15920), AOM_ICDF(16643),
- AOM_ICDF(20149), AOM_ICDF(27328), AOM_ICDF(27896), AOM_ICDF(28672),
- AOM_ICDF(30227), AOM_ICDF(30778), AOM_ICDF(31053), AOM_ICDF(31120),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3840), AOM_ICDF(6925), AOM_ICDF(14671), AOM_ICDF(15709),
- AOM_ICDF(19830), AOM_ICDF(24216), AOM_ICDF(25507), AOM_ICDF(27459),
- AOM_ICDF(28552), AOM_ICDF(29569), AOM_ICDF(29808), AOM_ICDF(30169),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9600), AOM_ICDF(13604), AOM_ICDF(15202), AOM_ICDF(17530),
- AOM_ICDF(20878), AOM_ICDF(24279), AOM_ICDF(25278), AOM_ICDF(28255),
- AOM_ICDF(30651), AOM_ICDF(31170), AOM_ICDF(31343), AOM_ICDF(31410),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4608), AOM_ICDF(8535), AOM_ICDF(9588), AOM_ICDF(10740),
- AOM_ICDF(18673), AOM_ICDF(27664), AOM_ICDF(28826), AOM_ICDF(29828),
- AOM_ICDF(31081), AOM_ICDF(31503), AOM_ICDF(31680), AOM_ICDF(31778),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4864), AOM_ICDF(10543), AOM_ICDF(11313), AOM_ICDF(12197),
- AOM_ICDF(16785), AOM_ICDF(27858), AOM_ICDF(28556), AOM_ICDF(29480),
- AOM_ICDF(30892), AOM_ICDF(31486), AOM_ICDF(31722), AOM_ICDF(31787),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3968), AOM_ICDF(7492), AOM_ICDF(10283), AOM_ICDF(11318),
- AOM_ICDF(18486), AOM_ICDF(24061), AOM_ICDF(26761), AOM_ICDF(28456),
- AOM_ICDF(30126), AOM_ICDF(30872), AOM_ICDF(31088), AOM_ICDF(31305),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6016), AOM_ICDF(10246), AOM_ICDF(12999), AOM_ICDF(15083),
- AOM_ICDF(18769), AOM_ICDF(22398), AOM_ICDF(23584), AOM_ICDF(27098),
- AOM_ICDF(29574), AOM_ICDF(30609), AOM_ICDF(30898), AOM_ICDF(31200),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7808), AOM_ICDF(13404), AOM_ICDF(14723), AOM_ICDF(16413),
- AOM_ICDF(20186), AOM_ICDF(24739), AOM_ICDF(25407), AOM_ICDF(27106),
- AOM_ICDF(29929), AOM_ICDF(30507), AOM_ICDF(30827), AOM_ICDF(30915),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(2816), AOM_ICDF(6530), AOM_ICDF(8123), AOM_ICDF(9240),
- AOM_ICDF(12536), AOM_ICDF(17593), AOM_ICDF(18754), AOM_ICDF(20319),
- AOM_ICDF(22070), AOM_ICDF(27037), AOM_ICDF(29332), AOM_ICDF(30779),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(2432), AOM_ICDF(6577), AOM_ICDF(8010), AOM_ICDF(9215),
- AOM_ICDF(12657), AOM_ICDF(18898), AOM_ICDF(19588), AOM_ICDF(20953),
- AOM_ICDF(22766), AOM_ICDF(27231), AOM_ICDF(29927), AOM_ICDF(31109),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3200), AOM_ICDF(6974), AOM_ICDF(9162), AOM_ICDF(10450),
- AOM_ICDF(13818), AOM_ICDF(17757), AOM_ICDF(19119), AOM_ICDF(20842),
- AOM_ICDF(22269), AOM_ICDF(27170), AOM_ICDF(29271), AOM_ICDF(30804),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4480), AOM_ICDF(10689), AOM_ICDF(15307), AOM_ICDF(16589),
- AOM_ICDF(19738), AOM_ICDF(24416), AOM_ICDF(25332), AOM_ICDF(26541),
- AOM_ICDF(28634), AOM_ICDF(29711), AOM_ICDF(29913), AOM_ICDF(30116),
- AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(9600), AOM_ICDF(11066), AOM_ICDF(15832), AOM_ICDF(16515),
- AOM_ICDF(18844), AOM_ICDF(19883), AOM_ICDF(24302), AOM_ICDF(25759),
- AOM_ICDF(26358), AOM_ICDF(29290), AOM_ICDF(30262), AOM_ICDF(31682),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8832), AOM_ICDF(12814), AOM_ICDF(16171), AOM_ICDF(17041),
- AOM_ICDF(19066), AOM_ICDF(20145), AOM_ICDF(22933), AOM_ICDF(24074),
- AOM_ICDF(25006), AOM_ICDF(28115), AOM_ICDF(29722), AOM_ICDF(30991),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3840), AOM_ICDF(4486), AOM_ICDF(15821), AOM_ICDF(16330),
- AOM_ICDF(18461), AOM_ICDF(18879), AOM_ICDF(22436), AOM_ICDF(25051),
- AOM_ICDF(25443), AOM_ICDF(28637), AOM_ICDF(29396), AOM_ICDF(31412),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9856), AOM_ICDF(10863), AOM_ICDF(14050), AOM_ICDF(15920),
- AOM_ICDF(18783), AOM_ICDF(19531), AOM_ICDF(22502), AOM_ICDF(24577),
- AOM_ICDF(25361), AOM_ICDF(28559), AOM_ICDF(29600), AOM_ICDF(31336),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6528), AOM_ICDF(7620), AOM_ICDF(10182), AOM_ICDF(11199),
- AOM_ICDF(17281), AOM_ICDF(19946), AOM_ICDF(23885), AOM_ICDF(25333),
- AOM_ICDF(26130), AOM_ICDF(29425), AOM_ICDF(30332), AOM_ICDF(31948),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9728), AOM_ICDF(11821), AOM_ICDF(13954), AOM_ICDF(15233),
- AOM_ICDF(19855), AOM_ICDF(24478), AOM_ICDF(28675), AOM_ICDF(29878),
- AOM_ICDF(31238), AOM_ICDF(31741), AOM_ICDF(31874), AOM_ICDF(32048),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5120), AOM_ICDF(5753), AOM_ICDF(9673), AOM_ICDF(10149),
- AOM_ICDF(14343), AOM_ICDF(15190), AOM_ICDF(24967), AOM_ICDF(26378),
- AOM_ICDF(26841), AOM_ICDF(29749), AOM_ICDF(30527), AOM_ICDF(32120),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5888), AOM_ICDF(6606), AOM_ICDF(11498), AOM_ICDF(12538),
- AOM_ICDF(14737), AOM_ICDF(15425), AOM_ICDF(19549), AOM_ICDF(24047),
- AOM_ICDF(24765), AOM_ICDF(28711), AOM_ICDF(29822), AOM_ICDF(32138),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10368), AOM_ICDF(11757), AOM_ICDF(14126), AOM_ICDF(15474),
- AOM_ICDF(18311), AOM_ICDF(19358), AOM_ICDF(21539), AOM_ICDF(23451),
- AOM_ICDF(25034), AOM_ICDF(28791), AOM_ICDF(30035), AOM_ICDF(31280),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6016), AOM_ICDF(7623), AOM_ICDF(11378), AOM_ICDF(12248),
- AOM_ICDF(15171), AOM_ICDF(15459), AOM_ICDF(18958), AOM_ICDF(20875),
- AOM_ICDF(21955), AOM_ICDF(27411), AOM_ICDF(29196), AOM_ICDF(31723),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5760), AOM_ICDF(7469), AOM_ICDF(11399), AOM_ICDF(12323),
- AOM_ICDF(15165), AOM_ICDF(15528), AOM_ICDF(18804), AOM_ICDF(20769),
- AOM_ICDF(21767), AOM_ICDF(27129), AOM_ICDF(29435), AOM_ICDF(31502),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7040), AOM_ICDF(8295), AOM_ICDF(12298), AOM_ICDF(13035),
- AOM_ICDF(15194), AOM_ICDF(15357), AOM_ICDF(18976), AOM_ICDF(21100),
- AOM_ICDF(21805), AOM_ICDF(26978), AOM_ICDF(28342), AOM_ICDF(31763),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5632), AOM_ICDF(7465), AOM_ICDF(14220), AOM_ICDF(15035),
- AOM_ICDF(17014), AOM_ICDF(18105), AOM_ICDF(21111), AOM_ICDF(23027),
- AOM_ICDF(23934), AOM_ICDF(27207), AOM_ICDF(28293), AOM_ICDF(30330),
- AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(11008), AOM_ICDF(13089), AOM_ICDF(17144), AOM_ICDF(18425),
- AOM_ICDF(19954), AOM_ICDF(20624), AOM_ICDF(21658), AOM_ICDF(24229),
- AOM_ICDF(25290), AOM_ICDF(28803), AOM_ICDF(29938), AOM_ICDF(31493),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9088), AOM_ICDF(14218), AOM_ICDF(16378), AOM_ICDF(17699),
- AOM_ICDF(18935), AOM_ICDF(19928), AOM_ICDF(20524), AOM_ICDF(22781),
- AOM_ICDF(24155), AOM_ICDF(27523), AOM_ICDF(29068), AOM_ICDF(30270),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6144), AOM_ICDF(7194), AOM_ICDF(17912), AOM_ICDF(18991),
- AOM_ICDF(19879), AOM_ICDF(20151), AOM_ICDF(21170), AOM_ICDF(23938),
- AOM_ICDF(24712), AOM_ICDF(27763), AOM_ICDF(28556), AOM_ICDF(30584),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10496), AOM_ICDF(11614), AOM_ICDF(13652), AOM_ICDF(16928),
- AOM_ICDF(18425), AOM_ICDF(18967), AOM_ICDF(19724), AOM_ICDF(23817),
- AOM_ICDF(25594), AOM_ICDF(28685), AOM_ICDF(29734), AOM_ICDF(30941),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7296), AOM_ICDF(8915), AOM_ICDF(11163), AOM_ICDF(13821),
- AOM_ICDF(16951), AOM_ICDF(18507), AOM_ICDF(20180), AOM_ICDF(22423),
- AOM_ICDF(24017), AOM_ICDF(28294), AOM_ICDF(29614), AOM_ICDF(31673),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9728), AOM_ICDF(13441), AOM_ICDF(15858), AOM_ICDF(18860),
- AOM_ICDF(21713), AOM_ICDF(24478), AOM_ICDF(25995), AOM_ICDF(28233),
- AOM_ICDF(30347), AOM_ICDF(30853), AOM_ICDF(31081), AOM_ICDF(31328),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6400), AOM_ICDF(7480), AOM_ICDF(11482), AOM_ICDF(13206),
- AOM_ICDF(16199), AOM_ICDF(16908), AOM_ICDF(20436), AOM_ICDF(23507),
- AOM_ICDF(24650), AOM_ICDF(28360), AOM_ICDF(29438), AOM_ICDF(31532),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9856), AOM_ICDF(10979), AOM_ICDF(13430), AOM_ICDF(15195),
- AOM_ICDF(15957), AOM_ICDF(16350), AOM_ICDF(16871), AOM_ICDF(26198),
- AOM_ICDF(26991), AOM_ICDF(29612), AOM_ICDF(30438), AOM_ICDF(31962),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8960), AOM_ICDF(10529), AOM_ICDF(12640), AOM_ICDF(15350),
- AOM_ICDF(16987), AOM_ICDF(17859), AOM_ICDF(18590), AOM_ICDF(21400),
- AOM_ICDF(23812), AOM_ICDF(28188), AOM_ICDF(29589), AOM_ICDF(31280),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7168), AOM_ICDF(8877), AOM_ICDF(12393), AOM_ICDF(14015),
- AOM_ICDF(15655), AOM_ICDF(15794), AOM_ICDF(16814), AOM_ICDF(19923),
- AOM_ICDF(21086), AOM_ICDF(26723), AOM_ICDF(28669), AOM_ICDF(31468),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6400), AOM_ICDF(8900), AOM_ICDF(12241), AOM_ICDF(13828),
- AOM_ICDF(15513), AOM_ICDF(15671), AOM_ICDF(16500), AOM_ICDF(19257),
- AOM_ICDF(20456), AOM_ICDF(25984), AOM_ICDF(28658), AOM_ICDF(31017),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7296), AOM_ICDF(8820), AOM_ICDF(12885), AOM_ICDF(14441),
- AOM_ICDF(15813), AOM_ICDF(15911), AOM_ICDF(16954), AOM_ICDF(20026),
- AOM_ICDF(20950), AOM_ICDF(26563), AOM_ICDF(28140), AOM_ICDF(31673),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6272), AOM_ICDF(8455), AOM_ICDF(13328), AOM_ICDF(15907),
- AOM_ICDF(17026), AOM_ICDF(17464), AOM_ICDF(18267), AOM_ICDF(21436),
- AOM_ICDF(22712), AOM_ICDF(26403), AOM_ICDF(27660), AOM_ICDF(29559),
- AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(6784), AOM_ICDF(11216), AOM_ICDF(13269), AOM_ICDF(15677),
- AOM_ICDF(16931), AOM_ICDF(18445), AOM_ICDF(19097), AOM_ICDF(20082),
- AOM_ICDF(24298), AOM_ICDF(28236), AOM_ICDF(30118), AOM_ICDF(31448),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5760), AOM_ICDF(13240), AOM_ICDF(14110), AOM_ICDF(16966),
- AOM_ICDF(17743), AOM_ICDF(18916), AOM_ICDF(19281), AOM_ICDF(19848),
- AOM_ICDF(25552), AOM_ICDF(28646), AOM_ICDF(30444), AOM_ICDF(31291),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4352), AOM_ICDF(6870), AOM_ICDF(14660), AOM_ICDF(16597),
- AOM_ICDF(17361), AOM_ICDF(18126), AOM_ICDF(18852), AOM_ICDF(20765),
- AOM_ICDF(23526), AOM_ICDF(27670), AOM_ICDF(29096), AOM_ICDF(31214),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9472), AOM_ICDF(11736), AOM_ICDF(13172), AOM_ICDF(18192),
- AOM_ICDF(19070), AOM_ICDF(19651), AOM_ICDF(19991), AOM_ICDF(21793),
- AOM_ICDF(26005), AOM_ICDF(29291), AOM_ICDF(30500), AOM_ICDF(31767),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4480), AOM_ICDF(7252), AOM_ICDF(8651), AOM_ICDF(12379),
- AOM_ICDF(14936), AOM_ICDF(17493), AOM_ICDF(18326), AOM_ICDF(19527),
- AOM_ICDF(23655), AOM_ICDF(28031), AOM_ICDF(29960), AOM_ICDF(31773),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6016), AOM_ICDF(11561), AOM_ICDF(12864), AOM_ICDF(15793),
- AOM_ICDF(18765), AOM_ICDF(23040), AOM_ICDF(23640), AOM_ICDF(24415),
- AOM_ICDF(31040), AOM_ICDF(31473), AOM_ICDF(31740), AOM_ICDF(31827),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4480), AOM_ICDF(6825), AOM_ICDF(8810), AOM_ICDF(11269),
- AOM_ICDF(14257), AOM_ICDF(15716), AOM_ICDF(18397), AOM_ICDF(20006),
- AOM_ICDF(24020), AOM_ICDF(28230), AOM_ICDF(29780), AOM_ICDF(31773),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6912), AOM_ICDF(9466), AOM_ICDF(11717), AOM_ICDF(15159),
- AOM_ICDF(16237), AOM_ICDF(17145), AOM_ICDF(17814), AOM_ICDF(21258),
- AOM_ICDF(24754), AOM_ICDF(28864), AOM_ICDF(30313), AOM_ICDF(32061),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7680), AOM_ICDF(10517), AOM_ICDF(11381), AOM_ICDF(16202),
- AOM_ICDF(16809), AOM_ICDF(17425), AOM_ICDF(17774), AOM_ICDF(18764),
- AOM_ICDF(26842), AOM_ICDF(29600), AOM_ICDF(31073), AOM_ICDF(31886),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4992), AOM_ICDF(8626), AOM_ICDF(10531), AOM_ICDF(13103),
- AOM_ICDF(14495), AOM_ICDF(14784), AOM_ICDF(15365), AOM_ICDF(16657),
- AOM_ICDF(21051), AOM_ICDF(27011), AOM_ICDF(29685), AOM_ICDF(31574),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4736), AOM_ICDF(9433), AOM_ICDF(10981), AOM_ICDF(13494),
- AOM_ICDF(14644), AOM_ICDF(15043), AOM_ICDF(15396), AOM_ICDF(16378),
- AOM_ICDF(21506), AOM_ICDF(26869), AOM_ICDF(29824), AOM_ICDF(31454),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5760), AOM_ICDF(9526), AOM_ICDF(11905), AOM_ICDF(14476),
- AOM_ICDF(15722), AOM_ICDF(16103), AOM_ICDF(16768), AOM_ICDF(18070),
- AOM_ICDF(21630), AOM_ICDF(27401), AOM_ICDF(29592), AOM_ICDF(31818),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4480), AOM_ICDF(9947), AOM_ICDF(12386), AOM_ICDF(15909),
- AOM_ICDF(16496), AOM_ICDF(17397), AOM_ICDF(17866), AOM_ICDF(18927),
- AOM_ICDF(24408), AOM_ICDF(27750), AOM_ICDF(29614), AOM_ICDF(30889),
- AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(7424), AOM_ICDF(10538), AOM_ICDF(14098), AOM_ICDF(14891),
- AOM_ICDF(16486), AOM_ICDF(16756), AOM_ICDF(17607), AOM_ICDF(18952),
- AOM_ICDF(20168), AOM_ICDF(26275), AOM_ICDF(28617), AOM_ICDF(31580),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5376), AOM_ICDF(13070), AOM_ICDF(14969), AOM_ICDF(15848),
- AOM_ICDF(17197), AOM_ICDF(17447), AOM_ICDF(17954), AOM_ICDF(18747),
- AOM_ICDF(20137), AOM_ICDF(25628), AOM_ICDF(28753), AOM_ICDF(30628),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3584), AOM_ICDF(5287), AOM_ICDF(16141), AOM_ICDF(16840),
- AOM_ICDF(17670), AOM_ICDF(17760), AOM_ICDF(18532), AOM_ICDF(20387),
- AOM_ICDF(21102), AOM_ICDF(26118), AOM_ICDF(27535), AOM_ICDF(30830),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7424), AOM_ICDF(9594), AOM_ICDF(11770), AOM_ICDF(14505),
- AOM_ICDF(16234), AOM_ICDF(16365), AOM_ICDF(17201), AOM_ICDF(20286),
- AOM_ICDF(22128), AOM_ICDF(27371), AOM_ICDF(29426), AOM_ICDF(31580),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5632), AOM_ICDF(8393), AOM_ICDF(10566), AOM_ICDF(11917),
- AOM_ICDF(16025), AOM_ICDF(16697), AOM_ICDF(18123), AOM_ICDF(19541),
- AOM_ICDF(21135), AOM_ICDF(27059), AOM_ICDF(29325), AOM_ICDF(31814),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3840), AOM_ICDF(7916), AOM_ICDF(9526), AOM_ICDF(11010),
- AOM_ICDF(14114), AOM_ICDF(18169), AOM_ICDF(19510), AOM_ICDF(21031),
- AOM_ICDF(23083), AOM_ICDF(27769), AOM_ICDF(29782), AOM_ICDF(31299),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5376), AOM_ICDF(7338), AOM_ICDF(10657), AOM_ICDF(11699),
- AOM_ICDF(14780), AOM_ICDF(15070), AOM_ICDF(18291), AOM_ICDF(20170),
- AOM_ICDF(21347), AOM_ICDF(26985), AOM_ICDF(28811), AOM_ICDF(31805),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5632), AOM_ICDF(7669), AOM_ICDF(11558), AOM_ICDF(12653),
- AOM_ICDF(13962), AOM_ICDF(14116), AOM_ICDF(15074), AOM_ICDF(19886),
- AOM_ICDF(21123), AOM_ICDF(26953), AOM_ICDF(28755), AOM_ICDF(31708),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6528), AOM_ICDF(9739), AOM_ICDF(11612), AOM_ICDF(13211),
- AOM_ICDF(14992), AOM_ICDF(15237), AOM_ICDF(16016), AOM_ICDF(17677),
- AOM_ICDF(20588), AOM_ICDF(26647), AOM_ICDF(29116), AOM_ICDF(31435),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5376), AOM_ICDF(8346), AOM_ICDF(11022), AOM_ICDF(11976),
- AOM_ICDF(13541), AOM_ICDF(13749), AOM_ICDF(14520), AOM_ICDF(16173),
- AOM_ICDF(17567), AOM_ICDF(25182), AOM_ICDF(28111), AOM_ICDF(31591),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4736), AOM_ICDF(8894), AOM_ICDF(11294), AOM_ICDF(12220),
- AOM_ICDF(13753), AOM_ICDF(14029), AOM_ICDF(14645), AOM_ICDF(16065),
- AOM_ICDF(17621), AOM_ICDF(24911), AOM_ICDF(28655), AOM_ICDF(31344),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5760), AOM_ICDF(8193), AOM_ICDF(11667), AOM_ICDF(12461),
- AOM_ICDF(13880), AOM_ICDF(14040), AOM_ICDF(14946), AOM_ICDF(16537),
- AOM_ICDF(17642), AOM_ICDF(25117), AOM_ICDF(27333), AOM_ICDF(31713),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4096), AOM_ICDF(8479), AOM_ICDF(13751), AOM_ICDF(14813),
- AOM_ICDF(15994), AOM_ICDF(16157), AOM_ICDF(16905), AOM_ICDF(18314),
- AOM_ICDF(19575), AOM_ICDF(25132), AOM_ICDF(27445), AOM_ICDF(30192),
- AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(7936), AOM_ICDF(12263), AOM_ICDF(15558), AOM_ICDF(16331),
- AOM_ICDF(17779), AOM_ICDF(18148), AOM_ICDF(18810), AOM_ICDF(19794),
- AOM_ICDF(21046), AOM_ICDF(26644), AOM_ICDF(29417), AOM_ICDF(31507),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5376), AOM_ICDF(15025), AOM_ICDF(16457), AOM_ICDF(17074),
- AOM_ICDF(18079), AOM_ICDF(18299), AOM_ICDF(18648), AOM_ICDF(19240),
- AOM_ICDF(20612), AOM_ICDF(25687), AOM_ICDF(29392), AOM_ICDF(30842),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3840), AOM_ICDF(6037), AOM_ICDF(17465), AOM_ICDF(18089),
- AOM_ICDF(18869), AOM_ICDF(18953), AOM_ICDF(19688), AOM_ICDF(21223),
- AOM_ICDF(21816), AOM_ICDF(26562), AOM_ICDF(28195), AOM_ICDF(30621),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8448), AOM_ICDF(11255), AOM_ICDF(13307), AOM_ICDF(15676),
- AOM_ICDF(17392), AOM_ICDF(17603), AOM_ICDF(18268), AOM_ICDF(20783),
- AOM_ICDF(22646), AOM_ICDF(27628), AOM_ICDF(29737), AOM_ICDF(31628),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5760), AOM_ICDF(9119), AOM_ICDF(11015), AOM_ICDF(12269),
- AOM_ICDF(16280), AOM_ICDF(17023), AOM_ICDF(18282), AOM_ICDF(19418),
- AOM_ICDF(21325), AOM_ICDF(27309), AOM_ICDF(30004), AOM_ICDF(31818),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3968), AOM_ICDF(9094), AOM_ICDF(10606), AOM_ICDF(12007),
- AOM_ICDF(14218), AOM_ICDF(18911), AOM_ICDF(20089), AOM_ICDF(20924),
- AOM_ICDF(23587), AOM_ICDF(27808), AOM_ICDF(30253), AOM_ICDF(31305),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6016), AOM_ICDF(8627), AOM_ICDF(11201), AOM_ICDF(12200),
- AOM_ICDF(15305), AOM_ICDF(15671), AOM_ICDF(18639), AOM_ICDF(20185),
- AOM_ICDF(21627), AOM_ICDF(26990), AOM_ICDF(29449), AOM_ICDF(31723),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6272), AOM_ICDF(8768), AOM_ICDF(12320), AOM_ICDF(13296),
- AOM_ICDF(14643), AOM_ICDF(14970), AOM_ICDF(15760), AOM_ICDF(20545),
- AOM_ICDF(21863), AOM_ICDF(27473), AOM_ICDF(29535), AOM_ICDF(31836),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6912), AOM_ICDF(10905), AOM_ICDF(12656), AOM_ICDF(14084),
- AOM_ICDF(15705), AOM_ICDF(16069), AOM_ICDF(16674), AOM_ICDF(17779),
- AOM_ICDF(21041), AOM_ICDF(26586), AOM_ICDF(29539), AOM_ICDF(31253),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5248), AOM_ICDF(9672), AOM_ICDF(12113), AOM_ICDF(12871),
- AOM_ICDF(14423), AOM_ICDF(14710), AOM_ICDF(15376), AOM_ICDF(16708),
- AOM_ICDF(18092), AOM_ICDF(25260), AOM_ICDF(28991), AOM_ICDF(31585),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4736), AOM_ICDF(10789), AOM_ICDF(13029), AOM_ICDF(13750),
- AOM_ICDF(15040), AOM_ICDF(15385), AOM_ICDF(15840), AOM_ICDF(16887),
- AOM_ICDF(18393), AOM_ICDF(25230), AOM_ICDF(29558), AOM_ICDF(31454),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6016), AOM_ICDF(9916), AOM_ICDF(12938), AOM_ICDF(13741),
- AOM_ICDF(15030), AOM_ICDF(15297), AOM_ICDF(16116), AOM_ICDF(17333),
- AOM_ICDF(18672), AOM_ICDF(25954), AOM_ICDF(28498), AOM_ICDF(31618),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4608), AOM_ICDF(10266), AOM_ICDF(15450), AOM_ICDF(16299),
- AOM_ICDF(17114), AOM_ICDF(17288), AOM_ICDF(17775), AOM_ICDF(18835),
- AOM_ICDF(20227), AOM_ICDF(25199), AOM_ICDF(28098), AOM_ICDF(30018),
- AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(7296), AOM_ICDF(9951), AOM_ICDF(14124), AOM_ICDF(14806),
- AOM_ICDF(16181), AOM_ICDF(16377), AOM_ICDF(17485), AOM_ICDF(19069),
- AOM_ICDF(20078), AOM_ICDF(26051), AOM_ICDF(27777), AOM_ICDF(31574),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5376), AOM_ICDF(13823), AOM_ICDF(15889), AOM_ICDF(16620),
- AOM_ICDF(17709), AOM_ICDF(17881), AOM_ICDF(18327), AOM_ICDF(19140),
- AOM_ICDF(20374), AOM_ICDF(25685), AOM_ICDF(28160), AOM_ICDF(30521),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3200), AOM_ICDF(4602), AOM_ICDF(16404), AOM_ICDF(17042),
- AOM_ICDF(17780), AOM_ICDF(17829), AOM_ICDF(18706), AOM_ICDF(20608),
- AOM_ICDF(21115), AOM_ICDF(25884), AOM_ICDF(26960), AOM_ICDF(30804),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7040), AOM_ICDF(9444), AOM_ICDF(11770), AOM_ICDF(14321),
- AOM_ICDF(15951), AOM_ICDF(16074), AOM_ICDF(17033), AOM_ICDF(20352),
- AOM_ICDF(22301), AOM_ICDF(27567), AOM_ICDF(29151), AOM_ICDF(31662),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6016), AOM_ICDF(8316), AOM_ICDF(10849), AOM_ICDF(12136),
- AOM_ICDF(15860), AOM_ICDF(16430), AOM_ICDF(17935), AOM_ICDF(19659),
- AOM_ICDF(21083), AOM_ICDF(26968), AOM_ICDF(28839), AOM_ICDF(31618),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3840), AOM_ICDF(7472), AOM_ICDF(9436), AOM_ICDF(11038),
- AOM_ICDF(13625), AOM_ICDF(17596), AOM_ICDF(18959), AOM_ICDF(20543),
- AOM_ICDF(22879), AOM_ICDF(27487), AOM_ICDF(29351), AOM_ICDF(31186),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5376), AOM_ICDF(7117), AOM_ICDF(11424), AOM_ICDF(12381),
- AOM_ICDF(14823), AOM_ICDF(15053), AOM_ICDF(18656), AOM_ICDF(20818),
- AOM_ICDF(21722), AOM_ICDF(27042), AOM_ICDF(28233), AOM_ICDF(31591),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5760), AOM_ICDF(7281), AOM_ICDF(11910), AOM_ICDF(12912),
- AOM_ICDF(14229), AOM_ICDF(14391), AOM_ICDF(15474), AOM_ICDF(20113),
- AOM_ICDF(21128), AOM_ICDF(26627), AOM_ICDF(28077), AOM_ICDF(31713),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6656), AOM_ICDF(9452), AOM_ICDF(11526), AOM_ICDF(13288),
- AOM_ICDF(14861), AOM_ICDF(15062), AOM_ICDF(15909), AOM_ICDF(17695),
- AOM_ICDF(20429), AOM_ICDF(26225), AOM_ICDF(28603), AOM_ICDF(31340),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5376), AOM_ICDF(7722), AOM_ICDF(10921), AOM_ICDF(11813),
- AOM_ICDF(13222), AOM_ICDF(13348), AOM_ICDF(14211), AOM_ICDF(15976),
- AOM_ICDF(17110), AOM_ICDF(24634), AOM_ICDF(27176), AOM_ICDF(31484),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4736), AOM_ICDF(8226), AOM_ICDF(11137), AOM_ICDF(11988),
- AOM_ICDF(13518), AOM_ICDF(13706), AOM_ICDF(14332), AOM_ICDF(16016),
- AOM_ICDF(17301), AOM_ICDF(24641), AOM_ICDF(27704), AOM_ICDF(31016),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5760), AOM_ICDF(7592), AOM_ICDF(11880), AOM_ICDF(12612),
- AOM_ICDF(13738), AOM_ICDF(13813), AOM_ICDF(14681), AOM_ICDF(16392),
- AOM_ICDF(17306), AOM_ICDF(24619), AOM_ICDF(26334), AOM_ICDF(31818),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4096), AOM_ICDF(8524), AOM_ICDF(14316), AOM_ICDF(15392),
- AOM_ICDF(16295), AOM_ICDF(16433), AOM_ICDF(17197), AOM_ICDF(18718),
- AOM_ICDF(19924), AOM_ICDF(25123), AOM_ICDF(26953), AOM_ICDF(29856),
- AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(6528), AOM_ICDF(13383), AOM_ICDF(17642), AOM_ICDF(18342),
- AOM_ICDF(19224), AOM_ICDF(20209), AOM_ICDF(20899), AOM_ICDF(21944),
- AOM_ICDF(23137), AOM_ICDF(25966), AOM_ICDF(27429), AOM_ICDF(28463),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4480), AOM_ICDF(16901), AOM_ICDF(18876), AOM_ICDF(19560),
- AOM_ICDF(20257), AOM_ICDF(20912), AOM_ICDF(21169), AOM_ICDF(21959),
- AOM_ICDF(23036), AOM_ICDF(25781), AOM_ICDF(27676), AOM_ICDF(28569),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(2688), AOM_ICDF(5337), AOM_ICDF(18178), AOM_ICDF(18829),
- AOM_ICDF(19344), AOM_ICDF(19628), AOM_ICDF(20267), AOM_ICDF(22135),
- AOM_ICDF(22671), AOM_ICDF(25817), AOM_ICDF(26914), AOM_ICDF(28773),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8192), AOM_ICDF(11378), AOM_ICDF(14742), AOM_ICDF(17269),
- AOM_ICDF(18230), AOM_ICDF(19001), AOM_ICDF(19655), AOM_ICDF(22949),
- AOM_ICDF(24337), AOM_ICDF(28025), AOM_ICDF(29503), AOM_ICDF(30848),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5120), AOM_ICDF(10133), AOM_ICDF(13144), AOM_ICDF(14374),
- AOM_ICDF(17020), AOM_ICDF(18920), AOM_ICDF(20235), AOM_ICDF(21677),
- AOM_ICDF(23142), AOM_ICDF(27131), AOM_ICDF(28671), AOM_ICDF(30284),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7424), AOM_ICDF(15588), AOM_ICDF(18431), AOM_ICDF(19723),
- AOM_ICDF(21455), AOM_ICDF(24705), AOM_ICDF(25461), AOM_ICDF(26753),
- AOM_ICDF(28923), AOM_ICDF(29475), AOM_ICDF(29729), AOM_ICDF(29897),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4224), AOM_ICDF(8689), AOM_ICDF(13024), AOM_ICDF(13658),
- AOM_ICDF(16637), AOM_ICDF(17307), AOM_ICDF(20836), AOM_ICDF(22665),
- AOM_ICDF(23673), AOM_ICDF(27015), AOM_ICDF(28310), AOM_ICDF(30203),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5120), AOM_ICDF(7896), AOM_ICDF(13618), AOM_ICDF(14900),
- AOM_ICDF(15708), AOM_ICDF(16153), AOM_ICDF(16997), AOM_ICDF(23625),
- AOM_ICDF(24466), AOM_ICDF(27719), AOM_ICDF(28892), AOM_ICDF(30500),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5760), AOM_ICDF(11305), AOM_ICDF(13669), AOM_ICDF(15462),
- AOM_ICDF(16564), AOM_ICDF(17683), AOM_ICDF(18252), AOM_ICDF(20073),
- AOM_ICDF(22917), AOM_ICDF(27005), AOM_ICDF(28923), AOM_ICDF(30236),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4224), AOM_ICDF(9510), AOM_ICDF(13787), AOM_ICDF(14587),
- AOM_ICDF(15753), AOM_ICDF(15925), AOM_ICDF(16513), AOM_ICDF(18193),
- AOM_ICDF(19490), AOM_ICDF(24944), AOM_ICDF(27482), AOM_ICDF(29757),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3840), AOM_ICDF(10052), AOM_ICDF(14106), AOM_ICDF(14887),
- AOM_ICDF(15827), AOM_ICDF(15996), AOM_ICDF(16522), AOM_ICDF(17939),
- AOM_ICDF(19204), AOM_ICDF(24508), AOM_ICDF(27661), AOM_ICDF(29491),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(4736), AOM_ICDF(9676), AOM_ICDF(14492), AOM_ICDF(15163),
- AOM_ICDF(16179), AOM_ICDF(16390), AOM_ICDF(17133), AOM_ICDF(18905),
- AOM_ICDF(19864), AOM_ICDF(25185), AOM_ICDF(27191), AOM_ICDF(30030),
- AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(3584), AOM_ICDF(9370), AOM_ICDF(14746), AOM_ICDF(15820),
- AOM_ICDF(16708), AOM_ICDF(17224), AOM_ICDF(17718), AOM_ICDF(19329),
- AOM_ICDF(20405), AOM_ICDF(23541), AOM_ICDF(25258), AOM_ICDF(26726),
- AOM_ICDF(32768), 0,
- },
- },
-#else
- {
- {
- AOM_ICDF(15488), AOM_ICDF(18706), AOM_ICDF(22561), AOM_ICDF(23619),
- AOM_ICDF(24954), AOM_ICDF(25782), AOM_ICDF(26710), AOM_ICDF(27861),
- AOM_ICDF(28656), AOM_ICDF(30743), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(11648), AOM_ICDF(18744), AOM_ICDF(20846), AOM_ICDF(22100),
- AOM_ICDF(23332), AOM_ICDF(24337), AOM_ICDF(25093), AOM_ICDF(26104),
- AOM_ICDF(27097), AOM_ICDF(29633), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8448), AOM_ICDF(10732), AOM_ICDF(22507), AOM_ICDF(23254),
- AOM_ICDF(24382), AOM_ICDF(24876), AOM_ICDF(25827), AOM_ICDF(27488),
- AOM_ICDF(28040), AOM_ICDF(30108), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(13568), AOM_ICDF(16981), AOM_ICDF(19885), AOM_ICDF(22014),
- AOM_ICDF(23543), AOM_ICDF(24658), AOM_ICDF(25641), AOM_ICDF(27378),
- AOM_ICDF(28625), AOM_ICDF(31043), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9600), AOM_ICDF(12225), AOM_ICDF(14408), AOM_ICDF(16033),
- AOM_ICDF(19544), AOM_ICDF(22318), AOM_ICDF(23960), AOM_ICDF(25617),
- AOM_ICDF(26522), AOM_ICDF(30596), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(12160), AOM_ICDF(15078), AOM_ICDF(16990), AOM_ICDF(18964),
- AOM_ICDF(22725), AOM_ICDF(25793), AOM_ICDF(27133), AOM_ICDF(28447),
- AOM_ICDF(30831), AOM_ICDF(30836), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9088), AOM_ICDF(11274), AOM_ICDF(15818), AOM_ICDF(16940),
- AOM_ICDF(21178), AOM_ICDF(22338), AOM_ICDF(26171), AOM_ICDF(27754),
- AOM_ICDF(28503), AOM_ICDF(31473), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10880), AOM_ICDF(13846), AOM_ICDF(18649), AOM_ICDF(20252),
- AOM_ICDF(22157), AOM_ICDF(22992), AOM_ICDF(24396), AOM_ICDF(27581),
- AOM_ICDF(28501), AOM_ICDF(31400), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(11008), AOM_ICDF(13462), AOM_ICDF(15747), AOM_ICDF(18378),
- AOM_ICDF(20085), AOM_ICDF(21663), AOM_ICDF(22766), AOM_ICDF(24635),
- AOM_ICDF(27476), AOM_ICDF(30643), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10112), AOM_ICDF(13147), AOM_ICDF(16135), AOM_ICDF(17577),
- AOM_ICDF(19681), AOM_ICDF(19689), AOM_ICDF(20856), AOM_ICDF(22374),
- AOM_ICDF(24454), AOM_ICDF(30555), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8704), AOM_ICDF(12176), AOM_ICDF(17582), AOM_ICDF(18905),
- AOM_ICDF(19994), AOM_ICDF(20669), AOM_ICDF(21635), AOM_ICDF(23564),
- AOM_ICDF(24741), AOM_ICDF(27222), AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(8448), AOM_ICDF(18738), AOM_ICDF(21694), AOM_ICDF(22413),
- AOM_ICDF(23358), AOM_ICDF(24675), AOM_ICDF(25193), AOM_ICDF(26119),
- AOM_ICDF(27310), AOM_ICDF(30773), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6656), AOM_ICDF(22027), AOM_ICDF(23242), AOM_ICDF(23986),
- AOM_ICDF(24529), AOM_ICDF(25363), AOM_ICDF(25646), AOM_ICDF(26087),
- AOM_ICDF(27130), AOM_ICDF(30218), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7168), AOM_ICDF(13862), AOM_ICDF(21137), AOM_ICDF(22124),
- AOM_ICDF(23036), AOM_ICDF(23803), AOM_ICDF(24458), AOM_ICDF(26390),
- AOM_ICDF(27342), AOM_ICDF(30968), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9600), AOM_ICDF(17409), AOM_ICDF(19830), AOM_ICDF(21521),
- AOM_ICDF(22580), AOM_ICDF(23726), AOM_ICDF(24377), AOM_ICDF(25679),
- AOM_ICDF(27269), AOM_ICDF(30867), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6912), AOM_ICDF(15832), AOM_ICDF(17559), AOM_ICDF(18777),
- AOM_ICDF(20425), AOM_ICDF(22719), AOM_ICDF(23447), AOM_ICDF(24952),
- AOM_ICDF(26527), AOM_ICDF(30950), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7808), AOM_ICDF(18730), AOM_ICDF(20143), AOM_ICDF(21445),
- AOM_ICDF(23347), AOM_ICDF(26267), AOM_ICDF(27229), AOM_ICDF(28315),
- AOM_ICDF(30911), AOM_ICDF(30915), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6784), AOM_ICDF(14299), AOM_ICDF(17264), AOM_ICDF(18505),
- AOM_ICDF(20765), AOM_ICDF(22440), AOM_ICDF(24331), AOM_ICDF(26038),
- AOM_ICDF(27481), AOM_ICDF(31448), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8832), AOM_ICDF(15726), AOM_ICDF(19455), AOM_ICDF(20668),
- AOM_ICDF(21607), AOM_ICDF(22655), AOM_ICDF(23384), AOM_ICDF(26356),
- AOM_ICDF(27697), AOM_ICDF(31459), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8192), AOM_ICDF(17385), AOM_ICDF(18866), AOM_ICDF(20120),
- AOM_ICDF(21273), AOM_ICDF(22853), AOM_ICDF(23470), AOM_ICDF(24881),
- AOM_ICDF(27216), AOM_ICDF(31040), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6656), AOM_ICDF(16341), AOM_ICDF(18497), AOM_ICDF(19439),
- AOM_ICDF(20706), AOM_ICDF(20711), AOM_ICDF(21234), AOM_ICDF(22307),
- AOM_ICDF(23950), AOM_ICDF(30728), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6400), AOM_ICDF(17625), AOM_ICDF(20326), AOM_ICDF(21821),
- AOM_ICDF(22568), AOM_ICDF(23415), AOM_ICDF(23854), AOM_ICDF(24896),
- AOM_ICDF(26171), AOM_ICDF(29575), AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(12032), AOM_ICDF(14259), AOM_ICDF(22597), AOM_ICDF(23443),
- AOM_ICDF(24581), AOM_ICDF(25079), AOM_ICDF(26399), AOM_ICDF(27862),
- AOM_ICDF(28509), AOM_ICDF(30419), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9216), AOM_ICDF(14883), AOM_ICDF(20941), AOM_ICDF(21958),
- AOM_ICDF(23597), AOM_ICDF(24328), AOM_ICDF(25208), AOM_ICDF(26590),
- AOM_ICDF(27377), AOM_ICDF(29364), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6784), AOM_ICDF(8088), AOM_ICDF(24407), AOM_ICDF(25006),
- AOM_ICDF(25777), AOM_ICDF(25950), AOM_ICDF(26882), AOM_ICDF(28811),
- AOM_ICDF(29159), AOM_ICDF(30636), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(11904), AOM_ICDF(14425), AOM_ICDF(18729), AOM_ICDF(20730),
- AOM_ICDF(21998), AOM_ICDF(22686), AOM_ICDF(23856), AOM_ICDF(26580),
- AOM_ICDF(27613), AOM_ICDF(29834), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10752), AOM_ICDF(12784), AOM_ICDF(16305), AOM_ICDF(17624),
- AOM_ICDF(20320), AOM_ICDF(22450), AOM_ICDF(24380), AOM_ICDF(26773),
- AOM_ICDF(27837), AOM_ICDF(30016), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10496), AOM_ICDF(14090), AOM_ICDF(18314), AOM_ICDF(20621),
- AOM_ICDF(23539), AOM_ICDF(25261), AOM_ICDF(26953), AOM_ICDF(28692),
- AOM_ICDF(30064), AOM_ICDF(30071), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8448), AOM_ICDF(10229), AOM_ICDF(16542), AOM_ICDF(17725),
- AOM_ICDF(21504), AOM_ICDF(22332), AOM_ICDF(26006), AOM_ICDF(27895),
- AOM_ICDF(28487), AOM_ICDF(31248), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9728), AOM_ICDF(11162), AOM_ICDF(19379), AOM_ICDF(20981),
- AOM_ICDF(22356), AOM_ICDF(22926), AOM_ICDF(24318), AOM_ICDF(28364),
- AOM_ICDF(29020), AOM_ICDF(31328), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9216), AOM_ICDF(10861), AOM_ICDF(14850), AOM_ICDF(16471),
- AOM_ICDF(18611), AOM_ICDF(19674), AOM_ICDF(21009), AOM_ICDF(23454),
- AOM_ICDF(26078), AOM_ICDF(29272), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7808), AOM_ICDF(10132), AOM_ICDF(17327), AOM_ICDF(18472),
- AOM_ICDF(20126), AOM_ICDF(20132), AOM_ICDF(21599), AOM_ICDF(23338),
- AOM_ICDF(24514), AOM_ICDF(29843), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6784), AOM_ICDF(9210), AOM_ICDF(19309), AOM_ICDF(20715),
- AOM_ICDF(21833), AOM_ICDF(22262), AOM_ICDF(23353), AOM_ICDF(24942),
- AOM_ICDF(25800), AOM_ICDF(28200), AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(12288), AOM_ICDF(15040), AOM_ICDF(18401), AOM_ICDF(21071),
- AOM_ICDF(22800), AOM_ICDF(23945), AOM_ICDF(25274), AOM_ICDF(26939),
- AOM_ICDF(28554), AOM_ICDF(31328), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9344), AOM_ICDF(17170), AOM_ICDF(19325), AOM_ICDF(22119),
- AOM_ICDF(23284), AOM_ICDF(24378), AOM_ICDF(24911), AOM_ICDF(26095),
- AOM_ICDF(27781), AOM_ICDF(31121), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9344), AOM_ICDF(11650), AOM_ICDF(19788), AOM_ICDF(21928),
- AOM_ICDF(22916), AOM_ICDF(23571), AOM_ICDF(24362), AOM_ICDF(26633),
- AOM_ICDF(27946), AOM_ICDF(31212), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(12928), AOM_ICDF(14428), AOM_ICDF(17080), AOM_ICDF(20882),
- AOM_ICDF(22104), AOM_ICDF(23149), AOM_ICDF(23715), AOM_ICDF(27167),
- AOM_ICDF(28932), AOM_ICDF(31218), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9088), AOM_ICDF(11962), AOM_ICDF(13849), AOM_ICDF(16880),
- AOM_ICDF(19818), AOM_ICDF(21895), AOM_ICDF(23000), AOM_ICDF(25923),
- AOM_ICDF(27961), AOM_ICDF(31380), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10240), AOM_ICDF(13336), AOM_ICDF(15505), AOM_ICDF(18844),
- AOM_ICDF(21646), AOM_ICDF(24723), AOM_ICDF(25832), AOM_ICDF(27802),
- AOM_ICDF(31088), AOM_ICDF(31096), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8704), AOM_ICDF(10683), AOM_ICDF(14446), AOM_ICDF(17035),
- AOM_ICDF(20211), AOM_ICDF(21577), AOM_ICDF(24370), AOM_ICDF(26477),
- AOM_ICDF(28223), AOM_ICDF(31734), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(12928), AOM_ICDF(17358), AOM_ICDF(19982), AOM_ICDF(22123),
- AOM_ICDF(23335), AOM_ICDF(23948), AOM_ICDF(24890), AOM_ICDF(28884),
- AOM_ICDF(30197), AOM_ICDF(32148), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10496), AOM_ICDF(12429), AOM_ICDF(16401), AOM_ICDF(20493),
- AOM_ICDF(21471), AOM_ICDF(22433), AOM_ICDF(23162), AOM_ICDF(24686),
- AOM_ICDF(29027), AOM_ICDF(31115), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8448), AOM_ICDF(12157), AOM_ICDF(14796), AOM_ICDF(17676),
- AOM_ICDF(19754), AOM_ICDF(19762), AOM_ICDF(20641), AOM_ICDF(23274),
- AOM_ICDF(25569), AOM_ICDF(31058), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7296), AOM_ICDF(11083), AOM_ICDF(15313), AOM_ICDF(20550),
- AOM_ICDF(21783), AOM_ICDF(22727), AOM_ICDF(23461), AOM_ICDF(25072),
- AOM_ICDF(27195), AOM_ICDF(30380), AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(10880), AOM_ICDF(13214), AOM_ICDF(15829), AOM_ICDF(16866),
- AOM_ICDF(20613), AOM_ICDF(22316), AOM_ICDF(24539), AOM_ICDF(27077),
- AOM_ICDF(28116), AOM_ICDF(31485), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9984), AOM_ICDF(13868), AOM_ICDF(16397), AOM_ICDF(17486),
- AOM_ICDF(20011), AOM_ICDF(22071), AOM_ICDF(23357), AOM_ICDF(24990),
- AOM_ICDF(26336), AOM_ICDF(30276), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7168), AOM_ICDF(8637), AOM_ICDF(17963), AOM_ICDF(18813),
- AOM_ICDF(21065), AOM_ICDF(22052), AOM_ICDF(23502), AOM_ICDF(25702),
- AOM_ICDF(26745), AOM_ICDF(30668), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8960), AOM_ICDF(10682), AOM_ICDF(12496), AOM_ICDF(18240),
- AOM_ICDF(20500), AOM_ICDF(21585), AOM_ICDF(23387), AOM_ICDF(25795),
- AOM_ICDF(27119), AOM_ICDF(31001), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9856), AOM_ICDF(12056), AOM_ICDF(13722), AOM_ICDF(15196),
- AOM_ICDF(19276), AOM_ICDF(21891), AOM_ICDF(23643), AOM_ICDF(25538),
- AOM_ICDF(26854), AOM_ICDF(31515), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9984), AOM_ICDF(12963), AOM_ICDF(14960), AOM_ICDF(16734),
- AOM_ICDF(21279), AOM_ICDF(25616), AOM_ICDF(27638), AOM_ICDF(28950),
- AOM_ICDF(31161), AOM_ICDF(31166), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7168), AOM_ICDF(8604), AOM_ICDF(12044), AOM_ICDF(13632),
- AOM_ICDF(18931), AOM_ICDF(20553), AOM_ICDF(23452), AOM_ICDF(25800),
- AOM_ICDF(27754), AOM_ICDF(31668), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(11520), AOM_ICDF(13372), AOM_ICDF(16642), AOM_ICDF(18137),
- AOM_ICDF(20232), AOM_ICDF(21510), AOM_ICDF(23052), AOM_ICDF(26792),
- AOM_ICDF(27974), AOM_ICDF(31274), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10240), AOM_ICDF(12483), AOM_ICDF(14364), AOM_ICDF(16168),
- AOM_ICDF(18668), AOM_ICDF(20707), AOM_ICDF(22158), AOM_ICDF(24410),
- AOM_ICDF(26370), AOM_ICDF(30744), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8064), AOM_ICDF(10798), AOM_ICDF(13829), AOM_ICDF(15128),
- AOM_ICDF(19136), AOM_ICDF(19152), AOM_ICDF(21057), AOM_ICDF(22583),
- AOM_ICDF(24513), AOM_ICDF(30645), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8448), AOM_ICDF(11025), AOM_ICDF(16073), AOM_ICDF(17603),
- AOM_ICDF(20094), AOM_ICDF(21468), AOM_ICDF(22971), AOM_ICDF(24628),
- AOM_ICDF(26015), AOM_ICDF(29728), AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(10368), AOM_ICDF(15372), AOM_ICDF(18442), AOM_ICDF(19576),
- AOM_ICDF(22674), AOM_ICDF(27128), AOM_ICDF(28232), AOM_ICDF(29624),
- AOM_ICDF(31363), AOM_ICDF(31368), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9472), AOM_ICDF(16687), AOM_ICDF(18957), AOM_ICDF(20272),
- AOM_ICDF(22852), AOM_ICDF(27082), AOM_ICDF(27839), AOM_ICDF(28995),
- AOM_ICDF(30943), AOM_ICDF(30948), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8064), AOM_ICDF(12334), AOM_ICDF(19197), AOM_ICDF(20956),
- AOM_ICDF(24804), AOM_ICDF(26553), AOM_ICDF(27556), AOM_ICDF(29877),
- AOM_ICDF(31311), AOM_ICDF(31320), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8960), AOM_ICDF(14083), AOM_ICDF(16058), AOM_ICDF(19129),
- AOM_ICDF(21136), AOM_ICDF(23635), AOM_ICDF(24870), AOM_ICDF(27577),
- AOM_ICDF(31176), AOM_ICDF(31187), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9984), AOM_ICDF(14208), AOM_ICDF(15589), AOM_ICDF(17640),
- AOM_ICDF(22080), AOM_ICDF(26660), AOM_ICDF(27947), AOM_ICDF(29400),
- AOM_ICDF(31605), AOM_ICDF(31611), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9216), AOM_ICDF(15167), AOM_ICDF(16263), AOM_ICDF(17767),
- AOM_ICDF(21531), AOM_ICDF(26689), AOM_ICDF(27607), AOM_ICDF(28880),
- AOM_ICDF(31291), AOM_ICDF(31296), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8448), AOM_ICDF(12756), AOM_ICDF(15781), AOM_ICDF(17279),
- AOM_ICDF(21198), AOM_ICDF(24057), AOM_ICDF(26171), AOM_ICDF(29200),
- AOM_ICDF(31901), AOM_ICDF(31913), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9984), AOM_ICDF(15074), AOM_ICDF(18244), AOM_ICDF(19878),
- AOM_ICDF(22246), AOM_ICDF(24436), AOM_ICDF(25560), AOM_ICDF(28991),
- AOM_ICDF(31687), AOM_ICDF(31700), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10496), AOM_ICDF(15128), AOM_ICDF(17012), AOM_ICDF(18989),
- AOM_ICDF(21294), AOM_ICDF(25011), AOM_ICDF(25999), AOM_ICDF(27784),
- AOM_ICDF(30934), AOM_ICDF(30941), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(2944), AOM_ICDF(5875), AOM_ICDF(8846), AOM_ICDF(11817),
- AOM_ICDF(14806), AOM_ICDF(17795), AOM_ICDF(20769), AOM_ICDF(23761),
- AOM_ICDF(26747), AOM_ICDF(29739), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7424), AOM_ICDF(12915), AOM_ICDF(17544), AOM_ICDF(19392),
- AOM_ICDF(23074), AOM_ICDF(25635), AOM_ICDF(26431), AOM_ICDF(28241),
- AOM_ICDF(30088), AOM_ICDF(30095), AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(11648), AOM_ICDF(13565), AOM_ICDF(18996), AOM_ICDF(19908),
- AOM_ICDF(21897), AOM_ICDF(22852), AOM_ICDF(26656), AOM_ICDF(28172),
- AOM_ICDF(28995), AOM_ICDF(31283), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10240), AOM_ICDF(14255), AOM_ICDF(18109), AOM_ICDF(19716),
- AOM_ICDF(21521), AOM_ICDF(22859), AOM_ICDF(24613), AOM_ICDF(26161),
- AOM_ICDF(27279), AOM_ICDF(30392), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6784), AOM_ICDF(7848), AOM_ICDF(18820), AOM_ICDF(19447),
- AOM_ICDF(22335), AOM_ICDF(22733), AOM_ICDF(25112), AOM_ICDF(28427),
- AOM_ICDF(29013), AOM_ICDF(31550), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(11904), AOM_ICDF(13581), AOM_ICDF(17695), AOM_ICDF(19311),
- AOM_ICDF(21698), AOM_ICDF(22562), AOM_ICDF(24391), AOM_ICDF(26559),
- AOM_ICDF(27779), AOM_ICDF(30567), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10624), AOM_ICDF(12334), AOM_ICDF(14643), AOM_ICDF(16255),
- AOM_ICDF(20783), AOM_ICDF(22767), AOM_ICDF(24929), AOM_ICDF(26876),
- AOM_ICDF(27998), AOM_ICDF(31470), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(12032), AOM_ICDF(14415), AOM_ICDF(16715), AOM_ICDF(18712),
- AOM_ICDF(21557), AOM_ICDF(25332), AOM_ICDF(27840), AOM_ICDF(29663),
- AOM_ICDF(31708), AOM_ICDF(31715), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9728), AOM_ICDF(10683), AOM_ICDF(13955), AOM_ICDF(14786),
- AOM_ICDF(18481), AOM_ICDF(19492), AOM_ICDF(26749), AOM_ICDF(28483),
- AOM_ICDF(29116), AOM_ICDF(31958), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8960), AOM_ICDF(10032), AOM_ICDF(15755), AOM_ICDF(16949),
- AOM_ICDF(19144), AOM_ICDF(19744), AOM_ICDF(22082), AOM_ICDF(27608),
- AOM_ICDF(28411), AOM_ICDF(31838), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(14592), AOM_ICDF(15937), AOM_ICDF(18518), AOM_ICDF(19566),
- AOM_ICDF(21817), AOM_ICDF(23102), AOM_ICDF(24436), AOM_ICDF(26651),
- AOM_ICDF(28100), AOM_ICDF(30993), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8960), AOM_ICDF(10791), AOM_ICDF(14718), AOM_ICDF(16094),
- AOM_ICDF(18560), AOM_ICDF(18570), AOM_ICDF(22120), AOM_ICDF(24188),
- AOM_ICDF(25677), AOM_ICDF(31280), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(11136), AOM_ICDF(13058), AOM_ICDF(19006), AOM_ICDF(20135),
- AOM_ICDF(21463), AOM_ICDF(22159), AOM_ICDF(24042), AOM_ICDF(26348),
- AOM_ICDF(27367), AOM_ICDF(30064), AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(12544), AOM_ICDF(15384), AOM_ICDF(20327), AOM_ICDF(21555),
- AOM_ICDF(23456), AOM_ICDF(24144), AOM_ICDF(25421), AOM_ICDF(27884),
- AOM_ICDF(28875), AOM_ICDF(31188), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10368), AOM_ICDF(15009), AOM_ICDF(17631), AOM_ICDF(18970),
- AOM_ICDF(20691), AOM_ICDF(21850), AOM_ICDF(22749), AOM_ICDF(25280),
- AOM_ICDF(26570), AOM_ICDF(29530), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9088), AOM_ICDF(10956), AOM_ICDF(21554), AOM_ICDF(22698),
- AOM_ICDF(23666), AOM_ICDF(24052), AOM_ICDF(25122), AOM_ICDF(27792),
- AOM_ICDF(28612), AOM_ICDF(30825), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(11520), AOM_ICDF(12888), AOM_ICDF(16374), AOM_ICDF(19132),
- AOM_ICDF(21186), AOM_ICDF(21843), AOM_ICDF(22902), AOM_ICDF(26440),
- AOM_ICDF(27928), AOM_ICDF(29946), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9984), AOM_ICDF(12199), AOM_ICDF(14625), AOM_ICDF(17321),
- AOM_ICDF(20195), AOM_ICDF(21574), AOM_ICDF(23010), AOM_ICDF(25688),
- AOM_ICDF(27600), AOM_ICDF(30988), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10112), AOM_ICDF(13705), AOM_ICDF(16847), AOM_ICDF(19242),
- AOM_ICDF(22011), AOM_ICDF(24064), AOM_ICDF(26481), AOM_ICDF(29125),
- AOM_ICDF(30545), AOM_ICDF(30555), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9344), AOM_ICDF(10994), AOM_ICDF(15018), AOM_ICDF(16915),
- AOM_ICDF(20471), AOM_ICDF(21334), AOM_ICDF(24577), AOM_ICDF(27472),
- AOM_ICDF(28592), AOM_ICDF(31578), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(12928), AOM_ICDF(14540), AOM_ICDF(18022), AOM_ICDF(19481),
- AOM_ICDF(21028), AOM_ICDF(21825), AOM_ICDF(22728), AOM_ICDF(28191),
- AOM_ICDF(29154), AOM_ICDF(31683), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10368), AOM_ICDF(12160), AOM_ICDF(14900), AOM_ICDF(17161),
- AOM_ICDF(19379), AOM_ICDF(20521), AOM_ICDF(21747), AOM_ICDF(24534),
- AOM_ICDF(26677), AOM_ICDF(30318), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8960), AOM_ICDF(11488), AOM_ICDF(16197), AOM_ICDF(18030),
- AOM_ICDF(20010), AOM_ICDF(20018), AOM_ICDF(21347), AOM_ICDF(23948),
- AOM_ICDF(25016), AOM_ICDF(30536), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7808), AOM_ICDF(10310), AOM_ICDF(15420), AOM_ICDF(18961),
- AOM_ICDF(20114), AOM_ICDF(20772), AOM_ICDF(21721), AOM_ICDF(24599),
- AOM_ICDF(26237), AOM_ICDF(29160), AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(9856), AOM_ICDF(13764), AOM_ICDF(16995), AOM_ICDF(19540),
- AOM_ICDF(20802), AOM_ICDF(22302), AOM_ICDF(23113), AOM_ICDF(24519),
- AOM_ICDF(27717), AOM_ICDF(31604), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8704), AOM_ICDF(15725), AOM_ICDF(17309), AOM_ICDF(20296),
- AOM_ICDF(21257), AOM_ICDF(22573), AOM_ICDF(23165), AOM_ICDF(23893),
- AOM_ICDF(27755), AOM_ICDF(31170), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7936), AOM_ICDF(11343), AOM_ICDF(19355), AOM_ICDF(21223),
- AOM_ICDF(22121), AOM_ICDF(22978), AOM_ICDF(23703), AOM_ICDF(26079),
- AOM_ICDF(27978), AOM_ICDF(31507), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(11264), AOM_ICDF(14823), AOM_ICDF(17314), AOM_ICDF(20715),
- AOM_ICDF(21999), AOM_ICDF(22982), AOM_ICDF(23728), AOM_ICDF(25229),
- AOM_ICDF(28593), AOM_ICDF(31508), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8704), AOM_ICDF(11788), AOM_ICDF(13666), AOM_ICDF(16523),
- AOM_ICDF(18630), AOM_ICDF(20579), AOM_ICDF(21574), AOM_ICDF(23335),
- AOM_ICDF(26298), AOM_ICDF(31264), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9088), AOM_ICDF(14031), AOM_ICDF(15766), AOM_ICDF(18533),
- AOM_ICDF(21457), AOM_ICDF(24078), AOM_ICDF(24973), AOM_ICDF(26102),
- AOM_ICDF(31284), AOM_ICDF(31288), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7040), AOM_ICDF(9648), AOM_ICDF(12140), AOM_ICDF(14601),
- AOM_ICDF(16742), AOM_ICDF(18070), AOM_ICDF(21154), AOM_ICDF(23582),
- AOM_ICDF(27647), AOM_ICDF(31763), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(10240), AOM_ICDF(13466), AOM_ICDF(16837), AOM_ICDF(19351),
- AOM_ICDF(20636), AOM_ICDF(21620), AOM_ICDF(22474), AOM_ICDF(25815),
- AOM_ICDF(28364), AOM_ICDF(31976), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(11008), AOM_ICDF(13682), AOM_ICDF(15127), AOM_ICDF(18779),
- AOM_ICDF(19841), AOM_ICDF(20792), AOM_ICDF(21954), AOM_ICDF(23365),
- AOM_ICDF(29100), AOM_ICDF(31748), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7168), AOM_ICDF(12260), AOM_ICDF(15037), AOM_ICDF(17152),
- AOM_ICDF(18730), AOM_ICDF(18736), AOM_ICDF(19436), AOM_ICDF(20484),
- AOM_ICDF(24465), AOM_ICDF(30868), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6784), AOM_ICDF(12469), AOM_ICDF(15422), AOM_ICDF(19291),
- AOM_ICDF(20301), AOM_ICDF(21344), AOM_ICDF(21894), AOM_ICDF(23415),
- AOM_ICDF(27696), AOM_ICDF(31042), AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(10112), AOM_ICDF(13929), AOM_ICDF(17880), AOM_ICDF(18857),
- AOM_ICDF(20955), AOM_ICDF(20963), AOM_ICDF(21974), AOM_ICDF(23273),
- AOM_ICDF(24734), AOM_ICDF(31352), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8064), AOM_ICDF(15826), AOM_ICDF(17929), AOM_ICDF(19017),
- AOM_ICDF(21016), AOM_ICDF(21024), AOM_ICDF(21687), AOM_ICDF(22701),
- AOM_ICDF(24242), AOM_ICDF(30645), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6528), AOM_ICDF(9196), AOM_ICDF(20118), AOM_ICDF(21101),
- AOM_ICDF(22227), AOM_ICDF(22231), AOM_ICDF(22997), AOM_ICDF(25070),
- AOM_ICDF(25919), AOM_ICDF(30923), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9600), AOM_ICDF(13218), AOM_ICDF(15898), AOM_ICDF(17780),
- AOM_ICDF(19991), AOM_ICDF(20000), AOM_ICDF(21196), AOM_ICDF(23912),
- AOM_ICDF(26044), AOM_ICDF(31139), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8960), AOM_ICDF(12037), AOM_ICDF(14178), AOM_ICDF(15681),
- AOM_ICDF(20126), AOM_ICDF(20143), AOM_ICDF(21435), AOM_ICDF(23083),
- AOM_ICDF(24675), AOM_ICDF(31466), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(2944), AOM_ICDF(5875), AOM_ICDF(8846), AOM_ICDF(11817),
- AOM_ICDF(14806), AOM_ICDF(17795), AOM_ICDF(20769), AOM_ICDF(23761),
- AOM_ICDF(26747), AOM_ICDF(29739), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9472), AOM_ICDF(12334), AOM_ICDF(15469), AOM_ICDF(16848),
- AOM_ICDF(19972), AOM_ICDF(19984), AOM_ICDF(22292), AOM_ICDF(24384),
- AOM_ICDF(25891), AOM_ICDF(31676), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8448), AOM_ICDF(11176), AOM_ICDF(15497), AOM_ICDF(16676),
- AOM_ICDF(18528), AOM_ICDF(18535), AOM_ICDF(19595), AOM_ICDF(24334),
- AOM_ICDF(25725), AOM_ICDF(31723), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8704), AOM_ICDF(12141), AOM_ICDF(14313), AOM_ICDF(15828),
- AOM_ICDF(18358), AOM_ICDF(18368), AOM_ICDF(19469), AOM_ICDF(21089),
- AOM_ICDF(24027), AOM_ICDF(30700), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(7680), AOM_ICDF(11689), AOM_ICDF(14556), AOM_ICDF(15548),
- AOM_ICDF(17878), AOM_ICDF(17887), AOM_ICDF(18873), AOM_ICDF(20512),
- AOM_ICDF(22152), AOM_ICDF(31004), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6656), AOM_ICDF(11476), AOM_ICDF(16600), AOM_ICDF(18052),
- AOM_ICDF(19683), AOM_ICDF(19689), AOM_ICDF(20509), AOM_ICDF(22077),
- AOM_ICDF(23496), AOM_ICDF(29504), AOM_ICDF(32768), 0,
- },
- },
- {
- {
- AOM_ICDF(9728), AOM_ICDF(14651), AOM_ICDF(19394), AOM_ICDF(20550),
- AOM_ICDF(21680), AOM_ICDF(22479), AOM_ICDF(23516), AOM_ICDF(24952),
- AOM_ICDF(26183), AOM_ICDF(28538), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8832), AOM_ICDF(18693), AOM_ICDF(20913), AOM_ICDF(21933),
- AOM_ICDF(22956), AOM_ICDF(23831), AOM_ICDF(24341), AOM_ICDF(25317),
- AOM_ICDF(26434), AOM_ICDF(29028), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(5888), AOM_ICDF(8413), AOM_ICDF(20542), AOM_ICDF(21609),
- AOM_ICDF(22437), AOM_ICDF(22864), AOM_ICDF(23663), AOM_ICDF(26329),
- AOM_ICDF(26900), AOM_ICDF(29828), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9984), AOM_ICDF(13134), AOM_ICDF(16328), AOM_ICDF(18267),
- AOM_ICDF(19814), AOM_ICDF(21461), AOM_ICDF(22393), AOM_ICDF(24944),
- AOM_ICDF(26320), AOM_ICDF(29653), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8448), AOM_ICDF(12425), AOM_ICDF(15474), AOM_ICDF(17031),
- AOM_ICDF(19216), AOM_ICDF(20889), AOM_ICDF(23077), AOM_ICDF(25108),
- AOM_ICDF(26548), AOM_ICDF(30108), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9856), AOM_ICDF(15675), AOM_ICDF(19169), AOM_ICDF(20837),
- AOM_ICDF(22638), AOM_ICDF(24556), AOM_ICDF(25438), AOM_ICDF(27114),
- AOM_ICDF(29449), AOM_ICDF(29456), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6784), AOM_ICDF(10294), AOM_ICDF(14542), AOM_ICDF(15724),
- AOM_ICDF(19109), AOM_ICDF(19972), AOM_ICDF(24084), AOM_ICDF(26329),
- AOM_ICDF(27637), AOM_ICDF(30433), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(8320), AOM_ICDF(10873), AOM_ICDF(17095), AOM_ICDF(18466),
- AOM_ICDF(19674), AOM_ICDF(20129), AOM_ICDF(21230), AOM_ICDF(27562),
- AOM_ICDF(28568), AOM_ICDF(30858), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(9088), AOM_ICDF(13196), AOM_ICDF(15898), AOM_ICDF(17566),
- AOM_ICDF(19210), AOM_ICDF(20354), AOM_ICDF(21186), AOM_ICDF(23647),
- AOM_ICDF(26235), AOM_ICDF(30548), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6912), AOM_ICDF(11512), AOM_ICDF(16390), AOM_ICDF(17479),
- AOM_ICDF(19065), AOM_ICDF(19071), AOM_ICDF(19740), AOM_ICDF(21715),
- AOM_ICDF(23208), AOM_ICDF(29132), AOM_ICDF(32768), 0,
- },
- {
- AOM_ICDF(6656), AOM_ICDF(11485), AOM_ICDF(16060), AOM_ICDF(17734),
- AOM_ICDF(19099), AOM_ICDF(19814), AOM_ICDF(21018), AOM_ICDF(23053),
- AOM_ICDF(24333), AOM_ICDF(27260), AOM_ICDF(32768), 0,
- },
- },
-#endif // CONFIG_SMOOTH_HV
-};
-#endif // CONFIG_KF_CTX
-
-#if CONFIG_LPF_SB
-static const aom_cdf_prob default_lpf_reuse_cdf[LPF_REUSE_CONTEXT][CDF_SIZE(
- 2)] = { { AOM_ICDF(8192), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4096), AOM_ICDF(32768), 0 } };
-
-static const aom_cdf_prob
- default_lpf_delta_cdf[LPF_DELTA_CONTEXT][CDF_SIZE(DELTA_RANGE)] = {
- { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
- AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
- AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
- AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
- AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
- AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
- AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
- AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
- AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 }
- };
-
-static const aom_cdf_prob
- default_lpf_sign_cdf[LPF_REUSE_CONTEXT][LPF_SIGN_CONTEXT][CDF_SIZE(2)] = {
- { { AOM_ICDF(6554), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26214), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(16384), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(32768), 0 } }
- };
-#endif // CONFIG_LPF_SB
-
static void init_mode_probs(FRAME_CONTEXT *fc) {
- av1_copy(fc->partition_prob, default_partition_probs);
- av1_copy(fc->intra_inter_prob, default_intra_inter_p);
- av1_copy(fc->comp_inter_prob, default_comp_inter_p);
av1_copy(fc->palette_y_size_cdf, default_palette_y_size_cdf);
av1_copy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf);
av1_copy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf);
av1_copy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf);
av1_copy(fc->kf_y_cdf, default_kf_y_mode_cdf);
-#if CONFIG_MRC_TX
- av1_copy(fc->mrc_mask_inter_cdf, default_mrc_mask_inter_cdf);
- av1_copy(fc->mrc_mask_intra_cdf, default_mrc_mask_intra_cdf);
-#endif // CONFIG_MRC_TX
-#if CONFIG_NEW_MULTISYMBOL
+ av1_copy(fc->angle_delta_cdf, default_angle_delta_cdf);
av1_copy(fc->comp_inter_cdf, default_comp_inter_cdf);
-#endif // CONFIG_NEW_MULTISYMBOL
-#if CONFIG_EXT_COMP_REFS
- av1_copy(fc->comp_ref_type_prob, default_comp_ref_type_p);
- av1_copy(fc->uni_comp_ref_prob, default_uni_comp_ref_p);
-#if CONFIG_NEW_MULTISYMBOL
av1_copy(fc->comp_ref_type_cdf, default_comp_ref_type_cdf);
av1_copy(fc->uni_comp_ref_cdf, default_uni_comp_ref_cdf);
-#endif // CONFIG_NEW_MULTISYMBOL
-#endif // CONFIG_EXT_COMP_REFS
- av1_copy(fc->comp_ref_prob, default_comp_ref_p);
-#if CONFIG_NEW_MULTISYMBOL
av1_copy(fc->palette_y_mode_cdf, default_palette_y_mode_cdf);
av1_copy(fc->palette_uv_mode_cdf, default_palette_uv_mode_cdf);
av1_copy(fc->comp_ref_cdf, default_comp_ref_cdf);
-#endif
-#if CONFIG_LV_MAP
- av1_copy(fc->txb_skip, default_txb_skip);
- av1_copy(fc->nz_map, default_nz_map);
- av1_copy(fc->eob_flag, default_eob_flag);
- av1_copy(fc->dc_sign, default_dc_sign);
- av1_copy(fc->coeff_base, default_coeff_base);
- av1_copy(fc->coeff_lps, default_coeff_lps);
-#if BR_NODE
- av1_copy(fc->coeff_br, default_coeff_br);
-#endif
-#if CONFIG_CTX1D
- av1_copy(fc->eob_mode, default_eob_mode);
- av1_copy(fc->empty_line, default_empty_line);
- av1_copy(fc->hv_eob, default_hv_eob);
-#endif // CONFIG_CTX1D
-
-#if LV_MAP_PROB
- av1_init_txb_probs(fc);
-#endif // LV_MAP_PROB
-#endif
-#if CONFIG_EXT_REFS
- av1_copy(fc->comp_bwdref_prob, default_comp_bwdref_p);
-#if CONFIG_NEW_MULTISYMBOL
av1_copy(fc->comp_bwdref_cdf, default_comp_bwdref_cdf);
-#endif
-#endif // CONFIG_EXT_REFS
- av1_copy(fc->single_ref_prob, default_single_ref_p);
-#if CONFIG_NEW_MULTISYMBOL
av1_copy(fc->single_ref_cdf, default_single_ref_cdf);
-#endif
-#if CONFIG_COMPOUND_SINGLEREF
- av1_copy(fc->comp_inter_mode_prob, default_comp_inter_mode_p);
-#endif // CONFIG_COMPOUND_SINGLEREF
-#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- fc->quarter_tx_size_prob = default_quarter_tx_size_prob;
-#if CONFIG_NEW_MULTISYMBOL
- av1_copy(fc->quarter_tx_size_cdf, default_quarter_tx_size_cdf);
-#endif // CONFIG_NEW_MULTISYMBOL
-#endif
-#if CONFIG_VAR_TX
- av1_copy(fc->txfm_partition_prob, default_txfm_partition_probs);
-#if CONFIG_NEW_MULTISYMBOL
av1_copy(fc->txfm_partition_cdf, default_txfm_partition_cdf);
-#endif
-#endif
- av1_copy(fc->skip_probs, default_skip_probs);
- av1_copy(fc->newmv_prob, default_newmv_prob);
- av1_copy(fc->zeromv_prob, default_zeromv_prob);
- av1_copy(fc->refmv_prob, default_refmv_prob);
- av1_copy(fc->drl_prob, default_drl_prob);
-#if CONFIG_NEW_MULTISYMBOL
+ av1_copy(fc->compound_index_cdf, default_compound_idx_cdfs);
+ av1_copy(fc->comp_group_idx_cdf, default_comp_group_idx_cdfs);
av1_copy(fc->newmv_cdf, default_newmv_cdf);
av1_copy(fc->zeromv_cdf, default_zeromv_cdf);
av1_copy(fc->refmv_cdf, default_refmv_cdf);
av1_copy(fc->drl_cdf, default_drl_cdf);
-#endif
-#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- av1_copy(fc->motion_mode_prob, default_motion_mode_prob);
av1_copy(fc->motion_mode_cdf, default_motion_mode_cdf);
-#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_MOTION_VAR
- av1_copy(fc->ncobmc_mode_prob, default_ncobmc_mode_prob);
- av1_copy(fc->ncobmc_mode_cdf, default_ncobmc_mode_cdf);
-#endif
-#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
- av1_copy(fc->obmc_prob, default_obmc_prob);
-#if CONFIG_NEW_MULTISYMBOL || CONFIG_NCOBMC_ADAPT_WEIGHT
av1_copy(fc->obmc_cdf, default_obmc_cdf);
-#endif
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- av1_copy(fc->ncobmc_prob, default_ncobmc_prob);
- av1_copy(fc->ncobmc_cdf, default_ncobmc_cdf);
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
-#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
-#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- av1_copy(fc->inter_compound_mode_probs, default_inter_compound_mode_probs);
av1_copy(fc->inter_compound_mode_cdf, default_inter_compound_mode_cdf);
-#if CONFIG_COMPOUND_SINGLEREF
- av1_copy(fc->inter_singleref_comp_mode_probs,
- default_inter_singleref_comp_mode_probs);
- av1_copy(fc->inter_singleref_comp_mode_cdf,
- default_inter_singleref_comp_mode_cdf);
-#endif // CONFIG_COMPOUND_SINGLEREF
- av1_copy(fc->compound_type_prob, default_compound_type_probs);
-#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
av1_copy(fc->compound_type_cdf, default_compound_type_cdf);
-#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
-#if CONFIG_INTERINTRA
- av1_copy(fc->interintra_prob, default_interintra_prob);
- av1_copy(fc->wedge_interintra_prob, default_wedge_interintra_prob);
-#if CONFIG_NEW_MULTISYMBOL
+ av1_copy(fc->wedge_idx_cdf, default_wedge_idx_cdf);
av1_copy(fc->interintra_cdf, default_interintra_cdf);
av1_copy(fc->wedge_interintra_cdf, default_wedge_interintra_cdf);
-#endif // CONFIG_NEW_MULTISYMBOL
- av1_copy(fc->interintra_mode_prob, default_interintra_mode_prob);
av1_copy(fc->interintra_mode_cdf, default_interintra_mode_cdf);
-#endif // CONFIG_INTERINTRA
-#if CONFIG_SUPERTX
- av1_copy(fc->supertx_prob, default_supertx_prob);
-#endif // CONFIG_SUPERTX
- av1_copy(fc->seg.tree_probs, default_segment_tree_probs);
- av1_copy(fc->seg.pred_probs, default_segment_pred_probs);
-#if CONFIG_NEW_MULTISYMBOL
av1_copy(fc->seg.pred_cdf, default_segment_pred_cdf);
-#endif
-#if CONFIG_EXT_INTRA
-#if CONFIG_INTRA_INTERP
- av1_copy(fc->intra_filter_probs, default_intra_filter_probs);
-#endif // CONFIG_INTRA_INTERP
-#endif // CONFIG_EXT_INTRA
-#if CONFIG_FILTER_INTRA
- av1_copy(fc->filter_intra_probs, default_filter_intra_probs);
-#endif // CONFIG_FILTER_INTRA
-#if CONFIG_LGT_FROM_PRED
- av1_copy(fc->intra_lgt_prob, default_intra_lgt_prob);
- av1_copy(fc->inter_lgt_prob, default_inter_lgt_prob);
-#endif // CONFIG_LGT_FROM_PRED
-#if CONFIG_LOOP_RESTORATION
- av1_copy(fc->switchable_restore_prob, default_switchable_restore_prob);
-#endif // CONFIG_LOOP_RESTORATION
+ av1_copy(fc->seg.tree_cdf, default_seg_tree_cdf);
+ av1_copy(fc->filter_intra_cdfs, default_filter_intra_cdfs);
+ av1_copy(fc->filter_intra_mode_cdf, default_filter_intra_mode_cdf);
+ av1_copy(fc->switchable_restore_cdf, default_switchable_restore_cdf);
+ av1_copy(fc->wiener_restore_cdf, default_wiener_restore_cdf);
+ av1_copy(fc->sgrproj_restore_cdf, default_sgrproj_restore_cdf);
av1_copy(fc->y_mode_cdf, default_if_y_mode_cdf);
av1_copy(fc->uv_mode_cdf, default_uv_mode_cdf);
av1_copy(fc->switchable_interp_cdf, default_switchable_interp_cdf);
av1_copy(fc->partition_cdf, default_partition_cdf);
av1_copy(fc->intra_ext_tx_cdf, default_intra_ext_tx_cdf);
av1_copy(fc->inter_ext_tx_cdf, default_inter_ext_tx_cdf);
-#if CONFIG_NEW_MULTISYMBOL
+ av1_copy(fc->skip_mode_cdfs, default_skip_mode_cdfs);
av1_copy(fc->skip_cdfs, default_skip_cdfs);
av1_copy(fc->intra_inter_cdf, default_intra_inter_cdf);
-#endif
-#if CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
- av1_copy(fc->intra_filter_cdf, default_intra_filter_cdf);
-#endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
- av1_copy(fc->seg.tree_cdf, default_seg_tree_cdf);
+ for (int i = 0; i < SPATIAL_PREDICTION_PROBS; i++)
+ av1_copy(fc->seg.spatial_pred_seg_cdf[i],
+ default_spatial_pred_seg_tree_cdf[i]);
av1_copy(fc->tx_size_cdf, default_tx_size_cdf);
- av1_copy(fc->delta_q_prob, default_delta_q_probs);
av1_copy(fc->delta_q_cdf, default_delta_q_cdf);
-#if CONFIG_EXT_DELTA_Q
- av1_copy(fc->delta_lf_prob, default_delta_lf_probs);
av1_copy(fc->delta_lf_cdf, default_delta_lf_cdf);
-#if CONFIG_LOOPFILTER_LEVEL
av1_copy(fc->delta_lf_multi_cdf, default_delta_lf_multi_cdf);
-#endif // CONFIG_LOOPFILTER_LEVEL
-#endif
-#if CONFIG_CFL
av1_copy(fc->cfl_sign_cdf, default_cfl_sign_cdf);
av1_copy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf);
-#endif
-#if CONFIG_INTRABC
av1_copy(fc->intrabc_cdf, default_intrabc_cdf);
-#endif
-#if CONFIG_LPF_SB
- av1_copy(fc->lpf_reuse_cdf, default_lpf_reuse_cdf);
- av1_copy(fc->lpf_delta_cdf, default_lpf_delta_cdf);
- av1_copy(fc->lpf_sign_cdf, default_lpf_sign_cdf);
-#endif // CONFIG_LPF_SB
}
-void av1_adapt_inter_frame_probs(AV1_COMMON *cm) {
- int i, j;
- FRAME_CONTEXT *fc = cm->fc;
- const FRAME_CONTEXT *pre_fc = cm->pre_fc;
- const FRAME_COUNTS *counts = &cm->counts;
-
- for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
- fc->intra_inter_prob[i] = av1_mode_mv_merge_probs(
- pre_fc->intra_inter_prob[i], counts->intra_inter[i]);
-
- for (i = 0; i < COMP_INTER_CONTEXTS; i++)
- fc->comp_inter_prob[i] = av1_mode_mv_merge_probs(pre_fc->comp_inter_prob[i],
- counts->comp_inter[i]);
-
-#if CONFIG_EXT_COMP_REFS
- for (i = 0; i < COMP_REF_TYPE_CONTEXTS; i++)
- fc->comp_ref_type_prob[i] = av1_mode_mv_merge_probs(
- pre_fc->comp_ref_type_prob[i], counts->comp_ref_type[i]);
-
- for (i = 0; i < UNI_COMP_REF_CONTEXTS; i++)
- for (j = 0; j < (UNIDIR_COMP_REFS - 1); j++)
- fc->uni_comp_ref_prob[i][j] = av1_mode_mv_merge_probs(
- pre_fc->uni_comp_ref_prob[i][j], counts->uni_comp_ref[i][j]);
-#endif // CONFIG_EXT_COMP_REFS
-
-#if CONFIG_EXT_REFS
- for (i = 0; i < REF_CONTEXTS; i++)
- for (j = 0; j < (FWD_REFS - 1); j++)
- fc->comp_ref_prob[i][j] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i][j],
- counts->comp_ref[i][j]);
- for (i = 0; i < REF_CONTEXTS; i++)
- for (j = 0; j < (BWD_REFS - 1); j++)
- fc->comp_bwdref_prob[i][j] = mode_mv_merge_probs(
- pre_fc->comp_bwdref_prob[i][j], counts->comp_bwdref[i][j]);
-#else
- for (i = 0; i < REF_CONTEXTS; i++)
- for (j = 0; j < (COMP_REFS - 1); j++)
- fc->comp_ref_prob[i][j] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i][j],
- counts->comp_ref[i][j]);
-#endif // CONFIG_EXT_REFS
-
- for (i = 0; i < REF_CONTEXTS; i++)
- for (j = 0; j < (SINGLE_REFS - 1); j++)
- fc->single_ref_prob[i][j] = av1_mode_mv_merge_probs(
- pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]);
-
-#if CONFIG_COMPOUND_SINGLEREF
- for (i = 0; i < COMP_INTER_MODE_CONTEXTS; i++)
- fc->comp_inter_mode_prob[i] = av1_mode_mv_merge_probs(
- pre_fc->comp_inter_mode_prob[i], counts->comp_inter_mode[i]);
-
-#endif // CONFIG_COMPOUND_SINGLEREF
-
- for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i)
- fc->newmv_prob[i] =
- av1_mode_mv_merge_probs(pre_fc->newmv_prob[i], counts->newmv_mode[i]);
- for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i)
- fc->zeromv_prob[i] =
- av1_mode_mv_merge_probs(pre_fc->zeromv_prob[i], counts->zeromv_mode[i]);
- for (i = 0; i < REFMV_MODE_CONTEXTS; ++i)
- fc->refmv_prob[i] =
- av1_mode_mv_merge_probs(pre_fc->refmv_prob[i], counts->refmv_mode[i]);
-
- for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- fc->drl_prob[i] =
- av1_mode_mv_merge_probs(pre_fc->drl_prob[i], counts->drl_mode[i]);
-
-#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; ++i)
- aom_tree_merge_probs(av1_motion_mode_tree, pre_fc->motion_mode_prob[i],
- counts->motion_mode[i], fc->motion_mode_prob[i]);
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- for (i = 0; i < ADAPT_OVERLAP_BLOCKS; ++i)
- aom_tree_merge_probs(av1_ncobmc_mode_tree, pre_fc->ncobmc_mode_prob[i],
- counts->ncobmc_mode[i], fc->ncobmc_mode_prob[i]);
-#if CONFIG_WARPED_MOTION
- for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; ++i)
- aom_tree_merge_probs(av1_ncobmc_tree, pre_fc->ncobmc_prob[i],
- counts->ncobmc[i], fc->ncobmc_prob[i]);
-#endif
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
-#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
- for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; ++i)
- fc->obmc_prob[i] =
- av1_mode_mv_merge_probs(pre_fc->obmc_prob[i], counts->obmc[i]);
-#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
-#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-
-#if CONFIG_SUPERTX
- for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) {
- for (j = TX_8X8; j < TX_SIZES; ++j) {
- fc->supertx_prob[i][j] = av1_mode_mv_merge_probs(
- pre_fc->supertx_prob[i][j], counts->supertx[i][j]);
- }
- }
-#endif // CONFIG_SUPERTX
-
- for (i = 0; i < INTER_MODE_CONTEXTS; i++)
- aom_tree_merge_probs(
- av1_inter_compound_mode_tree, pre_fc->inter_compound_mode_probs[i],
- counts->inter_compound_mode[i], fc->inter_compound_mode_probs[i]);
-#if CONFIG_COMPOUND_SINGLEREF
- for (i = 0; i < INTER_MODE_CONTEXTS; i++)
- aom_tree_merge_probs(av1_inter_singleref_comp_mode_tree,
- pre_fc->inter_singleref_comp_mode_probs[i],
- counts->inter_singleref_comp_mode[i],
- fc->inter_singleref_comp_mode_probs[i]);
-#endif // CONFIG_COMPOUND_SINGLEREF
-#if CONFIG_INTERINTRA
- if (cm->allow_interintra_compound) {
- for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
- if (is_interintra_allowed_bsize_group(i))
- fc->interintra_prob[i] = av1_mode_mv_merge_probs(
- pre_fc->interintra_prob[i], counts->interintra[i]);
- }
- for (i = 0; i < BLOCK_SIZE_GROUPS; i++) {
- aom_tree_merge_probs(
- av1_interintra_mode_tree, pre_fc->interintra_mode_prob[i],
- counts->interintra_mode[i], fc->interintra_mode_prob[i]);
- }
-#if CONFIG_WEDGE
- for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
- if (is_interintra_allowed_bsize(i) && is_interintra_wedge_used(i))
- fc->wedge_interintra_prob[i] = av1_mode_mv_merge_probs(
- pre_fc->wedge_interintra_prob[i], counts->wedge_interintra[i]);
- }
-#endif // CONFIG_WEDGE
- }
-#endif // CONFIG_INTERINTRA
-
-#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
- if (cm->allow_masked_compound) {
- for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
- aom_tree_merge_probs(
- av1_compound_type_tree, pre_fc->compound_type_prob[i],
- counts->compound_interinter[i], fc->compound_type_prob[i]);
- }
- }
-#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
+void av1_set_default_ref_deltas(int8_t *ref_deltas) {
+ assert(ref_deltas != NULL);
+
+ ref_deltas[INTRA_FRAME] = 1;
+ ref_deltas[LAST_FRAME] = 0;
+ ref_deltas[LAST2_FRAME] = ref_deltas[LAST_FRAME];
+ ref_deltas[LAST3_FRAME] = ref_deltas[LAST_FRAME];
+ ref_deltas[BWDREF_FRAME] = ref_deltas[LAST_FRAME];
+ ref_deltas[GOLDEN_FRAME] = -1;
+ ref_deltas[ALTREF2_FRAME] = -1;
+ ref_deltas[ALTREF_FRAME] = -1;
}
-void av1_adapt_intra_frame_probs(AV1_COMMON *cm) {
- int i;
- FRAME_CONTEXT *fc = cm->fc;
- const FRAME_CONTEXT *pre_fc = cm->pre_fc;
- const FRAME_COUNTS *counts = &cm->counts;
-
- if (cm->tx_mode == TX_MODE_SELECT) {
-#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- fc->quarter_tx_size_prob = av1_mode_mv_merge_probs(
- pre_fc->quarter_tx_size_prob, counts->quarter_tx_size);
-#endif
- }
-
-#if CONFIG_VAR_TX
- if (cm->tx_mode == TX_MODE_SELECT) {
- for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i)
- fc->txfm_partition_prob[i] = av1_mode_mv_merge_probs(
- pre_fc->txfm_partition_prob[i], counts->txfm_partition[i]);
- }
-#endif
-
- for (i = 0; i < SKIP_CONTEXTS; ++i)
- fc->skip_probs[i] =
- av1_mode_mv_merge_probs(pre_fc->skip_probs[i], counts->skip[i]);
-
-#if CONFIG_LGT_FROM_PRED
- int j;
- if (LGT_FROM_PRED_INTRA) {
- for (i = TX_4X4; i < LGT_SIZES; ++i) {
- for (j = 0; j < INTRA_MODES; ++j)
- fc->intra_lgt_prob[i][j] = av1_mode_mv_merge_probs(
- pre_fc->intra_lgt_prob[i][j], counts->intra_lgt[i][j]);
- }
- }
- if (LGT_FROM_PRED_INTER) {
- for (i = TX_4X4; i < LGT_SIZES; ++i) {
- fc->inter_lgt_prob[i] = av1_mode_mv_merge_probs(pre_fc->inter_lgt_prob[i],
- counts->inter_lgt[i]);
- }
- }
-#endif // CONFIG_LGT_FROM_PRED
-
- if (cm->seg.temporal_update) {
- for (i = 0; i < PREDICTION_PROBS; i++)
- fc->seg.pred_probs[i] = av1_mode_mv_merge_probs(pre_fc->seg.pred_probs[i],
- counts->seg.pred[i]);
-
- aom_tree_merge_probs(av1_segment_tree, pre_fc->seg.tree_probs,
- counts->seg.tree_mispred, fc->seg.tree_probs);
- } else {
- aom_tree_merge_probs(av1_segment_tree, pre_fc->seg.tree_probs,
- counts->seg.tree_total, fc->seg.tree_probs);
- }
+void av1_set_default_mode_deltas(int8_t *mode_deltas) {
+ assert(mode_deltas != NULL);
-#if CONFIG_EXT_PARTITION_TYPES
- for (i = 0; i < PARTITION_PLOFFSET; ++i)
- aom_tree_merge_probs(av1_partition_tree, pre_fc->partition_prob[i],
- counts->partition[i], fc->partition_prob[i]);
- for (; i < PARTITION_CONTEXTS_PRIMARY; ++i)
- aom_tree_merge_probs(av1_ext_partition_tree, pre_fc->partition_prob[i],
- counts->partition[i], fc->partition_prob[i]);
-#else
- for (i = 0; i < PARTITION_CONTEXTS_PRIMARY; ++i) {
- aom_tree_merge_probs(av1_partition_tree, pre_fc->partition_prob[i],
- counts->partition[i], fc->partition_prob[i]);
- }
-#endif // CONFIG_EXT_PARTITION_TYPES
-#if CONFIG_UNPOISON_PARTITION_CTX
- for (i = PARTITION_CONTEXTS_PRIMARY;
- i < PARTITION_CONTEXTS_PRIMARY + PARTITION_BLOCK_SIZES; ++i) {
- unsigned int ct[2] = { counts->partition[i][PARTITION_VERT],
- counts->partition[i][PARTITION_SPLIT] };
- assert(counts->partition[i][PARTITION_NONE] == 0);
- assert(counts->partition[i][PARTITION_HORZ] == 0);
- assert(fc->partition_prob[i][PARTITION_NONE] == 0);
- assert(fc->partition_prob[i][PARTITION_HORZ] == 0);
- fc->partition_prob[i][PARTITION_VERT] =
- av1_mode_mv_merge_probs(pre_fc->partition_prob[i][PARTITION_VERT], ct);
- }
- for (i = PARTITION_CONTEXTS_PRIMARY + PARTITION_BLOCK_SIZES;
- i < PARTITION_CONTEXTS_PRIMARY + 2 * PARTITION_BLOCK_SIZES; ++i) {
- unsigned int ct[2] = { counts->partition[i][PARTITION_HORZ],
- counts->partition[i][PARTITION_SPLIT] };
- assert(counts->partition[i][PARTITION_NONE] == 0);
- assert(counts->partition[i][PARTITION_VERT] == 0);
- assert(fc->partition_prob[i][PARTITION_NONE] == 0);
- assert(fc->partition_prob[i][PARTITION_VERT] == 0);
- fc->partition_prob[i][PARTITION_HORZ] =
- av1_mode_mv_merge_probs(pre_fc->partition_prob[i][PARTITION_HORZ], ct);
- }
-#endif
- for (i = 0; i < DELTA_Q_PROBS; ++i)
- fc->delta_q_prob[i] =
- mode_mv_merge_probs(pre_fc->delta_q_prob[i], counts->delta_q[i]);
-#if CONFIG_EXT_DELTA_Q
-#if CONFIG_LOOPFILTER_LEVEL
- for (i = 0; i < FRAME_LF_COUNT; ++i)
- for (int j = 0; j < DELTA_LF_PROBS; ++j)
- fc->delta_lf_multi_prob[i][j] = mode_mv_merge_probs(
- pre_fc->delta_lf_multi_prob[i][j], counts->delta_lf_multi[i][j]);
-#endif // CONFIG_LOOPFILTER_LEVEL
- for (i = 0; i < DELTA_LF_PROBS; ++i)
- fc->delta_lf_prob[i] =
- mode_mv_merge_probs(pre_fc->delta_lf_prob[i], counts->delta_lf[i]);
-#endif // CONFIG_EXT_DELTA_Q
-#if CONFIG_EXT_INTRA
-#if CONFIG_INTRA_INTERP
- for (i = 0; i < INTRA_FILTERS + 1; ++i) {
- aom_tree_merge_probs(av1_intra_filter_tree, pre_fc->intra_filter_probs[i],
- counts->intra_filter[i], fc->intra_filter_probs[i]);
- }
-#endif // CONFIG_INTRA_INTERP
-#endif // CONFIG_EXT_INTRA
-#if CONFIG_FILTER_INTRA
- for (i = 0; i < PLANE_TYPES; ++i) {
- fc->filter_intra_probs[i] = av1_mode_mv_merge_probs(
- pre_fc->filter_intra_probs[i], counts->filter_intra[i]);
- }
-#endif // CONFIG_FILTER_INTRA
+ mode_deltas[0] = 0;
+ mode_deltas[1] = 0;
}
static void set_default_lf_deltas(struct loopfilter *lf) {
lf->mode_ref_delta_enabled = 1;
lf->mode_ref_delta_update = 1;
- lf->ref_deltas[INTRA_FRAME] = 1;
- lf->ref_deltas[LAST_FRAME] = 0;
-#if CONFIG_EXT_REFS
- lf->ref_deltas[LAST2_FRAME] = lf->ref_deltas[LAST_FRAME];
- lf->ref_deltas[LAST3_FRAME] = lf->ref_deltas[LAST_FRAME];
- lf->ref_deltas[BWDREF_FRAME] = lf->ref_deltas[LAST_FRAME];
-#endif // CONFIG_EXT_REFS
- lf->ref_deltas[GOLDEN_FRAME] = -1;
-#if CONFIG_EXT_REFS
- lf->ref_deltas[ALTREF2_FRAME] = -1;
-#endif // CONFIG_EXT_REFS
- lf->ref_deltas[ALTREF_FRAME] = -1;
-
- lf->mode_deltas[0] = 0;
- lf->mode_deltas[1] = 0;
+ av1_set_default_ref_deltas(lf->ref_deltas);
+ av1_set_default_mode_deltas(lf->mode_deltas);
+}
- av1_copy(lf->last_ref_deltas, lf->ref_deltas);
- av1_copy(lf->last_mode_deltas, lf->mode_deltas);
+void av1_setup_frame_contexts(AV1_COMMON *cm) {
+ // Store the frame context into a special slot (not associated with any
+ // reference buffer), so that we can set up cm->pre_fc correctly later
+ // This function must ONLY be called when cm->fc has been initialized with
+ // default probs, either by av1_setup_past_independence or after manually
+ // initializing them
+ cm->frame_contexts[FRAME_CONTEXT_DEFAULTS] = *cm->fc;
+ if (cm->large_scale_tile) {
+ for (int i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = *cm->fc;
+ }
}
void av1_setup_past_independence(AV1_COMMON *cm) {
// Reset the segment feature data to the default stats:
// Features disabled, 0, with delta coding (Default state).
- struct loopfilter *const lf = &cm->lf;
-
- int i;
av1_clearall_segfeatures(&cm->seg);
- cm->seg.abs_delta = SEGMENT_DELTADATA;
- if (cm->last_frame_seg_map && !cm->frame_parallel_decode)
- memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
+ cm->current_frame_seg_map = cm->cur_frame->seg_map;
if (cm->current_frame_seg_map)
memset(cm->current_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
- // Reset the mode ref deltas for loop filter
- av1_zero(lf->last_ref_deltas);
- av1_zero(lf->last_mode_deltas);
- set_default_lf_deltas(lf);
-
- // To force update of the sharpness
- lf->last_sharpness_level = -1;
+ // reset mode ref deltas
+ av1_set_default_ref_deltas(cm->cur_frame->ref_deltas);
+ av1_set_default_mode_deltas(cm->cur_frame->mode_deltas);
+ set_default_lf_deltas(&cm->lf);
av1_default_coef_probs(cm);
init_mode_probs(cm->fc);
av1_init_mv_probs(cm);
-#if CONFIG_LV_MAP
av1_init_lv_map(cm);
-#endif
-#if CONFIG_PVQ
- av1_default_pvq_probs(cm);
-#endif // CONFIG_PVQ
-#if CONFIG_ADAPT_SCAN
- av1_init_scan_order(cm);
-#endif
- av1_convolve_init(cm);
cm->fc->initialized = 1;
-
-#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
- if (cm->frame_type == KEY_FRAME) {
- // Reset all frame contexts, as all reference frames will be lost.
- for (i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = *cm->fc;
- }
-#else
- if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
- cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL) {
- // Reset all frame contexts.
- for (i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = *cm->fc;
- } else if (cm->reset_frame_context == RESET_FRAME_CONTEXT_CURRENT) {
-#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
- // Reset the frame context of the first specified ref frame.
- if (cm->frame_refs[0].idx >= 0) {
- cm->frame_contexts[cm->frame_refs[0].idx] = *cm->fc;
- }
-#else
- // Reset only the frame context specified in the frame header.
- cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
-#endif // CONFIG_NO_FRAME_CONTEXT_SIGNALING
- }
-#endif // CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ av1_setup_frame_contexts(cm);
// prev_mip will only be allocated in encoder.
- if (frame_is_intra_only(cm) && cm->prev_mip && !cm->frame_parallel_decode)
+ if (frame_is_intra_only(cm) && cm->prev_mip)
memset(cm->prev_mip, 0,
- cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip));
-#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
- cm->frame_context_idx = 0;
-#endif // !CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ cm->mi_stride * cm->mi_rows * sizeof(*cm->prev_mip));
}
diff --git a/third_party/aom/av1/common/entropymode.h b/third_party/aom/av1/common/entropymode.h
index 3452241b0..0bd2e20a1 100644
--- a/third_party/aom/av1/common/entropymode.h
+++ b/third_party/aom/av1/common/entropymode.h
@@ -18,25 +18,16 @@
#include "av1/common/seg_common.h"
#include "aom_dsp/aom_filter.h"
-#if CONFIG_PVQ
-#include "av1/common/pvq.h"
-#include "av1/common/pvq_state.h"
-#include "av1/common/generic_code.h"
-#endif // CONFIG_PVQ
-
#ifdef __cplusplus
extern "C" {
#endif
#define BLOCK_SIZE_GROUPS 4
-#define TX_SIZE_CONTEXTS 2
+#define TX_SIZE_CONTEXTS 3
#define INTER_OFFSET(mode) ((mode)-NEARESTMV)
-#if CONFIG_COMPOUND_SINGLEREF
-#define INTER_SINGLEREF_COMP_OFFSET(mode) ((mode)-SR_NEAREST_NEARMV)
-#endif // CONFIG_COMPOUND_SINGLEREF
-#define INTER_COMPOUND_OFFSET(mode) ((mode)-NEAREST_NEARESTMV)
+#define INTER_COMPOUND_OFFSET(mode) (uint8_t)((mode)-NEAREST_NEARESTMV)
// Number of possible contexts for a color index.
// As can be seen from av1_get_palette_color_index_context(), the possible
@@ -44,14 +35,6 @@ extern "C" {
// a value from 0 to 4 using 'palette_color_index_context_lookup' table.
#define PALETTE_COLOR_INDEX_CONTEXTS 5
-// Maximum number of colors in a palette.
-#define PALETTE_MAX_SIZE 8
-// Minimum number of colors in a palette.
-#define PALETTE_MIN_SIZE 2
-
-// Palette mode is available for block sizes >= 8x8.
-#define PALETTE_BLOCK_SIZES (BLOCK_LARGEST - BLOCK_8X8 + 1)
-
// Palette Y mode context for a block is determined by number of neighboring
// blocks (top and/or left) using a palette for Y plane. So, possible Y mode'
// context values are:
@@ -66,11 +49,14 @@ extern "C" {
// 1 if this block uses palette for Y plane (i.e. Y palette size > 0).
#define PALETTE_UV_MODE_CONTEXTS 2
-#define PALETTE_MAX_BLOCK_SIZE (64 * 64)
+// Map the number of pixels in a block size to a context
+// 64(BLOCK_8X8, BLOCK_4x16, BLOCK_16X4) -> 0
+// 128(BLOCK_8X16, BLOCK_16x8) -> 1
+// ...
+// 4096(BLOCK_64X64) -> 6
+#define PALATTE_BSIZE_CTXS 7
-#if CONFIG_KF_CTX
#define KF_MODE_CONTEXTS 5
-#endif
struct AV1Common;
@@ -80,643 +66,128 @@ typedef struct {
const int16_t *neighbors;
} SCAN_ORDER;
-struct seg_counts {
- unsigned int tree_total[MAX_SEGMENTS];
- unsigned int tree_mispred[MAX_SEGMENTS];
- unsigned int pred[PREDICTION_PROBS][2];
-};
-
typedef struct frame_contexts {
- aom_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1];
- aom_prob uv_mode_prob[INTRA_MODES][UV_INTRA_MODES - 1];
-#if CONFIG_EXT_PARTITION_TYPES
- aom_prob partition_prob[PARTITION_CONTEXTS][EXT_PARTITION_TYPES - 1];
-#else
- aom_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1];
-#endif
- coeff_cdf_model coef_tail_cdfs[TX_SIZES][PLANE_TYPES];
- coeff_cdf_model coef_head_cdfs[TX_SIZES][PLANE_TYPES];
-#if CONFIG_ADAPT_SCAN
-// TODO(angiebird): try aom_prob
-#if CONFIG_CHROMA_2X2
- uint32_t non_zero_prob_2x2[TX_TYPES][4];
-#endif
- uint32_t non_zero_prob_4X4[TX_TYPES][16];
- uint32_t non_zero_prob_8X8[TX_TYPES][64];
- uint32_t non_zero_prob_16X16[TX_TYPES][256];
- uint32_t non_zero_prob_32X32[TX_TYPES][1024];
-
- uint32_t non_zero_prob_4X8[TX_TYPES][32];
- uint32_t non_zero_prob_8X4[TX_TYPES][32];
- uint32_t non_zero_prob_16X8[TX_TYPES][128];
- uint32_t non_zero_prob_8X16[TX_TYPES][128];
- uint32_t non_zero_prob_32X16[TX_TYPES][512];
- uint32_t non_zero_prob_16X32[TX_TYPES][512];
-
-#if CONFIG_CHROMA_2X2
- DECLARE_ALIGNED(16, int16_t, scan_2x2[TX_TYPES][4]);
-#endif
- DECLARE_ALIGNED(16, int16_t, scan_4X4[TX_TYPES][16]);
- DECLARE_ALIGNED(16, int16_t, scan_8X8[TX_TYPES][64]);
- DECLARE_ALIGNED(16, int16_t, scan_16X16[TX_TYPES][256]);
- DECLARE_ALIGNED(16, int16_t, scan_32X32[TX_TYPES][1024]);
-
- DECLARE_ALIGNED(16, int16_t, scan_4X8[TX_TYPES][32]);
- DECLARE_ALIGNED(16, int16_t, scan_8X4[TX_TYPES][32]);
- DECLARE_ALIGNED(16, int16_t, scan_8X16[TX_TYPES][128]);
- DECLARE_ALIGNED(16, int16_t, scan_16X8[TX_TYPES][128]);
- DECLARE_ALIGNED(16, int16_t, scan_16X32[TX_TYPES][512]);
- DECLARE_ALIGNED(16, int16_t, scan_32X16[TX_TYPES][512]);
-
-#if CONFIG_CHROMA_2X2
- DECLARE_ALIGNED(16, int16_t, iscan_2x2[TX_TYPES][4]);
-#endif
- DECLARE_ALIGNED(16, int16_t, iscan_4X4[TX_TYPES][16]);
- DECLARE_ALIGNED(16, int16_t, iscan_8X8[TX_TYPES][64]);
- DECLARE_ALIGNED(16, int16_t, iscan_16X16[TX_TYPES][256]);
- DECLARE_ALIGNED(16, int16_t, iscan_32X32[TX_TYPES][1024]);
-
- DECLARE_ALIGNED(16, int16_t, iscan_4X8[TX_TYPES][32]);
- DECLARE_ALIGNED(16, int16_t, iscan_8X4[TX_TYPES][32]);
- DECLARE_ALIGNED(16, int16_t, iscan_8X16[TX_TYPES][128]);
- DECLARE_ALIGNED(16, int16_t, iscan_16X8[TX_TYPES][128]);
- DECLARE_ALIGNED(16, int16_t, iscan_16X32[TX_TYPES][512]);
- DECLARE_ALIGNED(16, int16_t, iscan_32X16[TX_TYPES][512]);
-
-#if CONFIG_CHROMA_2X2
- int16_t nb_2x2[TX_TYPES][(4 + 1) * 2];
-#endif
- int16_t nb_4X4[TX_TYPES][(16 + 1) * 2];
- int16_t nb_8X8[TX_TYPES][(64 + 1) * 2];
- int16_t nb_16X16[TX_TYPES][(256 + 1) * 2];
- int16_t nb_32X32[TX_TYPES][(1024 + 1) * 2];
-
- int16_t nb_4X8[TX_TYPES][(32 + 1) * 2];
- int16_t nb_8X4[TX_TYPES][(32 + 1) * 2];
- int16_t nb_8X16[TX_TYPES][(128 + 1) * 2];
- int16_t nb_16X8[TX_TYPES][(128 + 1) * 2];
- int16_t nb_16X32[TX_TYPES][(512 + 1) * 2];
- int16_t nb_32X16[TX_TYPES][(512 + 1) * 2];
-
- SCAN_ORDER sc[TX_SIZES_ALL][TX_TYPES];
-
- int16_t eob_threshold[TX_SIZES_ALL][TX_TYPES][EOB_THRESHOLD_NUM];
-#endif // CONFIG_ADAPT_SCAN
-
-#if CONFIG_LV_MAP
- aom_prob txb_skip[TX_SIZES][TXB_SKIP_CONTEXTS];
- aom_prob nz_map[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS];
- aom_prob eob_flag[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS];
- aom_prob dc_sign[PLANE_TYPES][DC_SIGN_CONTEXTS];
- aom_prob coeff_base[TX_SIZES][PLANE_TYPES][NUM_BASE_LEVELS]
- [COEFF_BASE_CONTEXTS];
- aom_prob coeff_lps[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS];
-#if BR_NODE
- aom_prob coeff_br[TX_SIZES][PLANE_TYPES][BASE_RANGE_SETS][LEVEL_CONTEXTS];
-#endif
-#if CONFIG_CTX1D
- aom_prob eob_mode[TX_SIZES][PLANE_TYPES][TX_CLASSES];
- aom_prob empty_line[TX_SIZES][PLANE_TYPES][TX_CLASSES][EMPTY_LINE_CONTEXTS];
- aom_prob hv_eob[TX_SIZES][PLANE_TYPES][TX_CLASSES][HV_EOB_CONTEXTS];
-#endif // CONFIG_CTX1D
-
-#if LV_MAP_PROB
aom_cdf_prob txb_skip_cdf[TX_SIZES][TXB_SKIP_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob nz_map_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS]
- [CDF_SIZE(2)];
- aom_cdf_prob eob_flag_cdf[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS]
- [CDF_SIZE(2)];
- aom_cdf_prob dc_sign_cdf[PLANE_TYPES][DC_SIGN_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob coeff_base_cdf[TX_SIZES][PLANE_TYPES][NUM_BASE_LEVELS]
- [COEFF_BASE_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob coeff_lps_cdf[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS]
+ aom_cdf_prob eob_extra_cdf[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS]
[CDF_SIZE(2)];
-#if BR_NODE
- aom_cdf_prob coeff_br_cdf[TX_SIZES][PLANE_TYPES][BASE_RANGE_SETS]
- [LEVEL_CONTEXTS][CDF_SIZE(2)];
-#endif
-#if CONFIG_CTX1D
- aom_cdf_prob eob_mode_cdf[TX_SIZES][PLANE_TYPES][TX_CLASSES][CDF_SIZE(2)];
- aom_cdf_prob empty_line_cdf[TX_SIZES][PLANE_TYPES][TX_CLASSES]
- [EMPTY_LINE_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob hv_eob_cdf[TX_SIZES][PLANE_TYPES][TX_CLASSES][HV_EOB_CONTEXTS]
- [CDF_SIZE(2)];
-#endif // CONFIG_CTX1D
-#endif // LV_MAP_PROB
-#endif
+ aom_cdf_prob dc_sign_cdf[PLANE_TYPES][DC_SIGN_CONTEXTS][CDF_SIZE(2)];
+ aom_cdf_prob eob_flag_cdf16[PLANE_TYPES][2][CDF_SIZE(5)];
+ aom_cdf_prob eob_flag_cdf32[PLANE_TYPES][2][CDF_SIZE(6)];
+ aom_cdf_prob eob_flag_cdf64[PLANE_TYPES][2][CDF_SIZE(7)];
+ aom_cdf_prob eob_flag_cdf128[PLANE_TYPES][2][CDF_SIZE(8)];
+ aom_cdf_prob eob_flag_cdf256[PLANE_TYPES][2][CDF_SIZE(9)];
+ aom_cdf_prob eob_flag_cdf512[PLANE_TYPES][2][CDF_SIZE(10)];
+ aom_cdf_prob eob_flag_cdf1024[PLANE_TYPES][2][CDF_SIZE(11)];
+ aom_cdf_prob coeff_base_eob_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS_EOB]
+ [CDF_SIZE(3)];
+ aom_cdf_prob coeff_base_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS]
+ [CDF_SIZE(4)];
+ aom_cdf_prob coeff_br_cdf[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS]
+ [CDF_SIZE(BR_CDF_SIZE)];
- aom_prob newmv_prob[NEWMV_MODE_CONTEXTS];
- aom_prob zeromv_prob[ZEROMV_MODE_CONTEXTS];
- aom_prob refmv_prob[REFMV_MODE_CONTEXTS];
- aom_prob drl_prob[DRL_MODE_CONTEXTS];
-#if CONFIG_NEW_MULTISYMBOL
aom_cdf_prob newmv_cdf[NEWMV_MODE_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob zeromv_cdf[ZEROMV_MODE_CONTEXTS][CDF_SIZE(2)];
+ aom_cdf_prob zeromv_cdf[GLOBALMV_MODE_CONTEXTS][CDF_SIZE(2)];
aom_cdf_prob refmv_cdf[REFMV_MODE_CONTEXTS][CDF_SIZE(2)];
aom_cdf_prob drl_cdf[DRL_MODE_CONTEXTS][CDF_SIZE(2)];
-#endif
- aom_prob inter_compound_mode_probs[INTER_MODE_CONTEXTS]
- [INTER_COMPOUND_MODES - 1];
aom_cdf_prob inter_compound_mode_cdf[INTER_MODE_CONTEXTS]
[CDF_SIZE(INTER_COMPOUND_MODES)];
-#if CONFIG_COMPOUND_SINGLEREF
- aom_prob inter_singleref_comp_mode_probs[INTER_MODE_CONTEXTS]
- [INTER_SINGLEREF_COMP_MODES - 1];
- aom_cdf_prob inter_singleref_comp_mode_cdf[INTER_MODE_CONTEXTS][CDF_SIZE(
- INTER_SINGLEREF_COMP_MODES)];
-#endif // CONFIG_COMPOUND_SINGLEREF
- aom_prob compound_type_prob[BLOCK_SIZES_ALL][COMPOUND_TYPES - 1];
-#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
- aom_cdf_prob compound_type_cdf[BLOCK_SIZES_ALL][CDF_SIZE(COMPOUND_TYPES)];
-#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
-#if CONFIG_INTERINTRA
- aom_prob interintra_prob[BLOCK_SIZE_GROUPS];
- aom_prob wedge_interintra_prob[BLOCK_SIZES_ALL];
- aom_prob interintra_mode_prob[BLOCK_SIZE_GROUPS][INTERINTRA_MODES - 1];
-#if CONFIG_NEW_MULTISYMBOL
+ aom_cdf_prob compound_type_cdf[BLOCK_SIZES_ALL][CDF_SIZE(COMPOUND_TYPES - 1)];
+ aom_cdf_prob wedge_idx_cdf[BLOCK_SIZES_ALL][CDF_SIZE(16)];
aom_cdf_prob interintra_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(2)];
aom_cdf_prob wedge_interintra_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)];
-#endif
aom_cdf_prob interintra_mode_cdf[BLOCK_SIZE_GROUPS]
[CDF_SIZE(INTERINTRA_MODES)];
-#endif // CONFIG_INTERINTRA
-#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- aom_prob motion_mode_prob[BLOCK_SIZES_ALL][MOTION_MODES - 1];
aom_cdf_prob motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(MOTION_MODES)];
-#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_MOTION_VAR
- aom_prob ncobmc_mode_prob[ADAPT_OVERLAP_BLOCKS][MAX_NCOBMC_MODES - 1];
- aom_cdf_prob ncobmc_mode_cdf[ADAPT_OVERLAP_BLOCKS]
- [CDF_SIZE(MAX_NCOBMC_MODES)];
-#endif
-#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- aom_prob ncobmc_prob[BLOCK_SIZES_ALL][OBMC_FAMILY_MODES - 1];
- aom_cdf_prob ncobmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(OBMC_FAMILY_MODES)];
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
- aom_prob obmc_prob[BLOCK_SIZES_ALL];
-#if CONFIG_NEW_MULTISYMBOL || CONFIG_NCOBMC_ADAPT_WEIGHT
aom_cdf_prob obmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)];
-#endif // CONFIG_NEW_MULTISYMBOL
-#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
-#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- aom_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
- aom_prob comp_inter_prob[COMP_INTER_CONTEXTS];
- aom_cdf_prob palette_y_size_cdf[PALETTE_BLOCK_SIZES][CDF_SIZE(PALETTE_SIZES)];
- aom_cdf_prob palette_uv_size_cdf[PALETTE_BLOCK_SIZES]
- [CDF_SIZE(PALETTE_SIZES)];
+ aom_cdf_prob palette_y_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)];
+ aom_cdf_prob palette_uv_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)];
aom_cdf_prob palette_y_color_index_cdf[PALETTE_SIZES]
[PALETTE_COLOR_INDEX_CONTEXTS]
[CDF_SIZE(PALETTE_COLORS)];
aom_cdf_prob palette_uv_color_index_cdf[PALETTE_SIZES]
[PALETTE_COLOR_INDEX_CONTEXTS]
[CDF_SIZE(PALETTE_COLORS)];
-#if CONFIG_MRC_TX
- aom_cdf_prob mrc_mask_inter_cdf[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
- [CDF_SIZE(PALETTE_COLORS)];
- aom_cdf_prob mrc_mask_intra_cdf[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
- [CDF_SIZE(PALETTE_COLORS)];
-#endif // CONFIG_MRC_TX
-#if CONFIG_NEW_MULTISYMBOL
- aom_cdf_prob palette_y_mode_cdf[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS]
+ aom_cdf_prob palette_y_mode_cdf[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS]
[CDF_SIZE(2)];
aom_cdf_prob palette_uv_mode_cdf[PALETTE_UV_MODE_CONTEXTS][CDF_SIZE(2)];
aom_cdf_prob comp_inter_cdf[COMP_INTER_CONTEXTS][CDF_SIZE(2)];
aom_cdf_prob single_ref_cdf[REF_CONTEXTS][SINGLE_REFS - 1][CDF_SIZE(2)];
-#endif
-#if CONFIG_EXT_COMP_REFS
- aom_prob comp_ref_type_prob[COMP_REF_TYPE_CONTEXTS];
- aom_prob uni_comp_ref_prob[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1];
-#if CONFIG_NEW_MULTISYMBOL
aom_cdf_prob comp_ref_type_cdf[COMP_REF_TYPE_CONTEXTS][CDF_SIZE(2)];
aom_cdf_prob uni_comp_ref_cdf[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1]
[CDF_SIZE(2)];
-#endif // CONFIG_NEW_MULTISYMBOL
-#endif // CONFIG_EXT_COMP_REFS
- aom_prob single_ref_prob[REF_CONTEXTS][SINGLE_REFS - 1];
-#if CONFIG_EXT_REFS
- aom_prob comp_ref_prob[REF_CONTEXTS][FWD_REFS - 1];
- aom_prob comp_bwdref_prob[REF_CONTEXTS][BWD_REFS - 1];
-#else
- aom_prob comp_ref_prob[REF_CONTEXTS][COMP_REFS - 1];
-#endif // CONFIG_EXT_REFS
-#if CONFIG_NEW_MULTISYMBOL
-#if CONFIG_EXT_REFS
aom_cdf_prob comp_ref_cdf[REF_CONTEXTS][FWD_REFS - 1][CDF_SIZE(2)];
aom_cdf_prob comp_bwdref_cdf[REF_CONTEXTS][BWD_REFS - 1][CDF_SIZE(2)];
-#else
- aom_cdf_prob comp_ref_cdf[REF_CONTEXTS][COMP_REFS - 1][CDF_SIZE(2)];
-#endif // CONFIG_EXT_REFS
-#endif
-#if CONFIG_COMPOUND_SINGLEREF
- aom_prob comp_inter_mode_prob[COMP_INTER_MODE_CONTEXTS];
-#endif // CONFIG_COMPOUND_SINGLEREF
-#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- aom_prob quarter_tx_size_prob;
-#if CONFIG_NEW_MULTISYMBOL
- aom_cdf_prob quarter_tx_size_cdf[CDF_SIZE(2)];
-#endif
-#endif
-#if CONFIG_VAR_TX
- aom_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS];
-#if CONFIG_NEW_MULTISYMBOL
aom_cdf_prob txfm_partition_cdf[TXFM_PARTITION_CONTEXTS][CDF_SIZE(2)];
-#endif
-#endif // CONFIG_VAR_TX
- aom_prob skip_probs[SKIP_CONTEXTS];
-#if CONFIG_NEW_MULTISYMBOL
+ aom_cdf_prob compound_index_cdf[COMP_INDEX_CONTEXTS][CDF_SIZE(2)];
+ aom_cdf_prob comp_group_idx_cdf[COMP_GROUP_IDX_CONTEXTS][CDF_SIZE(2)];
+ aom_cdf_prob skip_mode_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)];
aom_cdf_prob skip_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)];
aom_cdf_prob intra_inter_cdf[INTRA_INTER_CONTEXTS][CDF_SIZE(2)];
-#endif
- nmv_context nmvc[NMV_CONTEXTS];
-#if CONFIG_INTRABC
+ nmv_context nmvc;
nmv_context ndvc;
aom_cdf_prob intrabc_cdf[CDF_SIZE(2)];
-#endif
- int initialized;
-#if CONFIG_SUPERTX
- aom_prob supertx_prob[PARTITION_SUPERTX_CONTEXTS][TX_SIZES];
-#endif // CONFIG_SUPERTX
struct segmentation_probs seg;
-#if CONFIG_EXT_INTRA
-#if CONFIG_INTRA_INTERP
- aom_prob intra_filter_probs[INTRA_FILTERS + 1][INTRA_FILTERS - 1];
-#endif // CONFIG_INTRA_INTERP
-#endif // CONFIG_EXT_INTRA
-#if CONFIG_FILTER_INTRA
- aom_prob filter_intra_probs[PLANE_TYPES];
-#endif // CONFIG_FILTER_INTRA
-#if CONFIG_LOOP_RESTORATION
- aom_prob switchable_restore_prob[RESTORE_SWITCHABLE_TYPES - 1];
-#endif // CONFIG_LOOP_RESTORATION
+ aom_cdf_prob filter_intra_cdfs[BLOCK_SIZES_ALL][CDF_SIZE(2)];
+ aom_cdf_prob filter_intra_mode_cdf[CDF_SIZE(FILTER_INTRA_MODES)];
+ aom_cdf_prob switchable_restore_cdf[CDF_SIZE(RESTORE_SWITCHABLE_TYPES)];
+ aom_cdf_prob wiener_restore_cdf[CDF_SIZE(2)];
+ aom_cdf_prob sgrproj_restore_cdf[CDF_SIZE(2)];
aom_cdf_prob y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(INTRA_MODES)];
- aom_cdf_prob uv_mode_cdf[INTRA_MODES][CDF_SIZE(UV_INTRA_MODES)];
-#if CONFIG_EXT_PARTITION_TYPES
+ aom_cdf_prob uv_mode_cdf[CFL_ALLOWED_TYPES][INTRA_MODES]
+ [CDF_SIZE(UV_INTRA_MODES)];
aom_cdf_prob partition_cdf[PARTITION_CONTEXTS][CDF_SIZE(EXT_PARTITION_TYPES)];
-#else
- aom_cdf_prob partition_cdf[PARTITION_CONTEXTS][CDF_SIZE(PARTITION_TYPES)];
-#endif
aom_cdf_prob switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS]
[CDF_SIZE(SWITCHABLE_FILTERS)];
-/* kf_y_cdf is discarded after use, so does not require persistent storage.
- However, we keep it with the other CDFs in this struct since it needs to
- be copied to each tile to support parallelism just like the others.
-*/
-#if CONFIG_KF_CTX
+ /* kf_y_cdf is discarded after use, so does not require persistent storage.
+ However, we keep it with the other CDFs in this struct since it needs to
+ be copied to each tile to support parallelism just like the others.
+ */
aom_cdf_prob kf_y_cdf[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS]
[CDF_SIZE(INTRA_MODES)];
-#else
- aom_cdf_prob kf_y_cdf[INTRA_MODES][INTRA_MODES][CDF_SIZE(INTRA_MODES)];
-#endif
- aom_cdf_prob tx_size_cdf[MAX_TX_DEPTH][TX_SIZE_CONTEXTS]
+
+ aom_cdf_prob angle_delta_cdf[DIRECTIONAL_MODES]
+ [CDF_SIZE(2 * MAX_ANGLE_DELTA + 1)];
+
+ aom_cdf_prob tx_size_cdf[MAX_TX_CATS][TX_SIZE_CONTEXTS]
[CDF_SIZE(MAX_TX_DEPTH + 1)];
aom_cdf_prob delta_q_cdf[CDF_SIZE(DELTA_Q_PROBS + 1)];
-#if CONFIG_EXT_DELTA_Q
-#if CONFIG_LOOPFILTER_LEVEL
aom_cdf_prob delta_lf_multi_cdf[FRAME_LF_COUNT][CDF_SIZE(DELTA_LF_PROBS + 1)];
-#endif // CONFIG_LOOPFILTER_LEVEL
aom_cdf_prob delta_lf_cdf[CDF_SIZE(DELTA_LF_PROBS + 1)];
-#endif
-#if CONFIG_EXT_TX
aom_cdf_prob intra_ext_tx_cdf[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
[CDF_SIZE(TX_TYPES)];
aom_cdf_prob inter_ext_tx_cdf[EXT_TX_SETS_INTER][EXT_TX_SIZES]
[CDF_SIZE(TX_TYPES)];
-#else
- aom_cdf_prob intra_ext_tx_cdf[EXT_TX_SIZES][TX_TYPES][CDF_SIZE(TX_TYPES)];
- aom_cdf_prob inter_ext_tx_cdf[EXT_TX_SIZES][CDF_SIZE(TX_TYPES)];
-#endif // CONFIG_EXT_TX
-#if CONFIG_LGT_FROM_PRED
- aom_prob intra_lgt_prob[LGT_SIZES][INTRA_MODES];
- aom_prob inter_lgt_prob[LGT_SIZES];
-#endif // CONFIG_LGT_FROM_PRED
-#if CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
- aom_cdf_prob intra_filter_cdf[INTRA_FILTERS + 1][CDF_SIZE(INTRA_FILTERS)];
-#endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
- aom_prob delta_q_prob[DELTA_Q_PROBS];
-#if CONFIG_EXT_DELTA_Q
-#if CONFIG_LOOPFILTER_LEVEL
- aom_prob delta_lf_multi_prob[FRAME_LF_COUNT][DELTA_LF_PROBS];
-#endif // CONFIG_LOOPFILTER_LEVEL
- aom_prob delta_lf_prob[DELTA_LF_PROBS];
-#endif
-#if CONFIG_PVQ
- // TODO(any): If PVQ is enabled, most of coefficient related cdf,
- // such as coef_cdfs[], coef_tail_cdfs[], and coef_heaf_cdfs[] can be removed.
- od_adapt_ctx pvq_context;
-#endif // CONFIG_PVQ
-#if CONFIG_CFL
aom_cdf_prob cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)];
aom_cdf_prob cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)];
-#endif
-#if CONFIG_LPF_SB
- aom_cdf_prob lpf_reuse_cdf[LPF_REUSE_CONTEXT][CDF_SIZE(2)];
- aom_cdf_prob lpf_delta_cdf[LPF_DELTA_CONTEXT][CDF_SIZE(DELTA_RANGE)];
- aom_cdf_prob lpf_sign_cdf[LPF_REUSE_CONTEXT][LPF_SIGN_CONTEXT][CDF_SIZE(2)];
-#endif // CONFIG_LPF_SB
+ int initialized;
} FRAME_CONTEXT;
-typedef struct FRAME_COUNTS {
-// Note: This structure should only contain 'unsigned int' fields, or
-// aggregates built solely from 'unsigned int' fields/elements
-#if CONFIG_ENTROPY_STATS
- unsigned int kf_y_mode[INTRA_MODES][INTRA_MODES][INTRA_MODES];
- unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES];
- unsigned int uv_mode[INTRA_MODES][UV_INTRA_MODES];
-#endif // CONFIG_ENTROPY_STATS
-#if CONFIG_EXT_PARTITION_TYPES
- unsigned int partition[PARTITION_CONTEXTS][EXT_PARTITION_TYPES];
-#else
- unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES];
-#endif
- unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS]
- [SWITCHABLE_FILTERS];
-#if CONFIG_ADAPT_SCAN
-#if CONFIG_CHROMA_2X2
- unsigned int non_zero_count_2x2[TX_TYPES][4];
-#endif // CONFIG_CHROMA_2X2
- unsigned int non_zero_count_4X4[TX_TYPES][16];
- unsigned int non_zero_count_8X8[TX_TYPES][64];
- unsigned int non_zero_count_16X16[TX_TYPES][256];
- unsigned int non_zero_count_32X32[TX_TYPES][1024];
-
- unsigned int non_zero_count_4x8[TX_TYPES][32];
- unsigned int non_zero_count_8x4[TX_TYPES][32];
- unsigned int non_zero_count_8x16[TX_TYPES][128];
- unsigned int non_zero_count_16x8[TX_TYPES][128];
- unsigned int non_zero_count_16x32[TX_TYPES][512];
- unsigned int non_zero_count_32x16[TX_TYPES][512];
-
- unsigned int txb_count[TX_SIZES_ALL][TX_TYPES];
-#endif // CONFIG_ADAPT_SCAN
-
-#if CONFIG_LV_MAP
- unsigned int txb_skip[TX_SIZES][TXB_SKIP_CONTEXTS][2];
- unsigned int nz_map[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS][2];
- unsigned int eob_flag[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS][2];
- unsigned int dc_sign[PLANE_TYPES][DC_SIGN_CONTEXTS][2];
- unsigned int coeff_base[TX_SIZES][PLANE_TYPES][NUM_BASE_LEVELS]
- [COEFF_BASE_CONTEXTS][2];
- unsigned int coeff_lps[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS][2];
- unsigned int coeff_br[TX_SIZES][PLANE_TYPES][BASE_RANGE_SETS][LEVEL_CONTEXTS]
- [2];
-#if CONFIG_CTX1D
- unsigned int eob_mode[TX_SIZES][PLANE_TYPES][TX_CLASSES][2];
- unsigned int empty_line[TX_SIZES][PLANE_TYPES][TX_CLASSES]
- [EMPTY_LINE_CONTEXTS][2];
- unsigned int hv_eob[TX_SIZES][PLANE_TYPES][TX_CLASSES][HV_EOB_CONTEXTS][2];
-#endif // CONFIG_CTX1D
-#endif // CONFIG_LV_MAP
-
-#if CONFIG_SYMBOLRATE
- unsigned int coeff_num[2]; // 0: zero coeff 1: non-zero coeff
- unsigned int symbol_num[2]; // 0: entropy symbol 1: non-entropy symbol
-#endif
-
- unsigned int newmv_mode[NEWMV_MODE_CONTEXTS][2];
- unsigned int zeromv_mode[ZEROMV_MODE_CONTEXTS][2];
- unsigned int refmv_mode[REFMV_MODE_CONTEXTS][2];
- unsigned int drl_mode[DRL_MODE_CONTEXTS][2];
-
- unsigned int inter_compound_mode[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
-#if CONFIG_COMPOUND_SINGLEREF
- unsigned int inter_singleref_comp_mode[INTER_MODE_CONTEXTS]
- [INTER_SINGLEREF_COMP_MODES];
-#endif // CONFIG_COMPOUND_SINGLEREF
-#if CONFIG_INTERINTRA
- unsigned int interintra[BLOCK_SIZE_GROUPS][2];
- unsigned int interintra_mode[BLOCK_SIZE_GROUPS][INTERINTRA_MODES];
- unsigned int wedge_interintra[BLOCK_SIZES_ALL][2];
-#endif // CONFIG_INTERINTRA
- unsigned int compound_interinter[BLOCK_SIZES_ALL][COMPOUND_TYPES];
-#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- unsigned int motion_mode[BLOCK_SIZES_ALL][MOTION_MODES];
-#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_MOTION_VAR
- unsigned int ncobmc_mode[ADAPT_OVERLAP_BLOCKS][MAX_NCOBMC_MODES];
-#endif
-#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- unsigned int ncobmc[BLOCK_SIZES_ALL][OBMC_FAMILY_MODES];
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
- unsigned int obmc[BLOCK_SIZES_ALL][2];
-#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
-#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- unsigned int intra_inter[INTRA_INTER_CONTEXTS][2];
- unsigned int comp_inter[COMP_INTER_CONTEXTS][2];
-#if CONFIG_EXT_COMP_REFS
- unsigned int comp_ref_type[COMP_REF_TYPE_CONTEXTS][2];
- unsigned int uni_comp_ref[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1][2];
-#endif // CONFIG_EXT_COMP_REFS
- unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS - 1][2];
-#if CONFIG_EXT_REFS
- unsigned int comp_ref[REF_CONTEXTS][FWD_REFS - 1][2];
- unsigned int comp_bwdref[REF_CONTEXTS][BWD_REFS - 1][2];
-#else
- unsigned int comp_ref[REF_CONTEXTS][COMP_REFS - 1][2];
-#endif // CONFIG_EXT_REFS
-#if CONFIG_COMPOUND_SINGLEREF
- unsigned int comp_inter_mode[COMP_INTER_MODE_CONTEXTS][2];
-#endif // CONFIG_COMPOUND_SINGLEREF
- // TODO(urvang): Only needed for !CONFIG_VAR_TX case. So can be removed when
- // CONFIG_VAR_TX flag is removed.
- unsigned int tx_size[MAX_TX_DEPTH][TX_SIZE_CONTEXTS][MAX_TX_DEPTH + 1];
-#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- unsigned int quarter_tx_size[2];
-#endif
-#if CONFIG_VAR_TX
- unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
-#endif
- unsigned int skip[SKIP_CONTEXTS][2];
- nmv_context_counts mv[NMV_CONTEXTS];
-#if CONFIG_INTRABC
- unsigned int intrabc[2];
- nmv_context_counts dv;
-#endif
-#if CONFIG_LGT_FROM_PRED
- unsigned int intra_lgt[LGT_SIZES][INTRA_MODES][2];
- unsigned int inter_lgt[LGT_SIZES][2];
-#endif // CONFIG_LGT_FROM_PRED
- unsigned int delta_q[DELTA_Q_PROBS][2];
-#if CONFIG_EXT_DELTA_Q
-#if CONFIG_LOOPFILTER_LEVEL
- unsigned int delta_lf_multi[FRAME_LF_COUNT][DELTA_LF_PROBS][2];
-#endif // CONFIG_LOOPFILTER_LEVEL
- unsigned int delta_lf[DELTA_LF_PROBS][2];
-#endif
-#if CONFIG_EXT_TX && CONFIG_RECT_TX
- unsigned int tx_size_implied[TX_SIZES][TX_SIZES];
-#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
-#if CONFIG_ENTROPY_STATS
-#if CONFIG_EXT_TX
- unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
- unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
- [TX_TYPES];
-#else
- unsigned int intra_ext_tx[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
- unsigned int inter_ext_tx[EXT_TX_SIZES][TX_TYPES];
-#endif // CONFIG_EXT_TX
-#endif // CONFIG_ENTROPY_STATS
-#if CONFIG_SUPERTX
- unsigned int supertx[PARTITION_SUPERTX_CONTEXTS][TX_SIZES][2];
- unsigned int supertx_size[TX_SIZES];
-#endif // CONFIG_SUPERTX
- struct seg_counts seg;
-#if CONFIG_EXT_INTRA
-#if CONFIG_INTRA_INTERP
- unsigned int intra_filter[INTRA_FILTERS + 1][INTRA_FILTERS];
-#endif // CONFIG_INTRA_INTERP
-#endif // CONFIG_EXT_INTRA
-#if CONFIG_FILTER_INTRA
- unsigned int filter_intra[PLANE_TYPES][2];
-#endif // CONFIG_FILTER_INTRA
-#if CONFIG_LPF_SB
- unsigned int lpf_reuse[LPF_REUSE_CONTEXT][2];
- unsigned int lpf_delta[LPF_DELTA_CONTEXT][DELTA_RANGE];
- unsigned int lpf_sign[LPF_SIGN_CONTEXT][2];
-#endif // CONFIG_LPF_SB
-} FRAME_COUNTS;
-
-#if CONFIG_KF_CTX
-extern const aom_cdf_prob default_kf_y_mode_cdf[KF_MODE_CONTEXTS]
- [KF_MODE_CONTEXTS]
- [CDF_SIZE(INTRA_MODES)];
-#else
-extern const aom_cdf_prob default_kf_y_mode_cdf[INTRA_MODES][INTRA_MODES]
- [CDF_SIZE(INTRA_MODES)];
-#endif
-
-extern const aom_prob av1_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES]
- [PALETTE_Y_MODE_CONTEXTS];
-extern const aom_prob
- av1_default_palette_uv_mode_prob[PALETTE_UV_MODE_CONTEXTS];
-
-#if CONFIG_EXT_TX
static const int av1_ext_tx_ind[EXT_TX_SET_TYPES][TX_TYPES] = {
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
- {
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
-#if CONFIG_MRC_TX
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
- },
- {
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
- },
-#endif // CONFIG_MRC_TX
- {
- 1, 3, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
- {
- 1, 5, 6, 4, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0,
- },
- {
- 3, 4, 5, 8, 6, 7, 9, 10, 11, 0, 1, 2, 0, 0, 0, 0,
- },
- {
- 7, 8, 9, 12, 10, 11, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6,
- },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 1, 3, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 1, 5, 6, 4, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0 },
+ { 3, 4, 5, 8, 6, 7, 9, 10, 11, 0, 1, 2, 0, 0, 0, 0 },
+ { 7, 8, 9, 12, 10, 11, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6 },
};
static const int av1_ext_tx_inv[EXT_TX_SET_TYPES][TX_TYPES] = {
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
- {
- 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
-#if CONFIG_MRC_TX
- {
- 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
- {
- 9, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
-#endif // CONFIG_MRC_TX
- {
- 9, 0, 3, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
- {
- 9, 0, 10, 11, 3, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
- {
- 9, 10, 11, 0, 1, 2, 4, 5, 3, 6, 7, 8, 0, 0, 0, 0,
- },
- {
- 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 4, 5, 3, 6, 7, 8,
- },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 9, 0, 3, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 9, 0, 10, 11, 3, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 9, 10, 11, 0, 1, 2, 4, 5, 3, 6, 7, 8, 0, 0, 0, 0 },
+ { 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 4, 5, 3, 6, 7, 8 },
};
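// Quick sanity check on the two tables above (illustrative, assuming the
// TX_TYPE ordering from enums.h with DCT_DCT == 0, ADST_DCT == 1 and
// IDTX == 9): each row of av1_ext_tx_inv undoes the corresponding row of
// av1_ext_tx_ind, e.g. for set index 2, av1_ext_tx_ind[2][ADST_DCT] == 3 and
// av1_ext_tx_inv[2][3] == ADST_DCT, while av1_ext_tx_inv[2][0] == IDTX.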
-#else
-#if CONFIG_MRC_TX
-static const int av1_ext_tx_ind[TX_TYPES] = {
- 0, 3, 4, 2, 1,
-};
-static const int av1_ext_tx_inv[TX_TYPES] = {
- 0, 4, 3, 1, 2,
-};
-#else
-static const int av1_ext_tx_ind[TX_TYPES] = {
- 0, 2, 3, 1,
-};
-static const int av1_ext_tx_inv[TX_TYPES] = {
- 0, 3, 1, 2,
-};
-#endif // CONFIG_MRC_TX
-#endif // CONFIG_EXT_TX
-
-#if CONFIG_INTERINTRA
-extern const aom_tree_index
- av1_interintra_mode_tree[TREE_SIZE(INTERINTRA_MODES)];
-#endif
-extern const aom_tree_index
- av1_inter_compound_mode_tree[TREE_SIZE(INTER_COMPOUND_MODES)];
-#if CONFIG_COMPOUND_SINGLEREF
-extern const aom_tree_index
- av1_inter_singleref_comp_mode_tree[TREE_SIZE(INTER_SINGLEREF_COMP_MODES)];
-#endif // CONFIG_COMPOUND_SINGLEREF
-extern const aom_tree_index av1_compound_type_tree[TREE_SIZE(COMPOUND_TYPES)];
-extern const aom_tree_index av1_partition_tree[TREE_SIZE(PARTITION_TYPES)];
-#if CONFIG_EXT_PARTITION_TYPES
-extern const aom_tree_index
- av1_ext_partition_tree[TREE_SIZE(EXT_PARTITION_TYPES)];
-#endif
-extern const aom_tree_index
- av1_palette_color_index_tree[PALETTE_SIZES][TREE_SIZE(PALETTE_COLORS)];
-#if CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
-extern const aom_tree_index av1_intra_filter_tree[TREE_SIZE(INTRA_FILTERS)];
-#endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
-#if CONFIG_EXT_TX
-extern const aom_tree_index av1_ext_tx_tree[EXT_TX_SET_TYPES]
- [TREE_SIZE(TX_TYPES)];
-#else
-extern const aom_tree_index av1_ext_tx_tree[TREE_SIZE(TX_TYPES)];
-#endif // CONFIG_EXT_TX
-#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-extern const aom_tree_index av1_motion_mode_tree[TREE_SIZE(MOTION_MODES)];
-#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
-extern const aom_tree_index av1_ncobmc_mode_tree[TREE_SIZE(MAX_NCOBMC_MODES)];
-#if CONFIG_WARPED_MOTION
-extern const aom_tree_index av1_ncobmc_tree[TREE_SIZE(OBMC_FAMILY_MODES)];
-#endif // CONFIG_WARPED_MOTION
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
-#if CONFIG_LOOP_RESTORATION
-#define RESTORE_NONE_SGRPROJ_PROB 64
-#define RESTORE_NONE_BILATERAL_PROB 16
-#define RESTORE_NONE_WIENER_PROB 64
-#define RESTORE_NONE_DOMAINTXFMRF_PROB 64
-extern const aom_tree_index
- av1_switchable_restore_tree[TREE_SIZE(RESTORE_SWITCHABLE_TYPES)];
-#endif // CONFIG_LOOP_RESTORATION
+void av1_set_default_ref_deltas(int8_t *ref_deltas);
+void av1_set_default_mode_deltas(int8_t *mode_deltas);
+void av1_setup_frame_contexts(struct AV1Common *cm);
void av1_setup_past_independence(struct AV1Common *cm);
-void av1_adapt_intra_frame_probs(struct AV1Common *cm);
-void av1_adapt_inter_frame_probs(struct AV1Common *cm);
-
static INLINE int av1_ceil_log2(int n) {
+ if (n < 2) return 0;
int i = 1, p = 2;
while (p < n) {
i++;
diff --git a/third_party/aom/av1/common/entropymv.c b/third_party/aom/av1/common/entropymv.c
index 2d0191366..446aa433c 100644
--- a/third_party/aom/av1/common/entropymv.c
+++ b/third_party/aom/av1/common/entropymv.c
@@ -12,100 +12,51 @@
#include "av1/common/onyxc_int.h"
#include "av1/common/entropymv.h"
-// Integer pel reference mv threshold for use of high-precision 1/8 mv
-#define COMPANDED_MVREF_THRESH 8
-
-const aom_tree_index av1_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = {
- -MV_JOINT_ZERO, 2, -MV_JOINT_HNZVZ, 4, -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ
-};
-
-/* clang-format off */
-const aom_tree_index av1_mv_class_tree[TREE_SIZE(MV_CLASSES)] = {
- -MV_CLASS_0, 2,
- -MV_CLASS_1, 4,
- 6, 8,
- -MV_CLASS_2, -MV_CLASS_3,
- 10, 12,
- -MV_CLASS_4, -MV_CLASS_5,
- -MV_CLASS_6, 14,
- 16, 18,
- -MV_CLASS_7, -MV_CLASS_8,
- -MV_CLASS_9, -MV_CLASS_10,
-};
-/* clang-format on */
-
-const aom_tree_index av1_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = {
- -0, -1,
-};
-
-const aom_tree_index av1_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { -0, 2, -1,
- 4, -2, -3 };
-
static const nmv_context default_nmv_context = {
- { 32, 64, 96 }, // joints
- { AOM_ICDF(4096), AOM_ICDF(11264), AOM_ICDF(19328), AOM_ICDF(32768),
- 0 }, // joint_cdf
+ { AOM_CDF4(4096, 11264, 19328) }, // joints_cdf
{ {
// Vertical component
- 128, // sign
- { 224, 144, 192, 168, 192, 176, 192, 198, 198, 245 }, // class
- { AOM_ICDF(28672), AOM_ICDF(30976), AOM_ICDF(31858), AOM_ICDF(32320),
- AOM_ICDF(32551), AOM_ICDF(32656), AOM_ICDF(32740), AOM_ICDF(32757),
- AOM_ICDF(32762), AOM_ICDF(32767), AOM_ICDF(32768), 0 }, // class_cdf
- { 216 }, // class0
- { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, // bits
- { { 128, 128, 64 }, { 96, 112, 64 } }, // class0_fp
- { 64, 96, 64 }, // fp
- { { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(26624), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(12288), AOM_ICDF(21248), AOM_ICDF(24128), AOM_ICDF(32768),
- 0 } }, // class0_fp_cdf
- { AOM_ICDF(8192), AOM_ICDF(17408), AOM_ICDF(21248), AOM_ICDF(32768),
- 0 }, // fp_cdf
- 160, // class0_hp bit
- 128, // hp
-#if CONFIG_NEW_MULTISYMBOL
- { AOM_ICDF(160 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(216 * 128), AOM_ICDF(32768), 0 },
- { { AOM_ICDF(128 * 196), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 198), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 208), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 224), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 245), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 } }, // bits_cdf
-#endif
+ { AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740, 32757,
+ 32762, 32767) }, // class_cdf // fp
+ { { AOM_CDF4(16384, 24576, 26624) },
+ { AOM_CDF4(12288, 21248, 24128) } }, // class0_fp_cdf
+ { AOM_CDF4(8192, 17408, 21248) }, // fp_cdf
+ { AOM_CDF2(128 * 128) }, // sign_cdf
+ { AOM_CDF2(160 * 128) }, // class0_hp_cdf
+ { AOM_CDF2(128 * 128) }, // hp_cdf
+ { AOM_CDF2(216 * 128) }, // class0_cdf
+ { { AOM_CDF2(128 * 136) },
+ { AOM_CDF2(128 * 140) },
+ { AOM_CDF2(128 * 148) },
+ { AOM_CDF2(128 * 160) },
+ { AOM_CDF2(128 * 176) },
+ { AOM_CDF2(128 * 192) },
+ { AOM_CDF2(128 * 224) },
+ { AOM_CDF2(128 * 234) },
+ { AOM_CDF2(128 * 234) },
+ { AOM_CDF2(128 * 240) } }, // bits_cdf
},
{
// Horizontal component
- 128, // sign
- { 216, 128, 176, 160, 176, 176, 192, 198, 198, 208 }, // class
- { AOM_ICDF(28672), AOM_ICDF(30976), AOM_ICDF(31858), AOM_ICDF(32320),
- AOM_ICDF(32551), AOM_ICDF(32656), AOM_ICDF(32740), AOM_ICDF(32757),
- AOM_ICDF(32762), AOM_ICDF(32767), AOM_ICDF(32768), 0 }, // class_cdf
- { 208 }, // class0
- { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, // bits
- { { 128, 128, 64 }, { 96, 112, 64 } }, // class0_fp
- { 64, 96, 64 }, // fp
- { { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(26624), AOM_ICDF(32768),
- 0 },
- { AOM_ICDF(12288), AOM_ICDF(21248), AOM_ICDF(24128), AOM_ICDF(32768),
- 0 } }, // class0_fp_cdf
- { AOM_ICDF(8192), AOM_ICDF(17408), AOM_ICDF(21248), AOM_ICDF(32768),
- 0 }, // fp_cdf
- 160, // class0_hp bit
- 128, // hp
-#if CONFIG_NEW_MULTISYMBOL
- { AOM_ICDF(160 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(216 * 128), AOM_ICDF(32768), 0 },
- { { AOM_ICDF(128 * 196), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 198), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 208), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 224), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 245), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 } }, // bits_cdf
-#endif
+ { AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740, 32757,
+ 32762, 32767) }, // class_cdf // fp
+ { { AOM_CDF4(16384, 24576, 26624) },
+ { AOM_CDF4(12288, 21248, 24128) } }, // class0_fp_cdf
+ { AOM_CDF4(8192, 17408, 21248) }, // fp_cdf
+ { AOM_CDF2(128 * 128) }, // sign_cdf
+ { AOM_CDF2(160 * 128) }, // class0_hp_cdf
+ { AOM_CDF2(128 * 128) }, // hp_cdf
+ { AOM_CDF2(216 * 128) }, // class0_cdf
+ { { AOM_CDF2(128 * 136) },
+ { AOM_CDF2(128 * 140) },
+ { AOM_CDF2(128 * 148) },
+ { AOM_CDF2(128 * 160) },
+ { AOM_CDF2(128 * 176) },
+ { AOM_CDF2(128 * 192) },
+ { AOM_CDF2(128 * 224) },
+ { AOM_CDF2(128 * 234) },
+ { AOM_CDF2(128 * 234) },
+ { AOM_CDF2(128 * 240) } }, // bits_cdf
} },
};
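// Note on the initializers above: the removed per-component fields used 8-bit
// aom_prob values (e.g. sign = 128, class0 = 216, class0_hp = 160), and the
// CDF replacements scale them by 128 into the 32768-wide CDF domain, so
// AOM_CDF2(160 * 128) stands in for the old
// { AOM_ICDF(160 * 128), AOM_ICDF(32768), 0 } initializer.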
@@ -164,104 +115,8 @@ MV_CLASS_TYPE av1_get_mv_class(int z, int *offset) {
return c;
}
-static void inc_mv_component(int v, nmv_component_counts *comp_counts, int incr,
- MvSubpelPrecision precision) {
- int s, z, c, o, d, e, f;
- assert(v != 0); /* should not be zero */
- s = v < 0;
- comp_counts->sign[s] += incr;
- z = (s ? -v : v) - 1; /* magnitude - 1 */
-
- c = av1_get_mv_class(z, &o);
- comp_counts->classes[c] += incr;
-
- d = (o >> 3); /* int mv data */
- f = (o >> 1) & 3; /* fractional pel mv data */
- e = (o & 1); /* high precision mv data */
-
- if (c == MV_CLASS_0) {
- comp_counts->class0[d] += incr;
-#if CONFIG_INTRABC || CONFIG_AMVR
- if (precision > MV_SUBPEL_NONE)
-#endif
- comp_counts->class0_fp[d][f] += incr;
- if (precision > MV_SUBPEL_LOW_PRECISION) comp_counts->class0_hp[e] += incr;
- } else {
- int i;
- int b = c + CLASS0_BITS - 1; // number of bits
- for (i = 0; i < b; ++i) comp_counts->bits[i][((d >> i) & 1)] += incr;
-#if CONFIG_INTRABC || CONFIG_AMVR
- if (precision > MV_SUBPEL_NONE)
-#endif
- comp_counts->fp[f] += incr;
- if (precision > MV_SUBPEL_LOW_PRECISION) comp_counts->hp[e] += incr;
- }
-}
-
-void av1_inc_mv(const MV *mv, nmv_context_counts *counts,
- MvSubpelPrecision precision) {
- if (counts != NULL) {
- const MV_JOINT_TYPE j = av1_get_mv_joint(mv);
- ++counts->joints[j];
-
- if (mv_joint_vertical(j))
- inc_mv_component(mv->row, &counts->comps[0], 1, precision);
-
- if (mv_joint_horizontal(j))
- inc_mv_component(mv->col, &counts->comps[1], 1, precision);
- }
-}
-
-void av1_adapt_mv_probs(AV1_COMMON *cm, int allow_hp) {
- int i, j;
- int idx;
- for (idx = 0; idx < NMV_CONTEXTS; ++idx) {
- nmv_context *nmvc = &cm->fc->nmvc[idx];
- const nmv_context *pre_nmvc = &cm->pre_fc->nmvc[idx];
- const nmv_context_counts *counts = &cm->counts.mv[idx];
- aom_tree_merge_probs(av1_mv_joint_tree, pre_nmvc->joints, counts->joints,
- nmvc->joints);
- for (i = 0; i < 2; ++i) {
- nmv_component *comp = &nmvc->comps[i];
- const nmv_component *pre_comp = &pre_nmvc->comps[i];
- const nmv_component_counts *c = &counts->comps[i];
-
- comp->sign = av1_mode_mv_merge_probs(pre_comp->sign, c->sign);
- aom_tree_merge_probs(av1_mv_class_tree, pre_comp->classes, c->classes,
- comp->classes);
- aom_tree_merge_probs(av1_mv_class0_tree, pre_comp->class0, c->class0,
- comp->class0);
-
- for (j = 0; j < MV_OFFSET_BITS; ++j)
- comp->bits[j] = av1_mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]);
-#if CONFIG_AMVR
- if (cm->cur_frame_mv_precision_level == 0) {
-#endif
- for (j = 0; j < CLASS0_SIZE; ++j)
- aom_tree_merge_probs(av1_mv_fp_tree, pre_comp->class0_fp[j],
- c->class0_fp[j], comp->class0_fp[j]);
-
- aom_tree_merge_probs(av1_mv_fp_tree, pre_comp->fp, c->fp, comp->fp);
-
- if (allow_hp) {
- comp->class0_hp =
- av1_mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp);
- comp->hp = av1_mode_mv_merge_probs(pre_comp->hp, c->hp);
- }
-#if CONFIG_AMVR
- }
-#endif
- }
- }
-}
-
void av1_init_mv_probs(AV1_COMMON *cm) {
- int i;
- for (i = 0; i < NMV_CONTEXTS; ++i) {
- // NB: this sets CDFs too
- cm->fc->nmvc[i] = default_nmv_context;
- }
-#if CONFIG_INTRABC
+ // NB: this sets CDFs too
+ cm->fc->nmvc = default_nmv_context;
cm->fc->ndvc = default_nmv_context;
-#endif // CONFIG_INTRABC
}
diff --git a/third_party/aom/av1/common/entropymv.h b/third_party/aom/av1/common/entropymv.h
index 9ce089f7d..02ca7b66b 100644
--- a/third_party/aom/av1/common/entropymv.h
+++ b/third_party/aom/av1/common/entropymv.h
@@ -12,7 +12,7 @@
#ifndef AV1_COMMON_ENTROPYMV_H_
#define AV1_COMMON_ENTROPYMV_H_
-#include "./aom_config.h"
+#include "config/aom_config.h"
#include "aom_dsp/prob.h"
@@ -26,8 +26,6 @@ struct AV1Common;
void av1_init_mv_probs(struct AV1Common *cm);
-void av1_adapt_mv_probs(struct AV1Common *cm, int usehp);
-
#define MV_UPDATE_PROB 252
/* Symbols for coding which components are zero jointly */
@@ -66,9 +64,7 @@ typedef enum {
#define CLASS0_BITS 1 /* bits at integer precision for class 0 */
#define CLASS0_SIZE (1 << CLASS0_BITS)
#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2)
-#if CONFIG_NEW_MULTISYMBOL
#define MV_BITS_CONTEXTS 6
-#endif
#define MV_FP_SIZE 4
#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2)
@@ -76,37 +72,22 @@ typedef enum {
#define MV_VALS ((MV_MAX << 1) + 1)
#define MV_IN_USE_BITS 14
-#define MV_UPP ((1 << MV_IN_USE_BITS) - 1)
+#define MV_UPP (1 << MV_IN_USE_BITS)
#define MV_LOW (-(1 << MV_IN_USE_BITS))
-extern const aom_tree_index av1_mv_joint_tree[];
-extern const aom_tree_index av1_mv_class_tree[];
-extern const aom_tree_index av1_mv_class0_tree[];
-extern const aom_tree_index av1_mv_fp_tree[];
-
typedef struct {
- aom_prob sign;
- aom_prob classes[MV_CLASSES - 1];
- aom_cdf_prob class_cdf[CDF_SIZE(MV_CLASSES)];
- aom_prob class0[CLASS0_SIZE - 1];
- aom_prob bits[MV_OFFSET_BITS];
- aom_prob class0_fp[CLASS0_SIZE][MV_FP_SIZE - 1];
- aom_prob fp[MV_FP_SIZE - 1];
+ aom_cdf_prob classes_cdf[CDF_SIZE(MV_CLASSES)];
aom_cdf_prob class0_fp_cdf[CLASS0_SIZE][CDF_SIZE(MV_FP_SIZE)];
aom_cdf_prob fp_cdf[CDF_SIZE(MV_FP_SIZE)];
- aom_prob class0_hp;
- aom_prob hp;
-#if CONFIG_NEW_MULTISYMBOL
+ aom_cdf_prob sign_cdf[CDF_SIZE(2)];
aom_cdf_prob class0_hp_cdf[CDF_SIZE(2)];
aom_cdf_prob hp_cdf[CDF_SIZE(2)];
aom_cdf_prob class0_cdf[CDF_SIZE(CLASS0_SIZE)];
- aom_cdf_prob bits_cdf[MV_BITS_CONTEXTS][CDF_SIZE(2)];
-#endif
+ aom_cdf_prob bits_cdf[MV_OFFSET_BITS][CDF_SIZE(2)];
} nmv_component;
typedef struct {
- aom_prob joints[MV_JOINTS - 1];
- aom_cdf_prob joint_cdf[CDF_SIZE(MV_JOINTS)];
+ aom_cdf_prob joints_cdf[CDF_SIZE(MV_JOINTS)];
nmv_component comps[2];
} nmv_context;
@@ -120,33 +101,12 @@ static INLINE MV_JOINT_TYPE av1_get_mv_joint(const MV *mv) {
MV_CLASS_TYPE av1_get_mv_class(int z, int *offset);
-typedef struct {
- unsigned int sign[2];
- unsigned int classes[MV_CLASSES];
- unsigned int class0[CLASS0_SIZE];
- unsigned int bits[MV_OFFSET_BITS][2];
- unsigned int class0_fp[CLASS0_SIZE][MV_FP_SIZE];
- unsigned int fp[MV_FP_SIZE];
- unsigned int class0_hp[2];
- unsigned int hp[2];
-} nmv_component_counts;
-
-typedef struct {
- unsigned int joints[MV_JOINTS];
- nmv_component_counts comps[2];
-} nmv_context_counts;
-
typedef enum {
-#if CONFIG_INTRABC || CONFIG_AMVR
MV_SUBPEL_NONE = -1,
-#endif
MV_SUBPEL_LOW_PRECISION = 0,
MV_SUBPEL_HIGH_PRECISION,
} MvSubpelPrecision;
-void av1_inc_mv(const MV *mv, nmv_context_counts *mvctx,
- MvSubpelPrecision precision);
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/enums.h b/third_party/aom/av1/common/enums.h
index e8c4003cc..a37ee9f24 100644
--- a/third_party/aom/av1/common/enums.h
+++ b/third_party/aom/av1/common/enums.h
@@ -12,7 +12,8 @@
#ifndef AV1_COMMON_ENUMS_H_
#define AV1_COMMON_ENUMS_H_
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
#include "aom/aom_codec.h"
#include "aom/aom_integer.h"
@@ -22,22 +23,8 @@ extern "C" {
#undef MAX_SB_SIZE
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
-#define TWO_MODE
-#endif
-
-#if CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT
-#define NC_MODE_INFO 1
-#else
-#define NC_MODE_INFO 0
-#endif
-
// Max superblock size
-#if CONFIG_EXT_PARTITION
#define MAX_SB_SIZE_LOG2 7
-#else
-#define MAX_SB_SIZE_LOG2 6
-#endif // CONFIG_EXT_PARTITION
#define MAX_SB_SIZE (1 << MAX_SB_SIZE_LOG2)
#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
@@ -45,11 +32,7 @@ extern "C" {
#define MIN_SB_SIZE_LOG2 6
// Pixels per Mode Info (MI) unit
-#if CONFIG_CB4X4
#define MI_SIZE_LOG2 2
-#else
-#define MI_SIZE_LOG2 3
-#endif
#define MI_SIZE (1 << MI_SIZE_LOG2)
// MI-units per max superblock (MI Block - MIB)
@@ -63,73 +46,78 @@ extern "C" {
#define MAX_MIB_MASK (MAX_MIB_SIZE - 1)
// Maximum number of tile rows and tile columns
-#if CONFIG_EXT_TILE
-#define MAX_TILE_ROWS 1024
-#define MAX_TILE_COLS 1024
-#else
-#if CONFIG_MAX_TILE
#define MAX_TILE_ROWS 64
#define MAX_TILE_COLS 64
-#else
-#define MAX_TILE_ROWS 4
-#define MAX_TILE_COLS 64
-#endif
-#endif // CONFIG_EXT_TILE
-#if CONFIG_VAR_TX
#define MAX_VARTX_DEPTH 2
-#define SQR_VARTX_DEPTH_INIT 0
-#define RECT_VARTX_DEPTH_INIT 0
-#endif
#define MI_SIZE_64X64 (64 >> MI_SIZE_LOG2)
+#define MI_SIZE_128X128 (128 >> MI_SIZE_LOG2)
+
+#define MAX_PALETTE_SQUARE (64 * 64)
+// Maximum number of colors in a palette.
+#define PALETTE_MAX_SIZE 8
+// Minimum number of colors in a palette.
+#define PALETTE_MIN_SIZE 2
+
+#define FRAME_OFFSET_BITS 5
+#define MAX_FRAME_DISTANCE ((1 << FRAME_OFFSET_BITS) - 1)
+
+#define REF_FRAMES_LOG2 3
+#define REF_FRAMES (1 << REF_FRAMES_LOG2)
+
+// 4 scratch frames for the new frames to support a maximum of 4 cores decoding
+// in parallel, 3 for scaled references on the encoder.
+// TODO(hkuang): Add on-demand frame buffers instead of hardcoding the number
+// of framebuffers.
+// TODO(jkoleszar): These 3 extra references could probably come from the
+// normal reference pool.
+#define FRAME_BUFFERS (REF_FRAMES + 7)
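+// With REF_FRAMES == 8 this works out to 15 buffers in total
+// (8 reference slots + 4 decoder scratch frames + 3 encoder scaling buffers).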
-#if CONFIG_LOOPFILTER_LEVEL
// 4 frame filter levels: y plane vertical, y plane horizontal,
// u plane, and v plane
#define FRAME_LF_COUNT 4
#define DEFAULT_DELTA_LF_MULTI 0
-#endif // CONFIG_LOOPFILTER_LEVEL
-
-#if CONFIG_LPF_SB
-#define LPF_DELTA_BITS 3
-#define LPF_STEP 2
-#define DELTA_RANGE (1 << LPF_DELTA_BITS)
-#define MAX_LPF_OFFSET (LPF_STEP * ((1 << LPF_DELTA_BITS) - 1))
-
-#define LPF_REUSE_CONTEXT 2
-#define LPF_DELTA_CONTEXT DELTA_RANGE
-#define LPF_SIGN_CONTEXT 2
-
-// Half of maximum loop filter length (15-tap)
-#define FILT_BOUNDARY_OFFSET 8
-#define FILT_BOUNDARY_MI_OFFSET (FILT_BOUNDARY_OFFSET >> MI_SIZE_LOG2)
-#endif // CONFIG_LPF_SB
-
-// Bitstream profiles indicated by 2-3 bits in the uncompressed header.
-// 00: Profile 0. 8-bit 4:2:0 only.
-// 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0.
-// 01: Profile 2. 10-bit and 12-bit color only, with 4:2:0 sampling.
-// 110: Profile 3. 10-bit and 12-bit color only, with 4:2:2/4:4:4/4:4:0
-// sampling.
-// 111: Undefined profile.
+#define MAX_MODE_LF_DELTAS 2
+
+#define DIST_PRECISION_BITS 4
+#define DIST_PRECISION (1 << DIST_PRECISION_BITS) // 16
+
+// TODO(chengchen): Temporary flag serving as an experimental flag for WIP
+// bitmask construction.
+// Shall be removed when the bitmask code is completely checked in.
+#define LOOP_FILTER_BITMASK 0
+
+#define PROFILE_BITS 3
+// The following three profiles are currently defined.
+// Profile 0. 8-bit and 10-bit 4:2:0 and 4:0:0 only.
+// Profile 1. 8-bit and 10-bit 4:4:4.
+// Profile 2. 8-bit and 10-bit 4:2:2;
+//            12-bit 4:0:0, 4:2:2 and 4:4:4.
+// Since we have three bits for the profiles, the set can be extended later.
typedef enum BITSTREAM_PROFILE {
PROFILE_0,
PROFILE_1,
PROFILE_2,
- PROFILE_3,
- MAX_PROFILES
+ MAX_PROFILES,
} BITSTREAM_PROFILE;
+#define LEVEL_MAJOR_BITS 3
+#define LEVEL_MINOR_BITS 2
+#define LEVEL_BITS (LEVEL_MAJOR_BITS + LEVEL_MINOR_BITS)
+
+#define LEVEL_MAJOR_MIN 2
+#define LEVEL_MAJOR_MAX ((1 << LEVEL_MAJOR_BITS) - 1 + LEVEL_MAJOR_MIN)
+#define LEVEL_MINOR_MIN 0
+#define LEVEL_MINOR_MAX ((1 << LEVEL_MINOR_BITS) - 1)
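+// With the widths above, the coded ranges are 2..9 for the major level
+// ((1 << 3) - 1 + 2 == 9) and 0..3 for the minor level ((1 << 2) - 1 == 3).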
+
+#define OP_POINTS_CNT_MINUS_1_BITS 5
+#define OP_POINTS_IDC_BITS 12
+
// Note: Some enums use the attribute 'packed' to use smallest possible integer
// type, so that we can save memory when they are used in structs/arrays.
typedef enum ATTRIBUTE_PACKED {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- BLOCK_2X2,
- BLOCK_2X4,
- BLOCK_4X2,
-#endif
BLOCK_4X4,
BLOCK_4X8,
BLOCK_8X4,
@@ -143,33 +131,29 @@ typedef enum ATTRIBUTE_PACKED {
BLOCK_32X64,
BLOCK_64X32,
BLOCK_64X64,
-#if CONFIG_EXT_PARTITION
BLOCK_64X128,
BLOCK_128X64,
BLOCK_128X128,
-#endif // CONFIG_EXT_PARTITION
BLOCK_4X16,
BLOCK_16X4,
BLOCK_8X32,
BLOCK_32X8,
BLOCK_16X64,
BLOCK_64X16,
-#if CONFIG_EXT_PARTITION
- BLOCK_32X128,
- BLOCK_128X32,
-#endif // CONFIG_EXT_PARTITION
BLOCK_SIZES_ALL,
BLOCK_SIZES = BLOCK_4X16,
BLOCK_INVALID = 255,
BLOCK_LARGEST = (BLOCK_SIZES - 1)
} BLOCK_SIZE;
-typedef enum {
+// 4X4, 8X8, 16X16, 32X32, 64X64, 128X128
+#define SQR_BLOCK_SIZES 6
+
+typedef enum ATTRIBUTE_PACKED {
PARTITION_NONE,
PARTITION_HORZ,
PARTITION_VERT,
PARTITION_SPLIT,
-#if CONFIG_EXT_PARTITION_TYPES
PARTITION_HORZ_A, // HORZ split and the top partition is split again
PARTITION_HORZ_B, // HORZ split and the bottom partition is split again
PARTITION_VERT_A, // VERT split and the left partition is split again
@@ -177,134 +161,104 @@ typedef enum {
PARTITION_HORZ_4, // 4:1 horizontal partition
PARTITION_VERT_4, // 4:1 vertical partition
EXT_PARTITION_TYPES,
-#endif // CONFIG_EXT_PARTITION_TYPES
PARTITION_TYPES = PARTITION_SPLIT + 1,
PARTITION_INVALID = 255
} PARTITION_TYPE;
typedef char PARTITION_CONTEXT;
#define PARTITION_PLOFFSET 4 // number of probability models per block size
-#define PARTITION_BLOCK_SIZES (4 + CONFIG_EXT_PARTITION)
-#define PARTITION_CONTEXTS_PRIMARY (PARTITION_BLOCK_SIZES * PARTITION_PLOFFSET)
-#if CONFIG_UNPOISON_PARTITION_CTX
-#define INVALID_PARTITION_CTX (-1)
-#define PARTITION_CONTEXTS \
- (PARTITION_CONTEXTS_PRIMARY + 2 * PARTITION_BLOCK_SIZES)
-#else
-#define PARTITION_CONTEXTS PARTITION_CONTEXTS_PRIMARY
-#endif
+#define PARTITION_BLOCK_SIZES 5
+#define PARTITION_CONTEXTS (PARTITION_BLOCK_SIZES * PARTITION_PLOFFSET)
// block transform size
+#if defined(_MSC_VER)
+typedef uint8_t TX_SIZE;
+enum ATTRIBUTE_PACKED {
+#else
typedef enum ATTRIBUTE_PACKED {
-#if CONFIG_CHROMA_2X2
- TX_2X2, // 2x2 transform
#endif
- TX_4X4, // 4x4 transform
- TX_8X8, // 8x8 transform
- TX_16X16, // 16x16 transform
- TX_32X32, // 32x32 transform
-#if CONFIG_TX64X64
- TX_64X64, // 64x64 transform
-#endif // CONFIG_TX64X64
- TX_4X8, // 4x8 transform
- TX_8X4, // 8x4 transform
- TX_8X16, // 8x16 transform
- TX_16X8, // 16x8 transform
- TX_16X32, // 16x32 transform
- TX_32X16, // 32x16 transform
-#if CONFIG_TX64X64
+ TX_4X4, // 4x4 transform
+ TX_8X8, // 8x8 transform
+ TX_16X16, // 16x16 transform
+ TX_32X32, // 32x32 transform
+ TX_64X64, // 64x64 transform
+ TX_4X8, // 4x8 transform
+ TX_8X4, // 8x4 transform
+ TX_8X16, // 8x16 transform
+ TX_16X8, // 16x8 transform
+ TX_16X32, // 16x32 transform
+ TX_32X16, // 32x16 transform
TX_32X64, // 32x64 transform
TX_64X32, // 64x32 transform
-#endif // CONFIG_TX64X64
TX_4X16, // 4x16 transform
TX_16X4, // 16x4 transform
TX_8X32, // 8x32 transform
TX_32X8, // 32x8 transform
+ TX_16X64, // 16x64 transform
+ TX_64X16, // 64x16 transform
TX_SIZES_ALL, // Includes rectangular transforms
TX_SIZES = TX_4X8, // Does NOT include rectangular transforms
- TX_INVALID = 255 // Invalid transform size
+ TX_SIZES_LARGEST = TX_64X64,
+ TX_INVALID = 255 // Invalid transform size
+#if defined(_MSC_VER)
+};
+#else
} TX_SIZE;
+#endif
#define TX_SIZE_LUMA_MIN (TX_4X4)
/* We don't need to code a transform size unless the allowed size is at least
one more than the minimum. */
#define TX_SIZE_CTX_MIN (TX_SIZE_LUMA_MIN + 1)
-#define MAX_TX_DEPTH (TX_SIZES - TX_SIZE_CTX_MIN)
+// Maximum tx_size categories
+#define MAX_TX_CATS (TX_SIZES - TX_SIZE_CTX_MIN)
+#define MAX_TX_DEPTH 2
-#if CONFIG_CTX1D
-#define MAX_HVTX_SIZE (1 << 5)
-#endif // CONFIG_CTX1D
-
-#define MAX_TX_SIZE_LOG2 (5 + CONFIG_TX64X64)
+#define MAX_TX_SIZE_LOG2 (6)
#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2)
#define MIN_TX_SIZE_LOG2 2
#define MIN_TX_SIZE (1 << MIN_TX_SIZE_LOG2)
#define MAX_TX_SQUARE (MAX_TX_SIZE * MAX_TX_SIZE)
+// Pad 4 extra columns to remove horizontal availability check.
+#define TX_PAD_HOR_LOG2 2
+#define TX_PAD_HOR 4
+// Pad 6 extra rows (2 on top and 4 on bottom) to remove vertical availability
+// check.
+#define TX_PAD_TOP 2
+#define TX_PAD_BOTTOM 4
+#define TX_PAD_VER (TX_PAD_TOP + TX_PAD_BOTTOM)
+// Pad 16 extra bytes to avoid reading overflow in SIMD optimization.
+#define TX_PAD_END 16
+#define TX_PAD_2D ((32 + TX_PAD_HOR) * (32 + TX_PAD_VER) + TX_PAD_END)
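+// With the padding values above, TX_PAD_2D works out to
+// (32 + TX_PAD_HOR) * (32 + TX_PAD_VER) + TX_PAD_END == 36 * 38 + 16 == 1384.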
+
// Number of maximum size transform blocks in the maximum size superblock
#define MAX_TX_BLOCKS_IN_MAX_SB_LOG2 ((MAX_SB_SIZE_LOG2 - MAX_TX_SIZE_LOG2) * 2)
#define MAX_TX_BLOCKS_IN_MAX_SB (1 << MAX_TX_BLOCKS_IN_MAX_SB_LOG2)
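// With MAX_SB_SIZE_LOG2 == 7 and MAX_TX_SIZE_LOG2 == 6 this is (7 - 6) * 2,
// i.e. at most 1 << 2 == 4 largest-size transform blocks per superblock.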
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
-typedef enum ATTRIBUTE_PACKED {
- NCOBMC_MODE_0,
- NCOBMC_MODE_1,
- NCOBMC_MODE_2,
- NCOBMC_MODE_3,
- NCOBMC_MODE_4,
- NCOBMC_MODE_5,
- NCOBMC_MODE_6,
- NCOBMC_MODE_7,
- ALL_NCOBMC_MODES,
-#ifdef TWO_MODE
- MAX_NCOBMC_MODES = NCOBMC_MODE_1 + 1,
-#else
- MAX_NCOBMC_MODES = ALL_NCOBMC_MODES,
-#endif
- NO_OVERLAP = MAX_NCOBMC_MODES + 1
-} NCOBMC_MODE;
-
-typedef enum {
- ADAPT_OVERLAP_BLOCK_8X8,
- ADAPT_OVERLAP_BLOCK_16X16,
- ADAPT_OVERLAP_BLOCK_32X32,
- ADAPT_OVERLAP_BLOCK_64X64,
- ADAPT_OVERLAP_BLOCKS,
- ADAPT_OVERLAP_BLOCK_INVALID = 255
-} ADAPT_OVERLAP_BLOCK;
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
-
// frame transform mode
-typedef enum {
- ONLY_4X4, // only 4x4 transform used
- ALLOW_8X8, // allow block transform size up to 8x8
- ALLOW_16X16, // allow block transform size up to 16x16
- ALLOW_32X32, // allow block transform size up to 32x32
-#if CONFIG_TX64X64
- ALLOW_64X64, // allow block transform size up to 64x64
-#endif
- TX_MODE_SELECT, // transform specified for each block
+typedef enum ATTRIBUTE_PACKED {
+ ONLY_4X4, // use only 4x4 transform
+ TX_MODE_LARGEST, // transform size is the largest possible for pu size
+ TX_MODE_SELECT, // transform specified for each block
TX_MODES,
} TX_MODE;
// 1D tx types
-typedef enum {
+typedef enum ATTRIBUTE_PACKED {
DCT_1D,
ADST_1D,
FLIPADST_1D,
IDTX_1D,
- // TODO(sarahparker) need to eventually put something here for the
- // mrc experiment to make this work with the ext-tx pruning functions
TX_TYPES_1D,
} TX_TYPE_1D;
-typedef enum {
+typedef enum ATTRIBUTE_PACKED {
DCT_DCT, // DCT in both horizontal and vertical
ADST_DCT, // ADST in vertical, DCT in horizontal
DCT_ADST, // DCT in vertical, ADST in horizontal
ADST_ADST, // ADST in both directions
-#if CONFIG_EXT_TX
FLIPADST_DCT,
DCT_FLIPADST,
FLIPADST_FLIPADST,
@@ -317,25 +271,26 @@ typedef enum {
H_ADST,
V_FLIPADST,
H_FLIPADST,
-#endif // CONFIG_EXT_TX
-#if CONFIG_MRC_TX
- MRC_DCT, // DCT in both directions with mrc based bitmask
-#endif // CONFIG_MRC_TX
TX_TYPES,
} TX_TYPE;
-#if CONFIG_EXT_TX
typedef enum {
+ REG_REG,
+ REG_SMOOTH,
+ REG_SHARP,
+ SMOOTH_REG,
+ SMOOTH_SMOOTH,
+ SMOOTH_SHARP,
+ SHARP_REG,
+ SHARP_SMOOTH,
+ SHARP_SHARP,
+} DUAL_FILTER_TYPE;
+
+typedef enum ATTRIBUTE_PACKED {
// DCT only
EXT_TX_SET_DCTONLY,
// DCT + Identity only
EXT_TX_SET_DCT_IDTX,
-#if CONFIG_MRC_TX
- // DCT + MRC_DCT
- EXT_TX_SET_MRC_DCT,
- // DCT + MRC_DCT + IDTX
- EXT_TX_SET_MRC_DCT_IDTX,
-#endif // CONFIG_MRC_TX
// Discrete Trig transforms w/o flip (4) + Identity (1)
EXT_TX_SET_DTT4_IDTX,
// Discrete Trig transforms w/o flip (4) + Identity (1) + 1D Hor/vert DCT (2)
@@ -348,45 +303,13 @@ typedef enum {
} TxSetType;
#define IS_2D_TRANSFORM(tx_type) (tx_type < IDTX)
-#else
-#define IS_2D_TRANSFORM(tx_type) 1
-#endif
-typedef enum {
- TILE_LEFT_BOUNDARY = 1,
- TILE_RIGHT_BOUNDARY = 2,
- TILE_ABOVE_BOUNDARY = 4,
- TILE_BOTTOM_BOUNDARY = 8,
- FRAME_LEFT_BOUNDARY = 16,
- FRAME_RIGHT_BOUNDARY = 32,
- FRAME_ABOVE_BOUNDARY = 64,
- FRAME_BOTTOM_BOUNDARY = 128,
-} BOUNDARY_TYPE;
-
-#if CONFIG_EXT_TX
-#if CONFIG_CHROMA_2X2
-#define EXT_TX_SIZES 5 // number of sizes that use extended transforms
-#else
-#define EXT_TX_SIZES 4 // number of sizes that use extended transforms
-#endif // CONFIG_CHROMA_2X2
-#if CONFIG_MRC_TX
-#define EXT_TX_SETS_INTER 5 // Sets of transform selections for INTER
-#define EXT_TX_SETS_INTRA 4 // Sets of transform selections for INTRA
-#else // CONFIG_MRC_TX
+#define EXT_TX_SIZES 4 // number of sizes that use extended transforms
#define EXT_TX_SETS_INTER 4 // Sets of transform selections for INTER
#define EXT_TX_SETS_INTRA 3 // Sets of transform selections for INTRA
-#endif // CONFIG_MRC_TX
-#else
-#if CONFIG_CHROMA_2X2
-#define EXT_TX_SIZES 4 // number of sizes that use extended transforms
-#else
-#define EXT_TX_SIZES 3 // number of sizes that use extended transforms
-#endif
-#endif // CONFIG_EXT_TX
-typedef enum {
+typedef enum ATTRIBUTE_PACKED {
AOM_LAST_FLAG = 1 << 0,
-#if CONFIG_EXT_REFS
AOM_LAST2_FLAG = 1 << 1,
AOM_LAST3_FLAG = 1 << 2,
AOM_GOLD_FLAG = 1 << 3,
@@ -394,43 +317,45 @@ typedef enum {
AOM_ALT2_FLAG = 1 << 5,
AOM_ALT_FLAG = 1 << 6,
AOM_REFFRAME_ALL = (1 << 7) - 1
-#else // !CONFIG_EXT_REFS
- AOM_GOLD_FLAG = 1 << 1,
- AOM_ALT_FLAG = 1 << 2,
- AOM_REFFRAME_ALL = (1 << 3) - 1
-#endif // CONFIG_EXT_REFS
} AOM_REFFRAME;
-#if CONFIG_EXT_COMP_REFS
-#define USE_UNI_COMP_REFS 1
-
-typedef enum {
+typedef enum ATTRIBUTE_PACKED {
UNIDIR_COMP_REFERENCE,
BIDIR_COMP_REFERENCE,
COMP_REFERENCE_TYPES,
} COMP_REFERENCE_TYPE;
-#else // !CONFIG_EXT_COMP_REFS
-#define USE_UNI_COMP_REFS 0
-#endif // CONFIG_EXT_COMP_REFS
-typedef enum { PLANE_TYPE_Y, PLANE_TYPE_UV, PLANE_TYPES } PLANE_TYPE;
+typedef enum ATTRIBUTE_PACKED {
+ PLANE_TYPE_Y,
+ PLANE_TYPE_UV,
+ PLANE_TYPES
+} PLANE_TYPE;
-#if CONFIG_CFL
#define CFL_ALPHABET_SIZE_LOG2 4
#define CFL_ALPHABET_SIZE (1 << CFL_ALPHABET_SIZE_LOG2)
#define CFL_MAGS_SIZE ((2 << CFL_ALPHABET_SIZE_LOG2) + 1)
#define CFL_IDX_U(idx) (idx >> CFL_ALPHABET_SIZE_LOG2)
#define CFL_IDX_V(idx) (idx & (CFL_ALPHABET_SIZE - 1))
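// Example of the packing used by the two macros above: with
// CFL_ALPHABET_SIZE_LOG2 == 4, an index of 0x2d gives CFL_IDX_U(0x2d) == 2
// and CFL_IDX_V(0x2d) == 13.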
-typedef enum { CFL_PRED_U, CFL_PRED_V, CFL_PRED_PLANES } CFL_PRED_TYPE;
+typedef enum ATTRIBUTE_PACKED {
+ CFL_PRED_U,
+ CFL_PRED_V,
+ CFL_PRED_PLANES
+} CFL_PRED_TYPE;
-typedef enum {
+typedef enum ATTRIBUTE_PACKED {
CFL_SIGN_ZERO,
CFL_SIGN_NEG,
CFL_SIGN_POS,
CFL_SIGNS
} CFL_SIGN_TYPE;
+typedef enum ATTRIBUTE_PACKED {
+ CFL_DISALLOWED,
+ CFL_ALLOWED,
+ CFL_ALLOWED_TYPES
+} CFL_ALLOWED_TYPE;
+
// CFL_SIGN_ZERO, CFL_SIGN_ZERO is invalid
#define CFL_JOINT_SIGNS (CFL_SIGNS * CFL_SIGNS - 1)
// CFL_SIGN_U is equivalent to (js + 1) / 3 for js in 0 to 8
@@ -445,17 +370,13 @@ typedef enum {
// Also, the contexts are symmetric under swapping the planes.
#define CFL_CONTEXT_V(js) \
(CFL_SIGN_V(js) * CFL_SIGNS + CFL_SIGN_U(js) - CFL_SIGNS)
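// Worked example (assuming CFL_SIGN_V(js) is the analogous
// (js + 1) - CFL_SIGNS * CFL_SIGN_U(js)): for js == 7, CFL_SIGN_U(7) == 2
// (CFL_SIGN_POS), CFL_SIGN_V(7) == 2, so CFL_CONTEXT_V(7) == 2 * 3 + 2 - 3 == 5.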
-#endif
-typedef enum {
+typedef enum ATTRIBUTE_PACKED {
PALETTE_MAP,
-#if CONFIG_MRC_TX
- MRC_MAP,
-#endif // CONFIG_MRC_TX
COLOR_MAP_TYPES,
} COLOR_MAP_TYPE;
-typedef enum {
+typedef enum ATTRIBUTE_PACKED {
TWO_COLORS,
THREE_COLORS,
FOUR_COLORS,
@@ -466,7 +387,7 @@ typedef enum {
PALETTE_SIZES
} PALETTE_SIZE;
-typedef enum {
+typedef enum ATTRIBUTE_PACKED {
PALETTE_COLOR_ONE,
PALETTE_COLOR_TWO,
PALETTE_COLOR_THREE,
@@ -478,36 +399,26 @@ typedef enum {
PALETTE_COLORS
} PALETTE_COLOR;
-// Note: All directional predictors must be between V_PRED and D63_PRED (both
+// Note: All directional predictors must be between V_PRED and D67_PRED (both
// inclusive).
typedef enum ATTRIBUTE_PACKED {
- DC_PRED, // Average of above and left pixels
- V_PRED, // Vertical
- H_PRED, // Horizontal
- D45_PRED, // Directional 45 deg = round(arctan(1/1) * 180/pi)
- D135_PRED, // Directional 135 deg = 180 - 45
- D117_PRED, // Directional 117 deg = 180 - 63
- D153_PRED, // Directional 153 deg = 180 - 27
- D207_PRED, // Directional 207 deg = 180 + 27
- D63_PRED, // Directional 63 deg = round(arctan(2/1) * 180/pi)
- SMOOTH_PRED, // Combination of horizontal and vertical interpolation
-#if CONFIG_SMOOTH_HV
+ DC_PRED, // Average of above and left pixels
+ V_PRED, // Vertical
+ H_PRED, // Horizontal
+ D45_PRED, // Directional 45 degree
+ D135_PRED, // Directional 135 degree
+ D113_PRED, // Directional 113 degree
+ D157_PRED, // Directional 157 degree
+ D203_PRED, // Directional 203 degree
+ D67_PRED, // Directional 67 degree
+ SMOOTH_PRED, // Combination of horizontal and vertical interpolation
SMOOTH_V_PRED, // Vertical interpolation
SMOOTH_H_PRED, // Horizontal interpolation
-#endif // CONFIG_SMOOTH_HV
- TM_PRED, // True-motion
+ PAETH_PRED, // Predict from the direction of smallest gradient
NEARESTMV,
NEARMV,
- ZEROMV,
+ GLOBALMV,
NEWMV,
-#if CONFIG_COMPOUND_SINGLEREF
- // Single ref compound modes
- SR_NEAREST_NEARMV,
- // SR_NEAREST_NEWMV,
- SR_NEAR_NEWMV,
- SR_ZERO_NEWMV,
- SR_NEW_NEWMV,
-#endif // CONFIG_COMPOUND_SINGLEREF
// Compound ref compound modes
NEAREST_NEARESTMV,
NEAR_NEARMV,
@@ -515,175 +426,131 @@ typedef enum ATTRIBUTE_PACKED {
NEW_NEARESTMV,
NEAR_NEWMV,
NEW_NEARMV,
- ZERO_ZEROMV,
+ GLOBAL_GLOBALMV,
NEW_NEWMV,
MB_MODE_COUNT,
- INTRA_MODES = TM_PRED + 1, // TM_PRED has to be the last intra mode.
+ INTRA_MODE_START = DC_PRED,
+ INTRA_MODE_END = NEARESTMV,
+ INTRA_MODE_NUM = INTRA_MODE_END - INTRA_MODE_START,
+ SINGLE_INTER_MODE_START = NEARESTMV,
+ SINGLE_INTER_MODE_END = NEAREST_NEARESTMV,
+ SINGLE_INTER_MODE_NUM = SINGLE_INTER_MODE_END - SINGLE_INTER_MODE_START,
+ COMP_INTER_MODE_START = NEAREST_NEARESTMV,
+ COMP_INTER_MODE_END = MB_MODE_COUNT,
+ COMP_INTER_MODE_NUM = COMP_INTER_MODE_END - COMP_INTER_MODE_START,
+ INTRA_MODES = PAETH_PRED + 1, // PAETH_PRED has to be the last intra mode.
INTRA_INVALID = MB_MODE_COUNT // For uv_mode in inter blocks
} PREDICTION_MODE;
-#if CONFIG_CFL
// TODO(ltrudeau) Do we really want to pack this?
// TODO(ltrudeau) Do we match with PREDICTION_MODE?
typedef enum ATTRIBUTE_PACKED {
- UV_DC_PRED, // Average of above and left pixels
- UV_V_PRED, // Vertical
- UV_H_PRED, // Horizontal
- UV_D45_PRED, // Directional 45 deg = round(arctan(1/1) * 180/pi)
- UV_D135_PRED, // Directional 135 deg = 180 - 45
- UV_D117_PRED, // Directional 117 deg = 180 - 63
- UV_D153_PRED, // Directional 153 deg = 180 - 27
- UV_D207_PRED, // Directional 207 deg = 180 + 27
- UV_D63_PRED, // Directional 63 deg = round(arctan(2/1) * 180/pi)
- UV_SMOOTH_PRED, // Combination of horizontal and vertical interpolation
-#if CONFIG_SMOOTH_HV
+ UV_DC_PRED, // Average of above and left pixels
+ UV_V_PRED, // Vertical
+ UV_H_PRED, // Horizontal
+ UV_D45_PRED, // Directional 45 degree
+ UV_D135_PRED, // Directional 135 degree
+ UV_D113_PRED, // Directional 113 degree
+ UV_D157_PRED, // Directional 157 degree
+ UV_D203_PRED, // Directional 203 degree
+ UV_D67_PRED, // Directional 67 degree
+ UV_SMOOTH_PRED, // Combination of horizontal and vertical interpolation
UV_SMOOTH_V_PRED, // Vertical interpolation
UV_SMOOTH_H_PRED, // Horizontal interpolation
-#endif // CONFIG_SMOOTH_HV
- UV_TM_PRED, // True-motion
+ UV_PAETH_PRED, // Predict from the direction of smallest gradient
UV_CFL_PRED, // Chroma-from-Luma
UV_INTRA_MODES,
UV_MODE_INVALID, // For uv_mode in inter blocks
} UV_PREDICTION_MODE;
-#else
-#define UV_INTRA_MODES (INTRA_MODES)
-#define UV_PREDICTION_MODE PREDICTION_MODE
-#define UV_DC_PRED (DC_PRED)
-#define UV_MODE_INVALID (INTRA_INVALID)
-#endif // CONFIG_CFL
-typedef enum {
+typedef enum ATTRIBUTE_PACKED {
SIMPLE_TRANSLATION,
-#if CONFIG_MOTION_VAR
- OBMC_CAUSAL, // 2-sided OBMC
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- NCOBMC_ADAPT_WEIGHT,
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
-#endif // CONFIG_MOTION_VAR
-#if CONFIG_WARPED_MOTION
+ OBMC_CAUSAL, // 2-sided OBMC
WARPED_CAUSAL, // 2-sided WARPED
-#endif // CONFIG_WARPED_MOTION
MOTION_MODES
-#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_WARPED_MOTION
- ,
- OBMC_FAMILY_MODES = NCOBMC_ADAPT_WEIGHT + 1
-#endif
} MOTION_MODE;
-#if CONFIG_INTERINTRA
-typedef enum {
+typedef enum ATTRIBUTE_PACKED {
II_DC_PRED,
II_V_PRED,
II_H_PRED,
II_SMOOTH_PRED,
INTERINTRA_MODES
} INTERINTRA_MODE;
-#endif
typedef enum {
COMPOUND_AVERAGE,
-#if CONFIG_WEDGE
COMPOUND_WEDGE,
-#endif // CONFIG_WEDGE
-#if CONFIG_COMPOUND_SEGMENT
- COMPOUND_SEG,
-#endif // CONFIG_COMPOUND_SEGMENT
+ COMPOUND_DIFFWTD,
COMPOUND_TYPES,
} COMPOUND_TYPE;
-// TODO(huisu): Consider adding FILTER_SMOOTH_PRED to "FILTER_INTRA_MODE".
-#if CONFIG_FILTER_INTRA
-typedef enum {
+typedef enum ATTRIBUTE_PACKED {
FILTER_DC_PRED,
FILTER_V_PRED,
FILTER_H_PRED,
- FILTER_D45_PRED,
- FILTER_D135_PRED,
- FILTER_D117_PRED,
- FILTER_D153_PRED,
- FILTER_D207_PRED,
- FILTER_D63_PRED,
- FILTER_TM_PRED,
+ FILTER_D157_PRED,
+ FILTER_PAETH_PRED,
FILTER_INTRA_MODES,
} FILTER_INTRA_MODE;
-#endif // CONFIG_FILTER_INTRA
-#if CONFIG_EXT_INTRA
#define DIRECTIONAL_MODES 8
-#endif // CONFIG_EXT_INTRA
+#define MAX_ANGLE_DELTA 3
+#define ANGLE_STEP 3
#define INTER_MODES (1 + NEWMV - NEARESTMV)
-#if CONFIG_COMPOUND_SINGLEREF
-#define INTER_SINGLEREF_COMP_MODES (1 + SR_NEW_NEWMV - SR_NEAREST_NEARMV)
-#endif // CONFIG_COMPOUND_SINGLEREF
-
#define INTER_COMPOUND_MODES (1 + NEW_NEWMV - NEAREST_NEARESTMV)
#define SKIP_CONTEXTS 3
+#define SKIP_MODE_CONTEXTS 3
+
+#define COMP_INDEX_CONTEXTS 6
+#define COMP_GROUP_IDX_CONTEXTS 6
#define NMV_CONTEXTS 3
-#define NEWMV_MODE_CONTEXTS 7
-#define ZEROMV_MODE_CONTEXTS 2
-#define REFMV_MODE_CONTEXTS 9
-#define DRL_MODE_CONTEXTS 5
+#define NEWMV_MODE_CONTEXTS 6
+#define GLOBALMV_MODE_CONTEXTS 2
+#define REFMV_MODE_CONTEXTS 6
+#define DRL_MODE_CONTEXTS 3
-#define ZEROMV_OFFSET 3
+#define GLOBALMV_OFFSET 3
#define REFMV_OFFSET 4
-#define NEWMV_CTX_MASK ((1 << ZEROMV_OFFSET) - 1)
-#define ZEROMV_CTX_MASK ((1 << (REFMV_OFFSET - ZEROMV_OFFSET)) - 1)
+#define NEWMV_CTX_MASK ((1 << GLOBALMV_OFFSET) - 1)
+#define GLOBALMV_CTX_MASK ((1 << (REFMV_OFFSET - GLOBALMV_OFFSET)) - 1)
#define REFMV_CTX_MASK ((1 << (8 - REFMV_OFFSET)) - 1)
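// Illustrative use of the masks above (assumed decoder-side split of a packed
// inter-mode context): for ctx == 0x5b, ctx & NEWMV_CTX_MASK == 3,
// (ctx >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK == 1 and
// (ctx >> REFMV_OFFSET) & REFMV_CTX_MASK == 5.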
-#define ALL_ZERO_FLAG_OFFSET 8
-#define SKIP_NEARESTMV_OFFSET 9
-#define SKIP_NEARMV_OFFSET 10
-#define SKIP_NEARESTMV_SUB8X8_OFFSET 11
+#define COMP_NEWMV_CTXS 5
+#define INTER_MODE_CONTEXTS 8
-#define INTER_MODE_CONTEXTS 7
#define DELTA_Q_SMALL 3
#define DELTA_Q_PROBS (DELTA_Q_SMALL)
#define DEFAULT_DELTA_Q_RES 4
-#if CONFIG_EXT_DELTA_Q
#define DELTA_LF_SMALL 3
#define DELTA_LF_PROBS (DELTA_LF_SMALL)
#define DEFAULT_DELTA_LF_RES 2
-#endif
/* Segment Feature Masks */
#define MAX_MV_REF_CANDIDATES 2
-#define MAX_REF_MV_STACK_SIZE 16
-#if CONFIG_EXT_PARTITION
+#define MAX_REF_MV_STACK_SIZE 8
#define REF_CAT_LEVEL 640
-#else
-#define REF_CAT_LEVEL 255
-#endif // CONFIG_EXT_PARTITION
#define INTRA_INTER_CONTEXTS 4
#define COMP_INTER_CONTEXTS 5
-#define REF_CONTEXTS 5
+#define REF_CONTEXTS 3
-#if CONFIG_EXT_COMP_REFS
#define COMP_REF_TYPE_CONTEXTS 5
#define UNI_COMP_REF_CONTEXTS 3
-#endif // CONFIG_EXT_COMP_REFS
-
-#if CONFIG_COMPOUND_SINGLEREF
-#define COMP_INTER_MODE_CONTEXTS 4
-#endif // CONFIG_COMPOUND_SINGLEREF
-#if CONFIG_VAR_TX
-#define TXFM_PARTITION_CONTEXTS ((TX_SIZES - TX_8X8) * 6 - 2)
+#define TXFM_PARTITION_CONTEXTS ((TX_SIZES - TX_8X8) * 6 - 3)
typedef uint8_t TXFM_CONTEXT;
-#endif
#define NONE_FRAME -1
#define INTRA_FRAME 0
#define LAST_FRAME 1
-
-#if CONFIG_EXT_REFS
#define LAST2_FRAME 2
#define LAST3_FRAME 3
#define GOLDEN_FRAME 4
@@ -691,94 +558,55 @@ typedef uint8_t TXFM_CONTEXT;
#define ALTREF2_FRAME 6
#define ALTREF_FRAME 7
#define LAST_REF_FRAMES (LAST3_FRAME - LAST_FRAME + 1)
-#else // !CONFIG_EXT_REFS
-#define GOLDEN_FRAME 2
-#define ALTREF_FRAME 3
-#endif // CONFIG_EXT_REFS
#define INTER_REFS_PER_FRAME (ALTREF_FRAME - LAST_FRAME + 1)
-#define TOTAL_REFS_PER_FRAME (ALTREF_FRAME - INTRA_FRAME + 1)
#define FWD_REFS (GOLDEN_FRAME - LAST_FRAME + 1)
#define FWD_RF_OFFSET(ref) (ref - LAST_FRAME)
-#if CONFIG_EXT_REFS
#define BWD_REFS (ALTREF_FRAME - BWDREF_FRAME + 1)
#define BWD_RF_OFFSET(ref) (ref - BWDREF_FRAME)
-#else
-#define BWD_REFS 1
-#define BWD_RF_OFFSET(ref) (ref - ALTREF_FRAME)
-#endif // CONFIG_EXT_REFS
#define SINGLE_REFS (FWD_REFS + BWD_REFS)
-#if CONFIG_EXT_COMP_REFS
-typedef enum {
- LAST_LAST2_FRAMES, // { LAST_FRAME, LAST2_FRAME }
- LAST_LAST3_FRAMES, // { LAST_FRAME, LAST3_FRAME }
- LAST_GOLDEN_FRAMES, // { LAST_FRAME, GOLDEN_FRAME }
- BWDREF_ALTREF_FRAMES, // { BWDREF_FRAME, ALTREF_FRAME }
- UNIDIR_COMP_REFS
+
+typedef enum ATTRIBUTE_PACKED {
+ LAST_LAST2_FRAMES, // { LAST_FRAME, LAST2_FRAME }
+ LAST_LAST3_FRAMES, // { LAST_FRAME, LAST3_FRAME }
+ LAST_GOLDEN_FRAMES, // { LAST_FRAME, GOLDEN_FRAME }
+ BWDREF_ALTREF_FRAMES, // { BWDREF_FRAME, ALTREF_FRAME }
+ LAST2_LAST3_FRAMES, // { LAST2_FRAME, LAST3_FRAME }
+ LAST2_GOLDEN_FRAMES, // { LAST2_FRAME, GOLDEN_FRAME }
+ LAST3_GOLDEN_FRAMES, // { LAST3_FRAME, GOLDEN_FRAME }
+ BWDREF_ALTREF2_FRAMES, // { BWDREF_FRAME, ALTREF2_FRAME }
+ ALTREF2_ALTREF_FRAMES, // { ALTREF2_FRAME, ALTREF_FRAME }
+ TOTAL_UNIDIR_COMP_REFS,
+ // NOTE: UNIDIR_COMP_REFS is the number of uni-directional reference pairs
+ // that are explicitly signaled.
+ UNIDIR_COMP_REFS = BWDREF_ALTREF_FRAMES + 1,
} UNIDIR_COMP_REF;
-#define COMP_REFS (FWD_REFS * BWD_REFS + UNIDIR_COMP_REFS)
-#else // !CONFIG_EXT_COMP_REFS
-#define COMP_REFS (FWD_REFS * BWD_REFS)
-#endif // CONFIG_EXT_COMP_REFS
-#define MODE_CTX_REF_FRAMES (TOTAL_REFS_PER_FRAME + COMP_REFS)
+#define TOTAL_COMP_REFS (FWD_REFS * BWD_REFS + TOTAL_UNIDIR_COMP_REFS)
-#if CONFIG_SUPERTX
-#define PARTITION_SUPERTX_CONTEXTS 2
-#define MAX_SUPERTX_BLOCK_SIZE BLOCK_32X32
-#endif // CONFIG_SUPERTX
+#define COMP_REFS (FWD_REFS * BWD_REFS + UNIDIR_COMP_REFS)
-#if CONFIG_LOOP_RESTORATION
-typedef enum {
+// NOTE: A limited number of unidirectional reference pairs can be signalled for
+// compound prediction. The use of skip mode, on the other hand, makes it
+// possible to have a reference pair not listed for explicit signaling.
+#define MODE_CTX_REF_FRAMES (REF_FRAMES + TOTAL_COMP_REFS)
+
+typedef enum ATTRIBUTE_PACKED {
RESTORE_NONE,
RESTORE_WIENER,
RESTORE_SGRPROJ,
RESTORE_SWITCHABLE,
RESTORE_SWITCHABLE_TYPES = RESTORE_SWITCHABLE,
- RESTORE_TYPES,
+ RESTORE_TYPES = 4,
} RestorationType;
-#endif // CONFIG_LOOP_RESTORATION
-#if CONFIG_FRAME_SUPERRES
#define SUPERRES_SCALE_BITS 3
-#define SUPERRES_SCALE_DENOMINATOR_MIN 8
-#endif // CONFIG_FRAME_SUPERRES
-
-#if CONFIG_LPF_DIRECT
-typedef enum {
- VERT_HORZ,
- DEGREE_30,
- DEGREE_45,
- DEGREE_60,
- DEGREE_120,
- DEGREE_135,
- DEGREE_150,
- FILTER_DEGREES,
-} FILTER_DEGREE;
-#endif // CONFIG_LPF_DIRECT
-
-#if CONFIG_OBU
-// R19
-typedef enum {
- OBU_SEQUENCE_HEADER = 1,
- OBU_TD = 2,
- OBU_FRAME_HEADER = 3,
- OBU_TILE_GROUP = 4,
- OBU_METADATA = 5,
- OBU_PADDING = 15,
-} OBU_TYPE;
-#endif
+#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
-#if CONFIG_LGT_FROM_PRED
-#define LGT_SIZES 2
-// Note: at least one of LGT_FROM_PRED_INTRA and LGT_FROM_PRED_INTER must be 1
-#define LGT_FROM_PRED_INTRA 1
-#define LGT_FROM_PRED_INTER 1
-// LGT_SL_INTRA: LGTs with a mode-dependent first self-loop and a break point
-#define LGT_SL_INTRA 0
-#endif // CONFIG_LGT_FROM_PRED
+// In large_scale_tile coding, external references are used.
+#define MAX_EXTERNAL_REFERENCES 128
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/aom/av1/common/filter.c b/third_party/aom/av1/common/filter.c
index 135132316..a7e67ea4a 100644
--- a/third_party/aom/av1/common/filter.c
+++ b/third_party/aom/av1/common/filter.c
@@ -25,153 +25,6 @@ DECLARE_ALIGNED(256, static const InterpKernel,
{ 0, 0, 0, 16, 112, 0, 0, 0 }, { 0, 0, 0, 8, 120, 0, 0, 0 }
};
-#if USE_TEMPORALFILTER_12TAP
-DECLARE_ALIGNED(16, static const int16_t,
- sub_pel_filters_temporalfilter_12[SUBPEL_SHIFTS][12]) = {
- // intfilt 0.8
- { 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0 },
- { 0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0 },
- { 0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0 },
- { -1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1 },
- { -1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1 },
- { -1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1 },
- { -1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1 },
- { -1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1 },
- { -1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1 },
- { -1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1 },
- { -1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1 },
- { -1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1 },
- { -1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1 },
- { -1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1 },
- { 0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0 },
- { 0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0 },
-};
-#endif // USE_TEMPORALFILTER_12TAP
-
-#if USE_EXTRA_FILTER
-DECLARE_ALIGNED(256, static const InterpKernel,
- sub_pel_filters_8[SUBPEL_SHIFTS]) = {
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, -6, 126, 8, -2, 0, 0 },
- { 0, 2, -10, 122, 18, -4, 0, 0 }, { 0, 2, -12, 116, 28, -8, 2, 0 },
- { 0, 2, -14, 110, 38, -10, 2, 0 }, { 0, 2, -14, 102, 48, -12, 2, 0 },
- { 0, 2, -16, 94, 58, -12, 2, 0 }, { 0, 2, -14, 84, 66, -12, 2, 0 },
- { 0, 2, -14, 76, 76, -14, 2, 0 }, { 0, 2, -12, 66, 84, -14, 2, 0 },
- { 0, 2, -12, 58, 94, -16, 2, 0 }, { 0, 2, -12, 48, 102, -14, 2, 0 },
- { 0, 2, -10, 38, 110, -14, 2, 0 }, { 0, 2, -8, 28, 116, -12, 2, 0 },
- { 0, 0, -4, 18, 122, -10, 2, 0 }, { 0, 0, -2, 8, 126, -6, 2, 0 }
-};
-
-DECLARE_ALIGNED(256, static const InterpKernel,
- sub_pel_filters_regular_uv[SUBPEL_SHIFTS]) = {
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, -6, 126, 8, -2, 0, 0 },
- { 0, 2, -10, 122, 18, -4, 0, 0 }, { 0, 2, -12, 116, 28, -8, 2, 0 },
- { 0, 2, -14, 110, 38, -10, 2, 0 }, { 0, 2, -14, 102, 48, -12, 2, 0 },
- { 0, 2, -16, 94, 58, -12, 2, 0 }, { 0, 2, -14, 84, 66, -12, 2, 0 },
- { 0, 2, -14, 76, 76, -14, 2, 0 }, { 0, 2, -12, 66, 84, -14, 2, 0 },
- { 0, 2, -12, 58, 94, -16, 2, 0 }, { 0, 2, -12, 48, 102, -14, 2, 0 },
- { 0, 2, -10, 38, 110, -14, 2, 0 }, { 0, 2, -8, 28, 116, -12, 2, 0 },
- { 0, 0, -4, 18, 122, -10, 2, 0 }, { 0, 0, -2, 8, 126, -6, 2, 0 }
-};
-
-#if USE_12TAP_FILTER
-DECLARE_ALIGNED(256, static const InterpKernel,
- sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
- // intfilt 0.8
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { -1, 2, -6, 127, 9, -4, 2, -1 },
- { -2, 5, -12, 124, 18, -7, 4, -2 }, { -2, 7, -16, 119, 28, -11, 5, -2 },
- { -3, 8, -19, 114, 38, -14, 7, -3 }, { -3, 9, -22, 107, 49, -17, 8, -3 },
- { -4, 10, -23, 99, 60, -20, 10, -4 }, { -4, 11, -23, 90, 70, -22, 10, -4 },
- { -4, 11, -23, 80, 80, -23, 11, -4 }, { -4, 10, -22, 70, 90, -23, 11, -4 },
- { -4, 10, -20, 60, 99, -23, 10, -4 }, { -3, 8, -17, 49, 107, -22, 9, -3 },
- { -3, 7, -14, 38, 114, -19, 8, -3 }, { -2, 5, -11, 28, 119, -16, 7, -2 },
- { -2, 4, -7, 18, 124, -12, 5, -2 }, { -1, 2, -4, 9, 127, -6, 2, -1 },
-};
-
-DECLARE_ALIGNED(256, static const int16_t,
- sub_pel_filters_10sharp[SUBPEL_SHIFTS][12]) = {
- // intfilt 0.85
- { 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0 },
- { 0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0 },
- { 0, 1, -3, 6, -13, 124, 18, -8, 4, -2, 1, 0 },
- { 0, 2, -4, 8, -18, 120, 28, -12, 6, -4, 2, 0 },
- { 0, 2, -5, 10, -21, 114, 38, -15, 8, -5, 2, 0 },
- { 0, 3, -6, 11, -24, 107, 49, -19, 10, -6, 3, 0 },
- { 0, 3, -7, 12, -25, 99, 59, -21, 11, -6, 3, 0 },
- { 0, 3, -7, 12, -25, 90, 70, -23, 12, -7, 3, 0 },
- { 0, 3, -7, 12, -25, 81, 81, -25, 12, -7, 3, 0 },
- { 0, 3, -7, 12, -23, 70, 90, -25, 12, -7, 3, 0 },
- { 0, 3, -6, 11, -21, 59, 99, -25, 12, -7, 3, 0 },
- { 0, 3, -6, 10, -19, 49, 107, -24, 11, -6, 3, 0 },
- { 0, 2, -5, 8, -15, 38, 114, -21, 10, -5, 2, 0 },
- { 0, 2, -4, 6, -12, 28, 120, -18, 8, -4, 2, 0 },
- { 0, 1, -2, 4, -8, 18, 124, -13, 6, -3, 1, 0 },
- { 0, 1, -1, 2, -4, 8, 127, -7, 3, -2, 1, 0 },
-};
-#else
-DECLARE_ALIGNED(256, static const InterpKernel,
- sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { -2, 2, -6, 126, 8, -2, 2, 0 },
- { -2, 6, -12, 124, 16, -6, 4, -2 }, { -2, 8, -18, 120, 26, -10, 6, -2 },
- { -4, 10, -22, 116, 38, -14, 6, -2 }, { -4, 10, -22, 108, 48, -18, 8, -2 },
- { -4, 10, -24, 100, 60, -20, 8, -2 }, { -4, 10, -24, 90, 70, -22, 10, -2 },
- { -4, 12, -24, 80, 80, -24, 12, -4 }, { -2, 10, -22, 70, 90, -24, 10, -4 },
- { -2, 8, -20, 60, 100, -24, 10, -4 }, { -2, 8, -18, 48, 108, -22, 10, -4 },
- { -2, 6, -14, 38, 116, -22, 10, -4 }, { -2, 6, -10, 26, 120, -18, 8, -2 },
- { -2, 4, -6, 16, 124, -12, 6, -2 }, { 0, 2, -2, 8, 126, -6, 2, -2 }
-};
-#endif
-
-DECLARE_ALIGNED(256, static const InterpKernel,
- sub_pel_filters_8smooth2[SUBPEL_SHIFTS]) = {
- // freqmultiplier = 0.2
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 9, 30, 44, 32, 11, 2, 0 },
- { 0, 8, 28, 44, 34, 12, 2, 0 }, { 0, 7, 27, 44, 35, 13, 2, 0 },
- { 0, 6, 26, 43, 37, 14, 2, 0 }, { 0, 5, 24, 43, 38, 16, 2, 0 },
- { 0, 5, 23, 42, 38, 17, 3, 0 }, { 0, 4, 21, 41, 40, 19, 3, 0 },
- { 0, 4, 20, 40, 40, 20, 4, 0 }, { 0, 3, 19, 40, 41, 21, 4, 0 },
- { 0, 3, 17, 38, 42, 23, 5, 0 }, { 0, 2, 16, 38, 43, 24, 5, 0 },
- { 0, 2, 14, 37, 43, 26, 6, 0 }, { 0, 2, 13, 35, 44, 27, 7, 0 },
- { 0, 2, 12, 34, 44, 28, 8, 0 }, { 0, 2, 11, 32, 44, 30, 9, 0 },
-};
-
-DECLARE_ALIGNED(256, static const InterpKernel,
- sub_pel_filters_smooth2_uv[SUBPEL_SHIFTS]) = {
- // freqmultiplier = 0.2
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 9, 30, 44, 32, 11, 2, 0 },
- { 0, 8, 28, 44, 34, 12, 2, 0 }, { 0, 7, 27, 44, 35, 13, 2, 0 },
- { 0, 6, 26, 43, 37, 14, 2, 0 }, { 0, 5, 24, 43, 38, 16, 2, 0 },
- { 0, 5, 23, 42, 38, 17, 3, 0 }, { 0, 4, 21, 41, 40, 19, 3, 0 },
- { 0, 4, 20, 40, 40, 20, 4, 0 }, { 0, 3, 19, 40, 41, 21, 4, 0 },
- { 0, 3, 17, 38, 42, 23, 5, 0 }, { 0, 2, 16, 38, 43, 24, 5, 0 },
- { 0, 2, 14, 37, 43, 26, 6, 0 }, { 0, 2, 13, 35, 44, 27, 7, 0 },
- { 0, 2, 12, 34, 44, 28, 8, 0 }, { 0, 2, 11, 32, 44, 30, 9, 0 },
-};
-
-DECLARE_ALIGNED(256, static const InterpKernel,
- sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, 28, 62, 34, 2, 0, 0 },
- { 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
- { 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
- { 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, -2, 16, 54, 48, 12, 0, 0 },
- { 0, -2, 14, 52, 52, 14, -2, 0 }, { 0, 0, 12, 48, 54, 16, -2, 0 },
- { 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
- { 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
- { 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 28, 2, 0 }
-};
-
-DECLARE_ALIGNED(256, static const InterpKernel,
- sub_pel_filters_smooth_uv[SUBPEL_SHIFTS]) = {
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, 28, 62, 34, 2, 0, 0 },
- { 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
- { 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
- { 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, -2, 16, 54, 48, 12, 0, 0 },
- { 0, -2, 14, 52, 52, 14, -2, 0 }, { 0, 0, 12, 48, 54, 16, -2, 0 },
- { 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
- { 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
- { 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 28, 2, 0 }
-};
-#else // USE_EXTRA_FILTER
-
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, -6, 126, 8, -2, 0, 0 },
@@ -207,49 +60,7 @@ DECLARE_ALIGNED(256, static const InterpKernel,
{ 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
{ 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 28, 2, 0 }
};
-#endif // USE_EXTRA_FILTER
-#if CONFIG_EXT_INTRA
-#if CONFIG_INTRA_INTERP
-const InterpKernel *av1_intra_filter_kernels[INTRA_FILTERS] = {
- bilinear_filters, // INTRA_FILTER_LINEAR
- sub_pel_filters_8, // INTRA_FILTER_8TAP
- sub_pel_filters_8sharp, // INTRA_FILTER_8TAP_SHARP
- sub_pel_filters_8smooth, // INTRA_FILTER_8TAP_SMOOTH
-};
-#endif // CONFIG_INTRA_INTERP
-#endif // CONFIG_EXT_INTRA
-
-#if USE_EXTRA_FILTER
-static const InterpFilterParams
- av1_interp_filter_params_list[SWITCHABLE_FILTERS + EXTRA_FILTERS] = {
- { (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS,
- EIGHTTAP_REGULAR },
- { (const int16_t *)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS,
- EIGHTTAP_SMOOTH },
-#if USE_12TAP_FILTER
- { (const int16_t *)sub_pel_filters_10sharp, 12, SUBPEL_SHIFTS,
- MULTITAP_SHARP },
-#else
- { (const int16_t *)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS,
- EIGHTTAP_SHARP },
-#endif
- { (const int16_t *)sub_pel_filters_8smooth2, SUBPEL_TAPS, SUBPEL_SHIFTS,
- EIGHTTAP_SMOOTH2 },
- { (const int16_t *)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS,
- BILINEAR },
- { (const int16_t *)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS,
- EIGHTTAP_SHARP },
- { (const int16_t *)sub_pel_filters_regular_uv, SUBPEL_TAPS, SUBPEL_SHIFTS,
- FILTER_REGULAR_UV },
- { (const int16_t *)sub_pel_filters_smooth_uv, SUBPEL_TAPS, SUBPEL_SHIFTS,
- FILTER_SMOOTH_UV },
- { (const int16_t *)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS,
- FILTER_SHARP_UV },
- { (const int16_t *)sub_pel_filters_smooth2_uv, SUBPEL_TAPS, SUBPEL_SHIFTS,
- FILTER_SMOOTH2_UV },
- };
-#else
static const InterpFilterParams
av1_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
{ (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS,
@@ -261,62 +72,49 @@ static const InterpFilterParams
{ (const int16_t *)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS,
BILINEAR }
};
-#endif // USE_EXTRA_FILTER
-#if USE_TEMPORALFILTER_12TAP
-static const InterpFilterParams av1_interp_temporalfilter_12tap = {
- (const int16_t *)sub_pel_filters_temporalfilter_12, 12, SUBPEL_SHIFTS,
- TEMPORALFILTER_12TAP
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_4[SUBPEL_SHIFTS]) = {
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -4, 126, 8, -2, 0, 0 },
+ { 0, 0, -8, 122, 18, -4, 0, 0 }, { 0, 0, -10, 116, 28, -6, 0, 0 },
+ { 0, 0, -12, 110, 38, -8, 0, 0 }, { 0, 0, -12, 102, 48, -10, 0, 0 },
+ { 0, 0, -14, 94, 58, -10, 0, 0 }, { 0, 0, -12, 84, 66, -10, 0, 0 },
+ { 0, 0, -12, 76, 76, -12, 0, 0 }, { 0, 0, -10, 66, 84, -12, 0, 0 },
+ { 0, 0, -10, 58, 94, -14, 0, 0 }, { 0, 0, -10, 48, 102, -12, 0, 0 },
+ { 0, 0, -8, 38, 110, -12, 0, 0 }, { 0, 0, -6, 28, 116, -10, 0, 0 },
+ { 0, 0, -4, 18, 122, -8, 0, 0 }, { 0, 0, -2, 8, 126, -4, 0, 0 }
};
-#endif // USE_TEMPORALFILTER_12TAP
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_4smooth[SUBPEL_SHIFTS]) = {
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 30, 62, 34, 2, 0, 0 },
+ { 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
+ { 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
+ { 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, 0, 14, 54, 48, 12, 0, 0 },
+ { 0, 0, 12, 52, 52, 12, 0, 0 }, { 0, 0, 12, 48, 54, 14, 0, 0 },
+ { 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
+ { 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
+ { 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 30, 0, 0 }
+};
+
+static const InterpFilterParams av1_interp_4tap[2] = {
+ { (const int16_t *)sub_pel_filters_4, SUBPEL_TAPS, SUBPEL_SHIFTS,
+ EIGHTTAP_REGULAR },
+ { (const int16_t *)sub_pel_filters_4smooth, SUBPEL_TAPS, SUBPEL_SHIFTS,
+ EIGHTTAP_SMOOTH },
+};
+
+InterpFilterParams av1_get_interp_filter_params_with_block_size(
+ const InterpFilter interp_filter, const int w) {
+ if (w <= 4 &&
+ (interp_filter == MULTITAP_SHARP || interp_filter == EIGHTTAP_REGULAR))
+ return av1_interp_4tap[0];
+ else if (w <= 4 && interp_filter == EIGHTTAP_SMOOTH)
+ return av1_interp_4tap[1];
-InterpFilterParams av1_get_interp_filter_params(
- const InterpFilter interp_filter) {
-#if USE_TEMPORALFILTER_12TAP
- if (interp_filter == TEMPORALFILTER_12TAP)
- return av1_interp_temporalfilter_12tap;
-#endif // USE_TEMPORALFILTER_12TAP
return av1_interp_filter_params_list[interp_filter];
}
const int16_t *av1_get_interp_filter_kernel(const InterpFilter interp_filter) {
-#if USE_TEMPORALFILTER_12TAP
- if (interp_filter == TEMPORALFILTER_12TAP)
- return av1_interp_temporalfilter_12tap.filter_ptr;
-#endif // USE_TEMPORALFILTER_12TAP
return (const int16_t *)av1_interp_filter_params_list[interp_filter]
.filter_ptr;
}
-
-#if CONFIG_DUAL_FILTER
-InterpFilter av1_get_plane_interp_filter(InterpFilter interp_filter,
- int plane) {
-#if USE_TEMPORALFILTER_12TAP
-#if USE_EXTRA_FILTER
- assert(interp_filter <= EIGHTTAP_SHARP ||
- interp_filter == TEMPORALFILTER_12TAP);
-#else // USE_EXTRA_FILTER
- assert(interp_filter <= SWITCHABLE_FILTERS ||
- interp_filter == TEMPORALFILTER_12TAP);
-#endif // USE_EXTRA_FILTER
-#else
- assert(interp_filter <= EIGHTTAP_SHARP);
-#endif
-#if USE_EXTRA_FILTER
- if (plane == 0) {
- return interp_filter;
- } else {
- switch (interp_filter) {
- case EIGHTTAP_REGULAR: return FILTER_REGULAR_UV;
- case EIGHTTAP_SMOOTH: return FILTER_SMOOTH_UV;
- case MULTITAP_SHARP: return FILTER_SHARP_UV;
- case EIGHTTAP_SMOOTH2: return FILTER_SMOOTH2_UV;
- default: return interp_filter;
- }
- }
-#else // USE_EXTRA_FILTER
- (void)plane;
- return interp_filter;
-#endif // USE_EXTRA_FILTER
-}
-#endif
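The rewritten filter.c above replaces the compile-time filter variants with a single runtime lookup: av1_get_interp_filter_params_with_block_size() returns the 4-tap regular or smooth kernels for blocks with w <= 4 and falls back to the 8-tap list otherwise. A minimal sketch of how a caller might combine it with av1_get_interp_filter_subpel_kernel(); only the two av1_* calls, InterpFilterParams and EIGHTTAP_REGULAR come from the diff, the convolution loop and buffers are hypothetical, and av1/common/filter.h is assumed to be included:

/* Horizontal convolution of one row, illustration only. */
void convolve_row_sketch(const uint8_t *src, uint8_t *dst, int w, int subpel) {
  /* Narrow blocks (w <= 4) transparently get the 4-tap kernels. */
  const InterpFilterParams params =
      av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, w);
  const int16_t *kernel = av1_get_interp_filter_subpel_kernel(params, subpel);
  /* src must supply w + params.taps - 1 samples, starting params.taps/2 - 1
   * samples to the left of the first output pixel. */
  for (int x = 0; x < w; ++x) {
    int sum = 0;
    for (int k = 0; k < params.taps; ++k) sum += kernel[k] * src[x + k];
    const int v = (sum + 64) >> 7; /* kernels sum to 128 (Q7) */
    dst[x] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v)); /* clip_pixel() stand-in */
  }
}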
diff --git a/third_party/aom/av1/common/filter.h b/third_party/aom/av1/common/filter.h
index 343e87560..0c24ad9d0 100644
--- a/third_party/aom/av1/common/filter.h
+++ b/third_party/aom/av1/common/filter.h
@@ -14,7 +14,8 @@
#include <assert.h>
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
#include "aom/aom_integer.h"
#include "aom_dsp/aom_filter.h"
#include "aom_ports/mem.h"
@@ -23,34 +24,17 @@
extern "C" {
#endif
-#define USE_TEMPORALFILTER_12TAP 1
-#define MAX_FILTER_TAP 12
-
-#define USE_12TAP_FILTER 0
-#define USE_EXTRA_FILTER 0
+#define MAX_FILTER_TAP 8
-typedef enum {
+typedef enum ATTRIBUTE_PACKED {
EIGHTTAP_REGULAR,
EIGHTTAP_SMOOTH,
-#if USE_EXTRA_FILTER
- EIGHTTAP_SMOOTH2,
-#endif // USE_EXTRA_FILTER
MULTITAP_SHARP,
BILINEAR,
-#if USE_EXTRA_FILTER
- EIGHTTAP_SHARP,
- FILTER_REGULAR_UV,
- FILTER_SMOOTH_UV,
- FILTER_SHARP_UV,
- FILTER_SMOOTH2_UV,
-#endif // USE_EXTRA_FILTER
INTERP_FILTERS_ALL,
SWITCHABLE_FILTERS = BILINEAR,
SWITCHABLE = SWITCHABLE_FILTERS + 1, /* the last switchable one */
EXTRA_FILTERS = INTERP_FILTERS_ALL - SWITCHABLE_FILTERS,
-#if USE_TEMPORALFILTER_12TAP
- TEMPORALFILTER_12TAP = SWITCHABLE_FILTERS + EXTRA_FILTERS,
-#endif
} InterpFilter;
// With CONFIG_DUAL_FILTER, pack two InterpFilter's into a uint32_t: since
@@ -59,73 +43,34 @@ typedef enum {
// setting a (pair of) filters.
//
// Without CONFIG_DUAL_FILTER,
-#if CONFIG_DUAL_FILTER
typedef uint32_t InterpFilters;
static INLINE InterpFilter av1_extract_interp_filter(InterpFilters filters,
int x_filter) {
- return (InterpFilter)((filters >> (x_filter ? 16 : 0)) & 0xffff);
+ return (InterpFilter)((filters >> (x_filter ? 16 : 0)) & 0xf);
}
static INLINE InterpFilters av1_make_interp_filters(InterpFilter y_filter,
InterpFilter x_filter) {
- uint16_t y16 = y_filter & 0xffff;
- uint16_t x16 = x_filter & 0xffff;
+ uint16_t y16 = y_filter & 0xf;
+ uint16_t x16 = x_filter & 0xf;
return y16 | ((uint32_t)x16 << 16);
}
static INLINE InterpFilters av1_broadcast_interp_filter(InterpFilter filter) {
return av1_make_interp_filters(filter, filter);
}
-#else
-typedef InterpFilter InterpFilters;
-static INLINE InterpFilter av1_extract_interp_filter(InterpFilters filters,
- int x_filter) {
-#ifdef NDEBUG
- (void)x_filter;
-#endif
- assert(!x_filter);
- return filters;
-}
-
-static INLINE InterpFilters av1_broadcast_interp_filter(InterpFilter filter) {
- return filter;
-}
-#endif
static INLINE InterpFilter av1_unswitchable_filter(InterpFilter filter) {
return filter == SWITCHABLE ? EIGHTTAP_REGULAR : filter;
}
-#if USE_EXTRA_FILTER
-#define LOG_SWITCHABLE_FILTERS \
- 3 /* (1 << LOG_SWITCHABLE_FILTERS) > SWITCHABLE_FILTERS */
-#else
#define LOG_SWITCHABLE_FILTERS \
2 /* (1 << LOG_SWITCHABLE_FILTERS) > SWITCHABLE_FILTERS */
-#endif
-#if CONFIG_DUAL_FILTER
#define MAX_SUBPEL_TAPS 12
#define SWITCHABLE_FILTER_CONTEXTS ((SWITCHABLE_FILTERS + 1) * 4)
#define INTER_FILTER_COMP_OFFSET (SWITCHABLE_FILTERS + 1)
#define INTER_FILTER_DIR_OFFSET ((SWITCHABLE_FILTERS + 1) * 2)
-#else // CONFIG_DUAL_FILTER
-#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
-#endif // CONFIG_DUAL_FILTER
-
-#if CONFIG_EXT_INTRA
-#if CONFIG_INTRA_INTERP
-typedef enum {
- INTRA_FILTER_LINEAR,
- INTRA_FILTER_8TAP,
- INTRA_FILTER_8TAP_SHARP,
- INTRA_FILTER_8TAP_SMOOTH,
- INTRA_FILTERS,
-} INTRA_FILTER;
-
-extern const InterpKernel *av1_intra_filter_kernels[INTRA_FILTERS];
-#endif // CONFIG_INTRA_INTERP
-#endif // CONFIG_EXT_INTRA
typedef struct InterpFilterParams {
const int16_t *filter_ptr;
@@ -134,26 +79,16 @@ typedef struct InterpFilterParams {
InterpFilter interp_filter;
} InterpFilterParams;
-InterpFilterParams av1_get_interp_filter_params(
- const InterpFilter interp_filter);
-
const int16_t *av1_get_interp_filter_kernel(const InterpFilter interp_filter);
+InterpFilterParams av1_get_interp_filter_params_with_block_size(
+ const InterpFilter interp_filter, const int w);
+
static INLINE const int16_t *av1_get_interp_filter_subpel_kernel(
const InterpFilterParams filter_params, const int subpel) {
return filter_params.filter_ptr + filter_params.taps * subpel;
}
-static INLINE int av1_is_interpolating_filter(
- const InterpFilter interp_filter) {
- const InterpFilterParams ip = av1_get_interp_filter_params(interp_filter);
- return (ip.filter_ptr[ip.taps / 2 - 1] == 128);
-}
-
-#if CONFIG_DUAL_FILTER
-InterpFilter av1_get_plane_interp_filter(InterpFilter interp_filter, int plane);
-#endif
-
#ifdef __cplusplus
} // extern "C"
#endif
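With the reduced InterpFilter enum above, a filter value fits in 4 bits, which is why av1_extract_interp_filter() now masks with 0xf instead of 0xffff; av1_make_interp_filters() keeps the vertical (y) filter in the low 16 bits and the horizontal (x) filter in the high 16 bits. A small standalone round-trip check, using locally mirrored enum values (EIGHTTAP_SMOOTH = 1, MULTITAP_SHARP = 2) instead of including the header:

#include <assert.h>
#include <stdint.h>

/* Mirrors the InterpFilter values declared in this header. */
enum { kSmooth = 1 /* EIGHTTAP_SMOOTH */, kSharp = 2 /* MULTITAP_SHARP */ };

int main(void) {
  /* av1_make_interp_filters(y, x): y in the low half, x in the high half. */
  const uint32_t packed = (uint32_t)kSmooth | ((uint32_t)kSharp << 16);
  assert(packed == 0x00020001u);
  /* av1_extract_interp_filter(packed, x_filter) with the new 4-bit mask. */
  assert(((packed >> 0) & 0xf) == kSmooth);  /* x_filter == 0: y filter */
  assert(((packed >> 16) & 0xf) == kSharp);  /* x_filter == 1: x filter */
  return 0;
}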
diff --git a/third_party/aom/av1/common/frame_buffers.c b/third_party/aom/av1/common/frame_buffers.c
index 0b6b78e3d..502ccd27d 100644
--- a/third_party/aom/av1/common/frame_buffers.c
+++ b/third_party/aom/av1/common/frame_buffers.c
@@ -75,5 +75,6 @@ int av1_release_frame_buffer(void *cb_priv, aom_codec_frame_buffer_t *fb) {
InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv;
(void)cb_priv;
if (int_fb) int_fb->in_use = 0;
+ fb->priv = NULL;
return 0;
}
diff --git a/third_party/aom/av1/common/generic_code.c b/third_party/aom/av1/common/generic_code.c
deleted file mode 100644
index 7285decc9..000000000
--- a/third_party/aom/av1/common/generic_code.c
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/* clang-format off */
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include "generic_code.h"
-
-void aom_cdf_init_q15_1D(uint16_t *cdf, int nsyms, int cdf_size) {
- int i;
- for (i = 0; i < nsyms; i++)
- cdf[i] = AOM_ICDF((i + 1)*CDF_PROB_TOP/nsyms);
-
- cdf[cdf_size - 1] = 0;
-}
-
-/** Adapts a Q15 cdf after encoding/decoding a symbol. */
-void aom_cdf_adapt_q15(int val, uint16_t *cdf, int n, int *count, int rate) {
- int i;
- *count = OD_MINI(*count + 1, 1 << rate);
- OD_ASSERT(AOM_ICDF(cdf[n - 1]) == 32768);
- if (*count >= 1 << rate) {
- /* Steady-state adaptation based on a simple IIR with dyadic rate. */
- for (i = 0; i < n; i++) {
- int tmp;
- /* When (i < val), we want the adjustment ((cdf[i] - tmp) >> rate) to be
- positive so long as (cdf[i] > i + 1), and 0 when (cdf[i] == i + 1),
- to ensure we don't drive any probabilities to 0. Replacing cdf[i] with
- (i + 2) and solving ((i + 2 - tmp) >> rate == 1) for tmp produces
- tmp == i + 2 - (1 << rate). Using this value of tmp with
- cdf[i] == i + 1 instead gives an adjustment of 0 as desired.
-
- When (i >= val), we want ((cdf[i] - tmp) >> rate) to be negative so
- long as cdf[i] < 32768 - (n - 1 - i), and 0 when
- cdf[i] == 32768 - (n - 1 - i), again to ensure we don't drive any
- probabilities to 0. Since right-shifting any negative value is still
- negative, we can solve (32768 - (n - 1 - i) - tmp == 0) for tmp,
- producing tmp = 32769 - n + i. Using this value of tmp with smaller
- values of cdf[i] instead gives negative adjustments, as desired.
-
- Combining the two cases gives the expression below. These could be
- stored in a lookup table indexed by n and rate to avoid the
- arithmetic. */
- tmp = 2 - (1<<rate) + i + (32767 + (1<<rate) - n)*(i >= val);
- cdf[i] = AOM_ICDF(AOM_ICDF(cdf[i]) - ((AOM_ICDF(cdf[i]) - tmp) >> rate));
- }
- }
- else {
- int alpha;
- /* Initial adaptation for the first symbols. The adaptation rate is
- computed to be equivalent to what od_{en,de}code_cdf_adapt() does
- when the initial cdf is set to increment/4. */
- alpha = 4*32768/(n + 4**count);
- for (i = 0; i < n; i++) {
- int tmp;
- tmp = (32768 - n)*(i >= val) + i + 1;
- cdf[i] = AOM_ICDF(AOM_ICDF(cdf[i])
- - (((AOM_ICDF(cdf[i]) - tmp)*alpha) >> 15));
- }
- }
- OD_ASSERT(AOM_ICDF(cdf[n - 1]) == 32768);
-}
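The removed aom_cdf_adapt_q15() nudges each CDF entry toward the per-index target tmp derived in the comment above, so no symbol's probability can reach zero. A standalone numeric sketch of just the steady-state branch, under the assumption of plain Q15 CDFs without the AOM_ICDF remapping and without the count/initial-adaptation handling; like the original, it relies on arithmetic right shift of negative values:

#include <assert.h>
#include <stdint.h>

void cdf_adapt_q15_sketch(int val, uint16_t *cdf, int n, int rate) {
  for (int i = 0; i < n; i++) {
    /* Same target as the removed code: keeps every symbol's probability
     * strictly positive while moving mass toward the coded symbol. */
    const int tmp =
        2 - (1 << rate) + i + (32767 + (1 << rate) - n) * (i >= val);
    cdf[i] = (uint16_t)(cdf[i] - ((cdf[i] - tmp) >> rate));
  }
}

int main(void) {
  uint16_t cdf[4] = { 8192, 16384, 24576, 32768 }; /* uniform 4-symbol CDF */
  cdf_adapt_q15_sketch(1, cdf, 4, 4);              /* symbol 1 was coded */
  /* Mass moves toward symbol 1: cdf becomes { 7680, 17408, 25088, 32768 }. */
  assert(cdf[3] == 32768); /* the top of the CDF never moves */
  return 0;
}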
-
-/** Takes the base-2 log of E(x) in Q1.
- *
- * @param [in] ExQ16 expectation of x in Q16
- *
- * @retval 2*log2(ExQ16/2^16)
- */
-int log_ex(int ex_q16) {
- int lg;
- int lg_q1;
- int odd;
- lg = OD_ILOG(ex_q16);
- if (lg < 15) {
- odd = ex_q16*ex_q16 > 2 << 2*lg;
- }
- else {
- int tmp;
- tmp = ex_q16 >> (lg - 8);
- odd = tmp*tmp > (1 << 15);
- }
- lg_q1 = OD_MAXI(0, 2*lg - 33 + odd);
- return lg_q1;
-}
-
-/** Updates the probability model based on the encoded/decoded value
- *
- * @param [in,out] ex_q16      expectation of x
- * @param [in]     x           variable encoded/decoded (used to update ex_q16)
- * @param [in]     integration integration period of ex_q16 (leaky average over
- *                             1<<integration samples)
- */
-void generic_model_update(int *ex_q16, int x, int integration) {
- /* We could have saturated ExQ16 directly, but this is safe and simpler */
- x = OD_MINI(x, 32767);
- OD_IIR_DIADIC(*ex_q16, x << 16, integration);
-}
diff --git a/third_party/aom/av1/common/generic_code.h b/third_party/aom/av1/common/generic_code.h
deleted file mode 100644
index e1620ee8e..000000000
--- a/third_party/aom/av1/common/generic_code.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/* clang-format off */
-
-#if !defined(_generic_code_H)
-# define _generic_code_H
-
-# include "aom_dsp/bitreader.h"
-# include "aom_dsp/bitwriter.h"
-
-# define GENERIC_TABLES 12
-
-#define generic_decode(r, model, ex_q16, integration, ACCT_STR_NAME) \
- generic_decode_(r, model, ex_q16, integration ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_decode_cdf_adapt_q15(r, cdf, n, count, rate, ACCT_STR_NAME) \
- aom_decode_cdf_adapt_q15_(r, cdf, n, count, rate ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_decode_cdf_adapt(r, cdf, n, increment, ACCT_STR_NAME) \
- aom_decode_cdf_adapt_(r, cdf, n, increment ACCT_STR_ARG(ACCT_STR_NAME))
-
-typedef struct {
- /** cdf for multiple expectations of x */
- uint16_t cdf[GENERIC_TABLES][CDF_SIZE(16)];
-} generic_encoder;
-
-#define OD_IIR_DIADIC(y, x, shift) ((y) += ((x) - (y)) >> (shift))
-
-void generic_model_init(generic_encoder *model);
-
-/* Initialize a CDF for use by aom_write_symbol_pvq()/aom_read_symbol_pvq().
- This is used for CDFs whose size might not match the declared array size.
- The only real requirement is that the first value of every CDF be zero.
- Then aom_cdf_init_q15_1D() will be called with the real size the first time
- the CDF is used. */
-#define OD_CDFS_INIT_DYNAMIC(cdf) (memset(cdf, 0, sizeof(cdf)))
-
-// WARNING: DO NOT USE this init function,
-// if the size of cdf is different from what is declared by code.
-#define OD_CDFS_INIT_Q15(cdfs) \
- { int n_cdfs = sizeof(cdfs)/sizeof(cdfs[0]); \
- int cdf_size = sizeof(cdfs[0])/sizeof(cdfs[0][0]); \
- int nsyms = cdf_size - 1; \
- int i_; \
- for (i_ = 0; i_ < n_cdfs; i_++) \
- aom_cdf_init_q15_1D(cdfs[i_], nsyms, cdf_size); \
- }
-
-void aom_cdf_init(uint16_t *cdf, int ncdfs, int nsyms, int val, int first);
-
-void aom_cdf_init_q15_1D(uint16_t *cdf, int nsyms, int cdf_size);
-
-void aom_cdf_adapt_q15(int val, uint16_t *cdf, int n, int *count, int rate);
-
-void aom_encode_cdf_adapt_q15(aom_writer *w, int val, uint16_t *cdf, int n,
- int *count, int rate);
-
-void generic_encode(aom_writer *w, generic_encoder *model, int x,
- int *ex_q16, int integration);
-double generic_encode_cost(generic_encoder *model, int x, int *ex_q16);
-
-double od_encode_cdf_cost(int val, uint16_t *cdf, int n);
-
-int aom_decode_cdf_adapt_q15_(aom_reader *r, uint16_t *cdf, int n,
- int *count, int rate ACCT_STR_PARAM);
-
-int generic_decode_(aom_reader *r, generic_encoder *model,
- int *ex_q16, int integration ACCT_STR_PARAM);
-
-int log_ex(int ex_q16);
-
-void generic_model_update(int *ex_q16, int x, int integration);
-
-#endif
diff --git a/third_party/aom/av1/common/idct.c b/third_party/aom/av1/common/idct.c
index 53c2ba1f0..bc758eb57 100644
--- a/third_party/aom/av1/common/idct.c
+++ b/third_party/aom/av1/common/idct.c
@@ -11,2624 +11,34 @@
#include <math.h>
-#include "./aom_dsp_rtcd.h"
-#include "./av1_rtcd.h"
-#include "aom_dsp/inv_txfm.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
#include "aom_ports/mem.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
+#include "av1/common/av1_txfm.h"
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
-#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \
- CONFIG_DAALA_DCT32 || CONFIG_DAALA_DCT64
-#include "av1/common/daala_tx.h"
-#endif
int av1_get_tx_scale(const TX_SIZE tx_size) {
const int pels = tx_size_2d[tx_size];
- return (pels > 256) + (pels > 1024) + (pels > 4096);
+ // Largest possible pels is 4096 (64x64).
+ return (pels > 256) + (pels > 1024);
}
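The simplified av1_get_tx_scale() above maps the pel count of a transform block to a final downscaling amount, and two thresholds now suffice because 64x64 (4096 pels) is the largest size. A hypothetical standalone check that mirrors the expression:

#include <assert.h>

/* Mirrors av1_get_tx_scale(): scale = (pels > 256) + (pels > 1024). */
static int tx_scale_from_pels(int pels) {
  return (pels > 256) + (pels > 1024);
}

int main(void) {
  assert(tx_scale_from_pels(16 * 16) == 0); /* up to 16x16 (256 pels) */
  assert(tx_scale_from_pels(32 * 32) == 1); /* 32x32 (1024 pels) */
  assert(tx_scale_from_pels(64 * 64) == 2); /* 64x64, the new maximum */
  return 0;
}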
 // NOTE: The implementations of all inverses need to be aware of the fact
// that input and output could be the same buffer.
-#if CONFIG_EXT_TX
-static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
- int i;
- for (i = 0; i < 4; ++i) {
- output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
- }
-}
-
-static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
- int i;
- for (i = 0; i < 8; ++i) {
- output[i] = input[i] * 2;
- }
-}
-
-static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
- int i;
- for (i = 0; i < 16; ++i) {
- output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
- }
-}
-
-static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
- int i;
- for (i = 0; i < 32; ++i) {
- output[i] = input[i] * 4;
- }
-}
-
-#if CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
-static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
- int i;
- for (i = 0; i < 64; ++i) {
- output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
- }
-}
-#endif // CONFIG_TX64X64
-#endif // CONFIG_EXT_TX
-
-// For use in lieu of ADST
-static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
- int i;
- tran_low_t inputhalf[16];
- // Multiply input by sqrt(2)
- for (i = 0; i < 16; ++i) {
- inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
- }
- for (i = 0; i < 16; ++i) {
- output[i] = input[16 + i] * 4;
- }
- aom_idct16_c(inputhalf, output + 16);
- // Note overall scaling factor is 4 times orthogonal
-}
-
-#if CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
-static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
- int32_t in[64], out[64];
- int i;
- for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
- av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
- for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
-}
-
-static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
- int32_t in[64], out[64];
- int i;
- for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
- av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
- for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
-}
-
-// For use in lieu of ADST
-static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
- int i;
- tran_low_t inputhalf[32];
- // Multiply input by sqrt(2)
- for (i = 0; i < 32; ++i) {
- inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
- }
- for (i = 0; i < 32; ++i) {
- output[i] = (tran_low_t)dct_const_round_shift(input[32 + i] * 4 * Sqrt2);
- }
- aom_idct32_c(inputhalf, output + 32);
- // Note overall scaling factor is 4 * sqrt(2) times orthogonal
-}
-#endif // CONFIG_TX64X64
-
-// Inverse identity transform and add.
-#if CONFIG_EXT_TX
-static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- int bsx, int bsy, TX_TYPE tx_type) {
- int r, c;
- const int pels = bsx * bsy;
- const int shift = 3 - ((pels > 256) + (pels > 1024));
- if (tx_type == IDTX) {
- for (r = 0; r < bsy; ++r) {
- for (c = 0; c < bsx; ++c)
- dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
- dest += stride;
- input += bsx;
- }
- }
-}
-#endif // CONFIG_EXT_TX
-
-#define FLIPUD_PTR(dest, stride, size) \
- do { \
- (dest) = (dest) + ((size)-1) * (stride); \
- (stride) = -(stride); \
- } while (0)
-
-#if CONFIG_EXT_TX
-static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
- int *sstride, TX_TYPE tx_type, int sizey,
- int sizex) {
- // Note that the transpose of src will be added to dst. In order to LR
- // flip the addends (in dst coordinates), we UD flip the src. To UD flip
- // the addends, we UD flip the dst.
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- case IDTX:
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST: break;
- case FLIPADST_DCT:
- case FLIPADST_ADST:
- case V_FLIPADST:
- // flip UD
- FLIPUD_PTR(*dst, *dstride, sizey);
- break;
- case DCT_FLIPADST:
- case ADST_FLIPADST:
- case H_FLIPADST:
- // flip LR
- FLIPUD_PTR(*src, *sstride, sizex);
- break;
- case FLIPADST_FLIPADST:
- // flip UD
- FLIPUD_PTR(*dst, *dstride, sizey);
- // flip LR
- FLIPUD_PTR(*src, *sstride, sizex);
- break;
- default: assert(0); break;
- }
-}
-#endif // CONFIG_EXT_TX
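The removed maybe_flip_strides() implements the FLIPADST variants by re-pointing a buffer at its last row and negating the stride via FLIPUD_PTR, so row i of the flipped view is row size-1-i of the original. A tiny self-check of that identity, re-using the same macro on a hypothetical 4-row, stride-3 buffer:

#include <assert.h>
#include <stdint.h>

#define FLIPUD_PTR(dest, stride, size)       \
  do {                                       \
    (dest) = (dest) + ((size)-1) * (stride); \
    (stride) = -(stride);                    \
  } while (0)

int main(void) {
  uint8_t buf[4 * 3] = { 0 };
  for (int r = 0; r < 4; ++r) buf[r * 3] = (uint8_t)r; /* tag each row */
  uint8_t *p = buf;
  int stride = 3;
  FLIPUD_PTR(p, stride, 4);
  /* Row i of the flipped view is row 3 - i of the original. */
  for (int i = 0; i < 4; ++i) assert(p[i * stride] == (uint8_t)(3 - i));
  return 0;
}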
-
-#if CONFIG_HIGHBITDEPTH
-#if CONFIG_EXT_TX && CONFIG_TX64X64
-static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
- int stride, int bsx, int bsy, TX_TYPE tx_type,
- int bd) {
- int r, c;
- const int pels = bsx * bsy;
- const int shift = 3 - ((pels > 256) + (pels > 1024));
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
-
- if (tx_type == IDTX) {
- for (r = 0; r < bsy; ++r) {
- for (c = 0; c < bsx; ++c)
- dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
- dest += stride;
- input += bsx;
- }
- }
-}
-#endif // CONFIG_EXT_TX && CONFIG_TX64X64
-#endif // CONFIG_HIGHBITDEPTH
-
-#if CONFIG_LGT || CONFIG_LGT_FROM_PRED
-void ilgt4(const tran_low_t *input, tran_low_t *output,
- const tran_high_t *lgtmtx) {
- if (!lgtmtx) assert(0);
-#if CONFIG_LGT_FROM_PRED
- // For DCT/ADST, use butterfly implementations
- if (lgtmtx[0] == DCT4) {
- aom_idct4_c(input, output);
- return;
- } else if (lgtmtx[0] == ADST4) {
- aom_iadst4_c(input, output);
- return;
- }
-#endif // CONFIG_LGT_FROM_PRED
-
- // evaluate s[j] = sum of all lgtmtx[j]*input[i] over i=1,...,4
- tran_high_t s[4] = { 0 };
- for (int i = 0; i < 4; ++i)
- for (int j = 0; j < 4; ++j) s[j] += lgtmtx[i * 4 + j] * input[i];
-
- for (int i = 0; i < 4; ++i) output[i] = WRAPLOW(dct_const_round_shift(s[i]));
-}
-
-void ilgt8(const tran_low_t *input, tran_low_t *output,
- const tran_high_t *lgtmtx) {
- if (!lgtmtx) assert(0);
-#if CONFIG_LGT_FROM_PRED
- // For DCT/ADST, use butterfly implementations
- if (lgtmtx[0] == DCT8) {
- aom_idct8_c(input, output);
- return;
- } else if (lgtmtx[0] == ADST8) {
- aom_iadst8_c(input, output);
- return;
- }
-#endif // CONFIG_LGT_FROM_PRED
-
- // evaluate s[j] = sum of all lgtmtx[j]*input[i] over i=1,...,8
- tran_high_t s[8] = { 0 };
- for (int i = 0; i < 8; ++i)
- for (int j = 0; j < 8; ++j) s[j] += lgtmtx[i * 8 + j] * input[i];
-
- for (int i = 0; i < 8; ++i) output[i] = WRAPLOW(dct_const_round_shift(s[i]));
-}
-#endif // CONFIG_LGT || CONFIG_LGT_FROM_PRED
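The removed ilgt4()/ilgt8() are dense 1-D transforms: output j accumulates lgtmtx[i*n + j] * input[i] over all inputs i and is then rounded. A generic n-point sketch of that inner product; the local typedefs and the Q14 rounding (standing in for dct_const_round_shift()) are assumptions:

#include <stdint.h>

typedef int32_t tran_low_t;
typedef int64_t tran_high_t;

/* s[j] = sum_i m[i*n + j] * in[i], then round (assumes a Q14 matrix). */
void ilgt_n_sketch(const tran_low_t *in, tran_low_t *out,
                   const tran_high_t *m, int n) {
  for (int j = 0; j < n; ++j) {
    tran_high_t s = 0;
    for (int i = 0; i < n; ++i) s += m[i * n + j] * in[i];
    out[j] = (tran_low_t)((s + (1 << 13)) >> 14);
  }
}

A caller would flatten one of the removed lgt4_*/lgt8_* tables row-major and pass n = 4 or 8.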
-
-#if CONFIG_LGT
-// get_lgt4 and get_lgt8 return 1 and pick a lgt matrix if LGT is chosen to
-// apply. Otherwise they return 0
-int get_lgt4(const TxfmParam *txfm_param, int is_col,
- const tran_high_t **lgtmtx) {
- if (is_col && (vtx_tab[txfm_param->tx_type] == ADST_1D ||
- vtx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
- lgtmtx[0] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
- return 1;
- } else if (!is_col && (htx_tab[txfm_param->tx_type] == ADST_1D ||
- htx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
- lgtmtx[0] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
- return 1;
- }
- lgtmtx[0] = NULL;
- return 0;
-}
-
-int get_lgt8(const TxfmParam *txfm_param, int is_col,
- const tran_high_t **lgtmtx) {
- if (is_col && (vtx_tab[txfm_param->tx_type] == ADST_1D ||
- vtx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
- lgtmtx[0] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
- return 1;
- } else if (!is_col && (htx_tab[txfm_param->tx_type] == ADST_1D ||
- htx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
- lgtmtx[0] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
- return 1;
- }
- lgtmtx[0] = NULL;
- return 0;
-}
-#endif // CONFIG_LGT
-
-#if CONFIG_LGT_FROM_PRED
-void ilgt16up(const tran_low_t *input, tran_low_t *output,
- const tran_high_t *lgtmtx) {
- if (lgtmtx[0] == DCT16) {
- aom_idct16_c(input, output);
- return;
- } else if (lgtmtx[0] == ADST16) {
- aom_iadst16_c(input, output);
- return;
- } else if (lgtmtx[0] == DCT32) {
- aom_idct32_c(input, output);
- return;
- } else if (lgtmtx[0] == ADST32) {
- ihalfright32_c(input, output);
- return;
- } else {
- assert(0);
- }
-}
-
-void get_discontinuity_1d(uint8_t *arr, int n, int *idx_max_diff) {
- *idx_max_diff = -1;
-
- int temp = 0, max_diff = 0, min_diff = INT_MAX;
- for (int i = 1; i < n; ++i) {
- temp = abs(arr[i] - arr[i - 1]);
- if (temp > max_diff) {
- max_diff = temp;
- *idx_max_diff = i;
- }
- if (temp < min_diff) min_diff = temp;
- }
-}
-
-void get_discontinuity_2d(uint8_t *dst, int stride, int n, int is_col,
- int *idx_max_diff, int ntx) {
- *idx_max_diff = -1;
-
- int diff = 0, temp = 0, max_diff = 0, min_diff = INT_MAX;
- for (int i = 1; i < n; ++i) {
- temp = 0;
- for (int j = 0; j < ntx; ++j) {
- if (is_col) // vertical diff
- diff = dst[i * stride + j] - dst[(i - 1) * stride + j];
- else // horizontal diff
- diff = dst[j * stride + i] - dst[j * stride + i - 1];
- temp += diff * diff;
- }
- // temp/w is the i-th avg square diff
- if (temp > max_diff) {
- max_diff = temp;
- *idx_max_diff = i;
- }
- if (temp < min_diff) min_diff = temp;
- }
-}
-
-int idx_selfloop_wrt_mode(PREDICTION_MODE mode, int is_col) {
- // 0: no self-loop
- // 1: small self-loop
- // 2: medium self-loop
- // 3: large self-loop
- switch (mode) {
- case DC_PRED:
- case SMOOTH_PRED:
-      // prediction is good for both directions: large SLs for row and col
- return 3;
- case TM_PRED: return 0;
-#if CONFIG_SMOOTH_HV
- case SMOOTH_H_PRED:
-#endif
- case H_PRED:
- // prediction is good for H direction: large SL for row only
- return is_col ? 0 : 3;
-#if CONFIG_SMOOTH_HV
- case SMOOTH_V_PRED:
-#endif
- case V_PRED:
- // prediction is good for V direction: large SL for col only
- return is_col ? 3 : 0;
-#if LGT_SL_INTRA
- // directional mode: choose SL based on the direction
- case D45_PRED: return is_col ? 2 : 0;
- case D63_PRED: return is_col ? 3 : 0;
- case D117_PRED: return is_col ? 3 : 1;
- case D135_PRED: return 2;
- case D153_PRED: return is_col ? 1 : 3;
- case D207_PRED: return is_col ? 0 : 3;
-#else
- case D45_PRED:
- case D63_PRED:
- case D117_PRED: return is_col ? 3 : 0;
- case D135_PRED:
- case D153_PRED:
- case D207_PRED: return is_col ? 0 : 3;
-#endif
- // inter: no SL
- default: return 0;
- }
-}
-
-void get_lgt4_from_pred(const TxfmParam *txfm_param, int is_col,
- const tran_high_t **lgtmtx, int ntx) {
- PREDICTION_MODE mode = txfm_param->mode;
- int stride = txfm_param->stride;
- uint8_t *dst = txfm_param->dst;
- int bp = -1;
- uint8_t arr[4];
-
- // Each lgt4mtx_arr[k][i] corresponds to a line graph with a self-loop on
- // the first node, and possibly a weak edge within the line graph. i is
- // the index of the weak edge (between the i-th and (i+1)-th pixels, i=0
- // means no weak edge). k corresponds to the first self-loop's weight
- const tran_high_t *lgt4mtx_arr[4][4] = {
- { &lgt4_000[0][0], &lgt4_000w1[0][0], &lgt4_000w2[0][0],
- &lgt4_000w3[0][0] },
- { &lgt4_060[0][0], &lgt4_060_000w1[0][0], &lgt4_060_000w2[0][0],
- &lgt4_060_000w3[0][0] },
- { &lgt4_100[0][0], &lgt4_100_000w1[0][0], &lgt4_100_000w2[0][0],
- &lgt4_100_000w3[0][0] },
- { &lgt4_150[0][0], &lgt4_150_000w1[0][0], &lgt4_150_000w2[0][0],
- &lgt4_150_000w3[0][0] },
- };
-
- // initialize to DCT or some LGTs, and then change later if necessary
- int idx_sl = idx_selfloop_wrt_mode(mode, is_col);
- lgtmtx[0] = lgt4mtx_arr[idx_sl][0];
-
- // find the break point and replace the line graph by the one with a
- // break point
- if (mode == DC_PRED || mode == SMOOTH_PRED) {
- // Do not use break point, since 1) is_left_available and is_top_available
- // in DC_PRED are not known by txfm_param for now, so accessing
-    // both boundaries anyway may cause a mismatch 2) DC prediction
- // typically yields very smooth residues so having the break point
- // does not usually improve the RD result.
- return;
- } else if (mode == TM_PRED) {
- // TM_PRED: use both 1D top boundary and 1D left boundary
- if (is_col)
- for (int i = 0; i < 4; ++i) arr[i] = dst[i * stride];
- else
- for (int i = 0; i < 4; ++i) arr[i] = dst[i];
- get_discontinuity_1d(&arr[0], 4, &bp);
- } else if (mode == V_PRED) {
- // V_PRED: use 1D top boundary only
- if (is_col) return;
- for (int i = 0; i < 4; ++i) arr[i] = dst[i];
- get_discontinuity_1d(&arr[0], 4, &bp);
- } else if (mode == H_PRED) {
- // H_PRED: use 1D left boundary only
- if (!is_col) return;
- for (int i = 0; i < 4; ++i) arr[i] = dst[i * stride];
- get_discontinuity_1d(&arr[0], 4, &bp);
-#if CONFIG_SMOOTH_HV
- } else if (mode == SMOOTH_V_PRED) {
- if (is_col) return;
- for (int i = 0; i < 4; ++i) arr[i] = dst[-stride + i];
- get_discontinuity_1d(&arr[0], 4, &bp);
- } else if (mode == SMOOTH_H_PRED) {
- if (!is_col) return;
- for (int i = 0; i < 4; ++i) arr[i] = dst[i * stride - 1];
- get_discontinuity_1d(&arr[0], 4, &bp);
-#endif
- } else if (mode == D45_PRED || mode == D63_PRED || mode == D117_PRED) {
- // directional modes closer to vertical (maybe include D135 later)
- if (!is_col) get_discontinuity_2d(dst, stride, 4, 0, &bp, ntx);
- } else if (mode == D135_PRED || mode == D153_PRED || mode == D207_PRED) {
- // directional modes closer to horizontal
- if (is_col) get_discontinuity_2d(dst, stride, 4, 1, &bp, ntx);
- } else if (mode > TM_PRED) {
- // inter
- get_discontinuity_2d(dst, stride, 4, is_col, &bp, ntx);
- }
-
-#if LGT_SL_INTRA
- if (bp != -1) lgtmtx[0] = lgt4mtx_arr[idx_sl][bp];
-#else
- if (bp != -1) lgtmtx[0] = lgt4mtx_arr[0][bp];
-#endif
-}
-
-void get_lgt8_from_pred(const TxfmParam *txfm_param, int is_col,
- const tran_high_t **lgtmtx, int ntx) {
- PREDICTION_MODE mode = txfm_param->mode;
- int stride = txfm_param->stride;
- uint8_t *dst = txfm_param->dst;
- int bp = -1;
- uint8_t arr[8];
-
- const tran_high_t *lgt8mtx_arr[4][8] = {
- { &lgt8_000[0][0], &lgt8_000w1[0][0], &lgt8_000w2[0][0], &lgt8_000w3[0][0],
- &lgt8_000w4[0][0], &lgt8_000w5[0][0], &lgt8_000w6[0][0],
- &lgt8_000w7[0][0] },
- { &lgt8_060[0][0], &lgt8_060_000w1[0][0], &lgt8_060_000w2[0][0],
- &lgt8_060_000w3[0][0], &lgt8_060_000w4[0][0], &lgt8_060_000w5[0][0],
- &lgt8_060_000w6[0][0], &lgt8_060_000w7[0][0] },
- { &lgt8_100[0][0], &lgt8_100_000w1[0][0], &lgt8_100_000w2[0][0],
- &lgt8_100_000w3[0][0], &lgt8_100_000w4[0][0], &lgt8_100_000w5[0][0],
- &lgt8_100_000w6[0][0], &lgt8_100_000w7[0][0] },
- { &lgt8_150[0][0], &lgt8_150_000w1[0][0], &lgt8_150_000w2[0][0],
- &lgt8_150_000w3[0][0], &lgt8_150_000w4[0][0], &lgt8_150_000w5[0][0],
- &lgt8_150_000w6[0][0], &lgt8_150_000w7[0][0] },
- };
-
- int idx_sl = idx_selfloop_wrt_mode(mode, is_col);
- lgtmtx[0] = lgt8mtx_arr[idx_sl][0];
-
- if (mode == DC_PRED || mode == SMOOTH_PRED) {
- return;
- } else if (mode == TM_PRED) {
- if (is_col)
- for (int i = 0; i < 8; ++i) arr[i] = dst[i * stride];
- else
- for (int i = 0; i < 8; ++i) arr[i] = dst[i];
- get_discontinuity_1d(&arr[0], 8, &bp);
- } else if (mode == V_PRED) {
- if (is_col) return;
- for (int i = 0; i < 8; ++i) arr[i] = dst[i];
- get_discontinuity_1d(&arr[0], 8, &bp);
- } else if (mode == H_PRED) {
- if (!is_col) return;
- for (int i = 0; i < 8; ++i) arr[i] = dst[i * stride];
- get_discontinuity_1d(&arr[0], 8, &bp);
-#if CONFIG_SMOOTH_HV
- } else if (mode == SMOOTH_V_PRED) {
- if (is_col) return;
- for (int i = 0; i < 8; ++i) arr[i] = dst[-stride + i];
- get_discontinuity_1d(&arr[0], 8, &bp);
- } else if (mode == SMOOTH_H_PRED) {
- if (!is_col) return;
- for (int i = 0; i < 8; ++i) arr[i] = dst[i * stride - 1];
- get_discontinuity_1d(&arr[0], 8, &bp);
-#endif
- } else if (mode == D45_PRED || mode == D63_PRED || mode == D117_PRED) {
- if (!is_col) get_discontinuity_2d(dst, stride, 8, 0, &bp, ntx);
- } else if (mode == D135_PRED || mode == D153_PRED || mode == D207_PRED) {
- if (is_col) get_discontinuity_2d(dst, stride, 8, 1, &bp, ntx);
- } else if (mode > TM_PRED) {
- get_discontinuity_2d(dst, stride, 8, is_col, &bp, ntx);
- }
-
-#if LGT_SL_INTRA
- if (bp != -1) lgtmtx[0] = lgt8mtx_arr[idx_sl][bp];
-#else
- if (bp != -1) lgtmtx[0] = lgt8mtx_arr[0][bp];
-#endif
-}
-
-// Since LGTs with length >8 are not implemented now, the following function
-// will just call DCT or ADST
-void get_lgt16up_from_pred(const TxfmParam *txfm_param, int is_col,
- const tran_high_t **lgtmtx, int ntx) {
- int tx_length = is_col ? tx_size_high[txfm_param->tx_size]
- : tx_size_wide[txfm_param->tx_size];
- assert(tx_length == 16 || tx_length == 32);
- PREDICTION_MODE mode = txfm_param->mode;
-
- (void)ntx;
- const tran_high_t *dctmtx =
- tx_length == 16 ? &lgt16_000[0][0] : &lgt32_000[0][0];
- const tran_high_t *adstmtx =
- tx_length == 16 ? &lgt16_200[0][0] : &lgt32_200[0][0];
-
- switch (mode) {
- case DC_PRED:
- case TM_PRED:
- case SMOOTH_PRED:
- // prediction from both top and left -> ADST
- lgtmtx[0] = adstmtx;
- break;
- case V_PRED:
- case D45_PRED:
- case D63_PRED:
- case D117_PRED:
-#if CONFIG_SMOOTH_HV
- case SMOOTH_V_PRED:
-#endif
- // prediction from the top more than from the left -> ADST
- lgtmtx[0] = is_col ? adstmtx : dctmtx;
- break;
- case H_PRED:
- case D135_PRED:
- case D153_PRED:
- case D207_PRED:
-#if CONFIG_SMOOTH_HV
- case SMOOTH_H_PRED:
-#endif
- // prediction from the left more than from the top -> DCT
- lgtmtx[0] = is_col ? dctmtx : adstmtx;
- break;
- default: lgtmtx[0] = dctmtx; break;
- }
-}
-
-typedef void (*IlgtFunc)(const tran_low_t *input, tran_low_t *output,
- const tran_high_t *lgtmtx);
-
-static IlgtFunc ilgt_func[4] = { ilgt4, ilgt8, ilgt16up, ilgt16up };
-
-typedef void (*GetLgtFunc)(const TxfmParam *txfm_param, int is_col,
- const tran_high_t **lgtmtx, int ntx);
-
-static GetLgtFunc get_lgt_func[4] = { get_lgt4_from_pred, get_lgt8_from_pred,
- get_lgt16up_from_pred,
- get_lgt16up_from_pred };
-
-// this inline function corresponds to the up scaling before the transpose
-// operation in the av1_iht* functions
-static INLINE tran_low_t inv_upscale_wrt_txsize(const tran_high_t val,
- const TX_SIZE tx_size) {
- switch (tx_size) {
- case TX_4X4:
- case TX_8X8:
- case TX_4X16:
- case TX_16X4:
- case TX_8X32:
- case TX_32X8: return (tran_low_t)val;
- case TX_4X8:
- case TX_8X4:
- case TX_8X16:
- case TX_16X8: return (tran_low_t)dct_const_round_shift(val * Sqrt2);
- default: assert(0); break;
- }
- return 0;
-}
-
-// This inline function corresponds to the bit shift before summing with the
-// destination in the av1_iht* functions
-static INLINE tran_low_t inv_downscale_wrt_txsize(const tran_low_t val,
- const TX_SIZE tx_size) {
- switch (tx_size) {
- case TX_4X4: return ROUND_POWER_OF_TWO(val, 4);
- case TX_4X8:
- case TX_8X4:
- case TX_8X8:
- case TX_4X16:
- case TX_16X4: return ROUND_POWER_OF_TWO(val, 5);
- case TX_8X16:
- case TX_16X8:
- case TX_8X32:
- case TX_32X8: return ROUND_POWER_OF_TWO(val, 6);
- default: assert(0); break;
- }
- return 0;
-}
-
-void ilgt2d_from_pred_add(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_SIZE tx_size = txfm_param->tx_size;
- const int w = tx_size_wide[tx_size];
- const int h = tx_size_high[tx_size];
- const int wlog2 = tx_size_wide_log2[tx_size];
- const int hlog2 = tx_size_high_log2[tx_size];
- assert(w <= 8 || h <= 8);
-
- int i, j;
- // largest 1D size allowed for LGT: 32
- // largest 2D size allowed for LGT: 8x32=256
- tran_low_t tmp[256], out[256], temp1d[32];
- const tran_high_t *lgtmtx_col[1];
- const tran_high_t *lgtmtx_row[1];
- get_lgt_func[hlog2 - 2](txfm_param, 1, lgtmtx_col, w);
- get_lgt_func[wlog2 - 2](txfm_param, 0, lgtmtx_row, h);
-
-// for inverse transform, to be consistent with av1_iht functions, we always
-// apply row transforms first and column transforms second, but both
-// row-first and column-first versions are implemented here for future
-// tests (use different lgtmtx_col[i], and choose row or column tx first
-// depending on transforms).
-#if 1
- // inverse column transforms
- for (i = 0; i < w; ++i) {
- // transpose
- for (j = 0; j < h; ++j) tmp[i * h + j] = input[j * w + i];
- ilgt_func[hlog2 - 2](&tmp[i * h], temp1d, lgtmtx_col[0]);
- // upscale, and store in place
- for (j = 0; j < h; ++j)
- tmp[i * h + j] = inv_upscale_wrt_txsize(temp1d[j], tx_size);
- }
- // inverse row transforms
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) temp1d[j] = tmp[j * h + i];
- ilgt_func[wlog2 - 2](temp1d, &out[i * w], lgtmtx_row[0]);
- }
- // downscale + sum with the destination
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- int d = i * stride + j;
- int s = i * w + j;
- dest[d] =
- clip_pixel_add(dest[d], inv_downscale_wrt_txsize(out[s], tx_size));
- }
- }
-#else
- // inverse row transforms
- for (i = 0; i < h; ++i) {
- ilgt_func[wlog2 - 2](input, temp1d, lgtmtx_row[0]);
- // upscale and transpose (tmp[j*h+i] <--> tmp[j][i])
- for (j = 0; j < w; ++j)
- tmp[j * h + i] = inv_upscale_wrt_txsize(temp1d[j], tx_size);
- input += w;
- }
- // inverse column transforms
- for (i = 0; i < w; ++i)
- ilgt_func[hlog2 - 2](&tmp[i * h], &out[i * h], lgtmtx_col[0]);
- // here, out[] is the transpose of 2D block of transform coefficients
-
- // downscale + transform + sum with dest
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- int d = i * stride + j;
- int s = j * h + i;
- dest[d] =
- clip_pixel_add(dest[d], inv_downscale_wrt_txsize(out[s], tx_size));
- }
- }
-#endif
-}
-#endif // CONFIG_LGT_FROM_PRED
-
-void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if !CONFIG_DAALA_DCT4
- if (tx_type == DCT_DCT) {
- aom_idct4x4_16_add(input, dest, stride);
- return;
- }
-#endif
- static const transform_2d IHT_4[] = {
-#if CONFIG_DAALA_DCT4
- { daala_idct4, daala_idct4 }, // DCT_DCT = 0
- { daala_idst4, daala_idct4 }, // ADST_DCT = 1
- { daala_idct4, daala_idst4 }, // DCT_ADST = 2
- { daala_idst4, daala_idst4 }, // ADST_ADST = 3
-#if CONFIG_EXT_TX
- { daala_idst4, daala_idct4 }, // FLIPADST_DCT
- { daala_idct4, daala_idst4 }, // DCT_FLIPADST
- { daala_idst4, daala_idst4 }, // FLIPADST_FLIPADST
- { daala_idst4, daala_idst4 }, // ADST_FLIPADST
- { daala_idst4, daala_idst4 }, // FLIPADST_ADST
- { daala_idtx4, daala_idtx4 }, // IDTX
- { daala_idct4, daala_idtx4 }, // V_DCT
- { daala_idtx4, daala_idct4 }, // H_DCT
- { daala_idst4, daala_idtx4 }, // V_ADST
- { daala_idtx4, daala_idst4 }, // H_ADST
- { daala_idst4, daala_idtx4 }, // V_FLIPADST
- { daala_idtx4, daala_idst4 }, // H_FLIPADST
-#endif
-#else
- { aom_idct4_c, aom_idct4_c }, // DCT_DCT = 0
- { aom_iadst4_c, aom_idct4_c }, // ADST_DCT = 1
- { aom_idct4_c, aom_iadst4_c }, // DCT_ADST = 2
- { aom_iadst4_c, aom_iadst4_c }, // ADST_ADST = 3
-#if CONFIG_EXT_TX
- { aom_iadst4_c, aom_idct4_c }, // FLIPADST_DCT
- { aom_idct4_c, aom_iadst4_c }, // DCT_FLIPADST
- { aom_iadst4_c, aom_iadst4_c }, // FLIPADST_FLIPADST
- { aom_iadst4_c, aom_iadst4_c }, // ADST_FLIPADST
- { aom_iadst4_c, aom_iadst4_c }, // FLIPADST_ADST
- { iidtx4_c, iidtx4_c }, // IDTX
- { aom_idct4_c, iidtx4_c }, // V_DCT
- { iidtx4_c, aom_idct4_c }, // H_DCT
- { aom_iadst4_c, iidtx4_c }, // V_ADST
- { iidtx4_c, aom_iadst4_c }, // H_ADST
- { aom_iadst4_c, iidtx4_c }, // V_FLIPADST
- { iidtx4_c, aom_iadst4_c }, // H_FLIPADST
-#endif
-#endif
- };
-
- int i, j;
- tran_low_t tmp[4][4];
- tran_low_t out[4][4];
- tran_low_t *outp = &out[0][0];
- int outstride = 4;
-
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
-
-#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[1];
- const tran_high_t *lgtmtx_row[1];
- int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
- int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
-#endif
-
- // inverse transform row vectors
- for (i = 0; i < 4; ++i) {
-#if CONFIG_DAALA_DCT4
- tran_low_t temp_in[4];
- for (j = 0; j < 4; j++) temp_in[j] = input[j] * 2;
- IHT_4[tx_type].rows(temp_in, out[i]);
-#else
-#if CONFIG_LGT
- if (use_lgt_row)
- ilgt4(input, out[i], lgtmtx_row[0]);
- else
-#endif
- IHT_4[tx_type].rows(input, out[i]);
-#endif
- input += 4;
- }
-
- // transpose
- for (i = 0; i < 4; i++) {
- for (j = 0; j < 4; j++) {
- tmp[j][i] = out[i][j];
- }
- }
-
- // inverse transform column vectors
- for (i = 0; i < 4; ++i) {
-#if CONFIG_LGT
- if (use_lgt_col)
- ilgt4(tmp[i], out[i], lgtmtx_col[0]);
- else
-#endif
- IHT_4[tx_type].cols(tmp[i], out[i]);
- }
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
-#endif
-
- // Sum with the destination
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
-#if CONFIG_DAALA_DCT4
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
-#else
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
-#endif
- }
- }
-}
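Every av1_iht*_add_c() removed in this file follows the same separable skeleton as the 4x4 case above: a row pass, a transpose, a column pass, then a round-shift and add into the destination (rectangular sizes insert an extra sqrt(2) scale, and the EXT_TX flips are handled by maybe_flip_strides()). A compact sketch of that skeleton for a square block; the callback type, buffer sizes and the clipping here are simplified stand-ins:

#include <stdint.h>

typedef int32_t tran_low_t;
typedef void (*inv_txfm_1d)(const tran_low_t *in, tran_low_t *out);

/* Row pass, transpose, column pass, then round-shift by `shift` (4 for 4x4,
 * 5 for 8x8, ... in the removed code) and add to dest; the clamp stands in
 * for clip_pixel_add(). */
void iht_square_sketch(const tran_low_t *input, uint8_t *dest, int stride,
                       int n, inv_txfm_1d row_fn, inv_txfm_1d col_fn,
                       int shift) {
  tran_low_t tmp[32 * 32], out[32 * 32]; /* big enough for n <= 32 */
  for (int i = 0; i < n; ++i) row_fn(input + i * n, out + i * n);
  for (int i = 0; i < n; ++i) /* transpose so columns are contiguous */
    for (int j = 0; j < n; ++j) tmp[j * n + i] = out[i * n + j];
  for (int i = 0; i < n; ++i) col_fn(tmp + i * n, out + i * n);
  for (int i = 0; i < n; ++i) {
    for (int j = 0; j < n; ++j) {
      const int v = dest[i * stride + j] +
                    ((out[j * n + i] + (1 << (shift - 1))) >> shift);
      dest[i * stride + j] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }
  }
}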
-
-void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_4x8[] = {
- { aom_idct8_c, aom_idct4_c }, // DCT_DCT
- { aom_iadst8_c, aom_idct4_c }, // ADST_DCT
- { aom_idct8_c, aom_iadst4_c }, // DCT_ADST
- { aom_iadst8_c, aom_iadst4_c }, // ADST_ADST
-#if CONFIG_EXT_TX
- { aom_iadst8_c, aom_idct4_c }, // FLIPADST_DCT
- { aom_idct8_c, aom_iadst4_c }, // DCT_FLIPADST
- { aom_iadst8_c, aom_iadst4_c }, // FLIPADST_FLIPADST
- { aom_iadst8_c, aom_iadst4_c }, // ADST_FLIPADST
- { aom_iadst8_c, aom_iadst4_c }, // FLIPADST_ADST
- { iidtx8_c, iidtx4_c }, // IDTX
- { aom_idct8_c, iidtx4_c }, // V_DCT
- { iidtx8_c, aom_idct4_c }, // H_DCT
- { aom_iadst8_c, iidtx4_c }, // V_ADST
- { iidtx8_c, aom_iadst4_c }, // H_ADST
- { aom_iadst8_c, iidtx4_c }, // V_FLIPADST
- { iidtx8_c, aom_iadst4_c }, // H_FLIPADST
-#endif
- };
-
- const int n = 4;
- const int n2 = 8;
- int i, j;
- tran_low_t out[4][8], tmp[4][8], outtmp[4];
- tran_low_t *outp = &out[0][0];
- int outstride = n2;
-
-#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[1];
- const tran_high_t *lgtmtx_row[1];
- int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
- int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
-#endif
-
- // inverse transform row vectors and transpose
- for (i = 0; i < n2; ++i) {
-#if CONFIG_LGT
- if (use_lgt_row)
- ilgt4(input, outtmp, lgtmtx_row[0]);
- else
-#endif
- IHT_4x8[tx_type].rows(input, outtmp);
- for (j = 0; j < n; ++j)
- tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
- input += n;
- }
-
- // inverse transform column vectors
- for (i = 0; i < n; ++i) {
-#if CONFIG_LGT
- if (use_lgt_col)
- ilgt8(tmp[i], out[i], lgtmtx_col[0]);
- else
-#endif
- IHT_4x8[tx_type].cols(tmp[i], out[i]);
- }
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
-#endif
-
- // Sum with the destination
- for (i = 0; i < n2; ++i) {
- for (j = 0; j < n; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
- }
- }
-}
-
-void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_8x4[] = {
- { aom_idct4_c, aom_idct8_c }, // DCT_DCT
- { aom_iadst4_c, aom_idct8_c }, // ADST_DCT
- { aom_idct4_c, aom_iadst8_c }, // DCT_ADST
- { aom_iadst4_c, aom_iadst8_c }, // ADST_ADST
-#if CONFIG_EXT_TX
- { aom_iadst4_c, aom_idct8_c }, // FLIPADST_DCT
- { aom_idct4_c, aom_iadst8_c }, // DCT_FLIPADST
- { aom_iadst4_c, aom_iadst8_c }, // FLIPADST_FLIPADST
- { aom_iadst4_c, aom_iadst8_c }, // ADST_FLIPADST
- { aom_iadst4_c, aom_iadst8_c }, // FLIPADST_ADST
- { iidtx4_c, iidtx8_c }, // IDTX
- { aom_idct4_c, iidtx8_c }, // V_DCT
- { iidtx4_c, aom_idct8_c }, // H_DCT
- { aom_iadst4_c, iidtx8_c }, // V_ADST
- { iidtx4_c, aom_iadst8_c }, // H_ADST
- { aom_iadst4_c, iidtx8_c }, // V_FLIPADST
- { iidtx4_c, aom_iadst8_c }, // H_FLIPADST
-#endif
- };
-
- const int n = 4;
- const int n2 = 8;
-
- int i, j;
- tran_low_t out[8][4], tmp[8][4], outtmp[8];
- tran_low_t *outp = &out[0][0];
- int outstride = n;
-
-#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[1];
- const tran_high_t *lgtmtx_row[1];
- int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
- int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
-#endif
-
- // inverse transform row vectors and transpose
- for (i = 0; i < n; ++i) {
-#if CONFIG_LGT
- if (use_lgt_row)
- ilgt8(input, outtmp, lgtmtx_row[0]);
- else
-#endif
- IHT_8x4[tx_type].rows(input, outtmp);
- for (j = 0; j < n2; ++j)
- tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
- input += n2;
- }
-
- // inverse transform column vectors
- for (i = 0; i < n2; ++i) {
-#if CONFIG_LGT
- if (use_lgt_col)
- ilgt4(tmp[i], out[i], lgtmtx_col[0]);
- else
-#endif
- IHT_8x4[tx_type].cols(tmp[i], out[i]);
- }
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
-#endif
-
- // Sum with the destination
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n2; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
- }
- }
-}
-
-void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_4x16[] = {
- { aom_idct16_c, aom_idct4_c }, // DCT_DCT
- { aom_iadst16_c, aom_idct4_c }, // ADST_DCT
- { aom_idct16_c, aom_iadst4_c }, // DCT_ADST
- { aom_iadst16_c, aom_iadst4_c }, // ADST_ADST
-#if CONFIG_EXT_TX
- { aom_iadst16_c, aom_idct4_c }, // FLIPADST_DCT
- { aom_idct16_c, aom_iadst4_c }, // DCT_FLIPADST
- { aom_iadst16_c, aom_iadst4_c }, // FLIPADST_FLIPADST
- { aom_iadst16_c, aom_iadst4_c }, // ADST_FLIPADST
- { aom_iadst16_c, aom_iadst4_c }, // FLIPADST_ADST
- { iidtx16_c, iidtx4_c }, // IDTX
- { aom_idct16_c, iidtx4_c }, // V_DCT
- { iidtx16_c, aom_idct4_c }, // H_DCT
- { aom_iadst16_c, iidtx4_c }, // V_ADST
- { iidtx16_c, aom_iadst4_c }, // H_ADST
- { aom_iadst16_c, iidtx4_c }, // V_FLIPADST
- { iidtx16_c, aom_iadst4_c }, // H_FLIPADST
-#endif
- };
-
- const int n = 4;
- const int n4 = 16;
- int i, j;
- tran_low_t out[4][16], tmp[4][16], outtmp[4];
- tran_low_t *outp = &out[0][0];
- int outstride = n4;
-
-#if CONFIG_LGT
- const tran_high_t *lgtmtx_row[1];
- int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
-#endif
-
- // inverse transform row vectors and transpose
- for (i = 0; i < n4; ++i) {
-#if CONFIG_LGT
- if (use_lgt_row)
- ilgt4(input, outtmp, lgtmtx_row[0]);
- else
-#endif
- IHT_4x16[tx_type].rows(input, outtmp);
- for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
- input += n;
- }
-
- // inverse transform column vectors
- for (i = 0; i < n; ++i) {
- IHT_4x16[tx_type].cols(tmp[i], out[i]);
- }
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
-#endif
-
- // Sum with the destination
- for (i = 0; i < n4; ++i) {
- for (j = 0; j < n; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
- }
- }
-}
-
-void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_16x4[] = {
- { aom_idct4_c, aom_idct16_c }, // DCT_DCT
- { aom_iadst4_c, aom_idct16_c }, // ADST_DCT
- { aom_idct4_c, aom_iadst16_c }, // DCT_ADST
- { aom_iadst4_c, aom_iadst16_c }, // ADST_ADST
-#if CONFIG_EXT_TX
- { aom_iadst4_c, aom_idct16_c }, // FLIPADST_DCT
- { aom_idct4_c, aom_iadst16_c }, // DCT_FLIPADST
- { aom_iadst4_c, aom_iadst16_c }, // FLIPADST_FLIPADST
- { aom_iadst4_c, aom_iadst16_c }, // ADST_FLIPADST
- { aom_iadst4_c, aom_iadst16_c }, // FLIPADST_ADST
- { iidtx4_c, iidtx16_c }, // IDTX
- { aom_idct4_c, iidtx16_c }, // V_DCT
- { iidtx4_c, aom_idct16_c }, // H_DCT
- { aom_iadst4_c, iidtx16_c }, // V_ADST
- { iidtx4_c, aom_iadst16_c }, // H_ADST
- { aom_iadst4_c, iidtx16_c }, // V_FLIPADST
- { iidtx4_c, aom_iadst16_c }, // H_FLIPADST
-#endif
- };
-
- const int n = 4;
- const int n4 = 16;
-
- int i, j;
- tran_low_t out[16][4], tmp[16][4], outtmp[16];
- tran_low_t *outp = &out[0][0];
- int outstride = n;
-
-#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[1];
- int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
-#endif
-
- // inverse transform row vectors and transpose
- for (i = 0; i < n; ++i) {
- IHT_16x4[tx_type].rows(input, outtmp);
- for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
- input += n4;
- }
-
- // inverse transform column vectors
- for (i = 0; i < n4; ++i) {
-#if CONFIG_LGT
- if (use_lgt_col)
- ilgt4(tmp[i], out[i], lgtmtx_col[0]);
- else
-#endif
- IHT_16x4[tx_type].cols(tmp[i], out[i]);
- }
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
-#endif
-
- // Sum with the destination
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n4; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
- }
- }
-}
-
-void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_8x16[] = {
- { aom_idct16_c, aom_idct8_c }, // DCT_DCT
- { aom_iadst16_c, aom_idct8_c }, // ADST_DCT
- { aom_idct16_c, aom_iadst8_c }, // DCT_ADST
- { aom_iadst16_c, aom_iadst8_c }, // ADST_ADST
-#if CONFIG_EXT_TX
- { aom_iadst16_c, aom_idct8_c }, // FLIPADST_DCT
- { aom_idct16_c, aom_iadst8_c }, // DCT_FLIPADST
- { aom_iadst16_c, aom_iadst8_c }, // FLIPADST_FLIPADST
- { aom_iadst16_c, aom_iadst8_c }, // ADST_FLIPADST
- { aom_iadst16_c, aom_iadst8_c }, // FLIPADST_ADST
- { iidtx16_c, iidtx8_c }, // IDTX
- { aom_idct16_c, iidtx8_c }, // V_DCT
- { iidtx16_c, aom_idct8_c }, // H_DCT
- { aom_iadst16_c, iidtx8_c }, // V_ADST
- { iidtx16_c, aom_iadst8_c }, // H_ADST
- { aom_iadst16_c, iidtx8_c }, // V_FLIPADST
- { iidtx16_c, aom_iadst8_c }, // H_FLIPADST
-#endif
- };
-
- const int n = 8;
- const int n2 = 16;
- int i, j;
- tran_low_t out[8][16], tmp[8][16], outtmp[8];
- tran_low_t *outp = &out[0][0];
- int outstride = n2;
-
-#if CONFIG_LGT
- const tran_high_t *lgtmtx_row[1];
- int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
-#endif
-
- // inverse transform row vectors and transpose
- for (i = 0; i < n2; ++i) {
-#if CONFIG_LGT
- if (use_lgt_row)
- ilgt8(input, outtmp, lgtmtx_row[0]);
- else
-#endif
- IHT_8x16[tx_type].rows(input, outtmp);
- for (j = 0; j < n; ++j)
- tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
- input += n;
- }
-
- // inverse transform column vectors
- for (i = 0; i < n; ++i) {
- IHT_8x16[tx_type].cols(tmp[i], out[i]);
- }
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
-#endif
-
- // Sum with the destination
- for (i = 0; i < n2; ++i) {
- for (j = 0; j < n; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
- }
- }
-}
-
-void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_16x8[] = {
- { aom_idct8_c, aom_idct16_c }, // DCT_DCT
- { aom_iadst8_c, aom_idct16_c }, // ADST_DCT
- { aom_idct8_c, aom_iadst16_c }, // DCT_ADST
- { aom_iadst8_c, aom_iadst16_c }, // ADST_ADST
-#if CONFIG_EXT_TX
- { aom_iadst8_c, aom_idct16_c }, // FLIPADST_DCT
- { aom_idct8_c, aom_iadst16_c }, // DCT_FLIPADST
- { aom_iadst8_c, aom_iadst16_c }, // FLIPADST_FLIPADST
- { aom_iadst8_c, aom_iadst16_c }, // ADST_FLIPADST
- { aom_iadst8_c, aom_iadst16_c }, // FLIPADST_ADST
- { iidtx8_c, iidtx16_c }, // IDTX
- { aom_idct8_c, iidtx16_c }, // V_DCT
- { iidtx8_c, aom_idct16_c }, // H_DCT
- { aom_iadst8_c, iidtx16_c }, // V_ADST
- { iidtx8_c, aom_iadst16_c }, // H_ADST
- { aom_iadst8_c, iidtx16_c }, // V_FLIPADST
- { iidtx8_c, aom_iadst16_c }, // H_FLIPADST
-#endif
- };
-
- const int n = 8;
- const int n2 = 16;
-
- int i, j;
- tran_low_t out[16][8], tmp[16][8], outtmp[16];
- tran_low_t *outp = &out[0][0];
- int outstride = n;
-
-#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[1];
- int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
-#endif
-
- // inverse transform row vectors and transpose
- for (i = 0; i < n; ++i) {
- IHT_16x8[tx_type].rows(input, outtmp);
- for (j = 0; j < n2; ++j)
- tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
- input += n2;
- }
-
- // inverse transform column vectors
- for (i = 0; i < n2; ++i) {
-#if CONFIG_LGT
- if (use_lgt_col)
- ilgt8(tmp[i], out[i], lgtmtx_col[0]);
- else
-#endif
- IHT_16x8[tx_type].cols(tmp[i], out[i]);
- }
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
-#endif
-
- // Sum with the destination
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n2; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
- }
- }
-}
-
-void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_8x32[] = {
- { aom_idct32_c, aom_idct8_c }, // DCT_DCT
- { ihalfright32_c, aom_idct8_c }, // ADST_DCT
- { aom_idct32_c, aom_iadst8_c }, // DCT_ADST
- { ihalfright32_c, aom_iadst8_c }, // ADST_ADST
-#if CONFIG_EXT_TX
- { ihalfright32_c, aom_idct8_c }, // FLIPADST_DCT
- { aom_idct32_c, aom_iadst8_c }, // DCT_FLIPADST
- { ihalfright32_c, aom_iadst8_c }, // FLIPADST_FLIPADST
- { ihalfright32_c, aom_iadst8_c }, // ADST_FLIPADST
- { ihalfright32_c, aom_iadst8_c }, // FLIPADST_ADST
- { iidtx32_c, iidtx8_c }, // IDTX
- { aom_idct32_c, iidtx8_c }, // V_DCT
- { iidtx32_c, aom_idct8_c }, // H_DCT
- { ihalfright32_c, iidtx8_c }, // V_ADST
- { iidtx32_c, aom_iadst8_c }, // H_ADST
- { ihalfright32_c, iidtx8_c }, // V_FLIPADST
- { iidtx32_c, aom_iadst8_c }, // H_FLIPADST
-#endif
- };
-
- const int n = 8;
- const int n4 = 32;
- int i, j;
- tran_low_t out[8][32], tmp[8][32], outtmp[8];
- tran_low_t *outp = &out[0][0];
- int outstride = n4;
-
-#if CONFIG_LGT
- const tran_high_t *lgtmtx_row[1];
- int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
-#endif
-
- // inverse transform row vectors and transpose
- for (i = 0; i < n4; ++i) {
-#if CONFIG_LGT
- if (use_lgt_row)
- ilgt8(input, outtmp, lgtmtx_row[0]);
- else
-#endif
- IHT_8x32[tx_type].rows(input, outtmp);
- for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
- input += n;
- }
-
- // inverse transform column vectors
- for (i = 0; i < n; ++i) {
- IHT_8x32[tx_type].cols(tmp[i], out[i]);
- }
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
-#endif
-
- // Sum with the destination
- for (i = 0; i < n4; ++i) {
- for (j = 0; j < n; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
- }
- }
-}
-
-void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_32x8[] = {
- { aom_idct8_c, aom_idct32_c }, // DCT_DCT
- { aom_iadst8_c, aom_idct32_c }, // ADST_DCT
- { aom_idct8_c, ihalfright32_c }, // DCT_ADST
- { aom_iadst8_c, ihalfright32_c }, // ADST_ADST
-#if CONFIG_EXT_TX
- { aom_iadst8_c, aom_idct32_c }, // FLIPADST_DCT
- { aom_idct8_c, ihalfright32_c }, // DCT_FLIPADST
- { aom_iadst8_c, ihalfright32_c }, // FLIPADST_FLIPADST
- { aom_iadst8_c, ihalfright32_c }, // ADST_FLIPADST
- { aom_iadst8_c, ihalfright32_c }, // FLIPADST_ADST
- { iidtx8_c, iidtx32_c }, // IDTX
- { aom_idct8_c, iidtx32_c }, // V_DCT
- { iidtx8_c, aom_idct32_c }, // H_DCT
- { aom_iadst8_c, iidtx32_c }, // V_ADST
- { iidtx8_c, ihalfright32_c }, // H_ADST
- { aom_iadst8_c, iidtx32_c }, // V_FLIPADST
- { iidtx8_c, ihalfright32_c }, // H_FLIPADST
-#endif
- };
-
- const int n = 8;
- const int n4 = 32;
-
- int i, j;
- tran_low_t out[32][8], tmp[32][8], outtmp[32];
- tran_low_t *outp = &out[0][0];
- int outstride = n;
-
-#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[1];
- int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
-#endif
-
- // inverse transform row vectors and transpose
- for (i = 0; i < n; ++i) {
- IHT_32x8[tx_type].rows(input, outtmp);
- for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
- input += n4;
- }
-
- // inverse transform column vectors
- for (i = 0; i < n4; ++i) {
-#if CONFIG_LGT
- if (use_lgt_col)
- ilgt8(tmp[i], out[i], lgtmtx_col[0]);
- else
-#endif
- IHT_32x8[tx_type].cols(tmp[i], out[i]);
- }
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
-#endif
-
- // Sum with the destination
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n4; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
- }
- }
-}
-
-void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_16x32[] = {
- { aom_idct32_c, aom_idct16_c }, // DCT_DCT
- { ihalfright32_c, aom_idct16_c }, // ADST_DCT
- { aom_idct32_c, aom_iadst16_c }, // DCT_ADST
- { ihalfright32_c, aom_iadst16_c }, // ADST_ADST
-#if CONFIG_EXT_TX
- { ihalfright32_c, aom_idct16_c }, // FLIPADST_DCT
- { aom_idct32_c, aom_iadst16_c }, // DCT_FLIPADST
- { ihalfright32_c, aom_iadst16_c }, // FLIPADST_FLIPADST
- { ihalfright32_c, aom_iadst16_c }, // ADST_FLIPADST
- { ihalfright32_c, aom_iadst16_c }, // FLIPADST_ADST
- { iidtx32_c, iidtx16_c }, // IDTX
- { aom_idct32_c, iidtx16_c }, // V_DCT
- { iidtx32_c, aom_idct16_c }, // H_DCT
- { ihalfright32_c, iidtx16_c }, // V_ADST
- { iidtx32_c, aom_iadst16_c }, // H_ADST
- { ihalfright32_c, iidtx16_c }, // V_FLIPADST
- { iidtx32_c, aom_iadst16_c }, // H_FLIPADST
-#endif
- };
-
- const int n = 16;
- const int n2 = 32;
- int i, j;
- tran_low_t out[16][32], tmp[16][32], outtmp[16];
- tran_low_t *outp = &out[0][0];
- int outstride = n2;
-
- // inverse transform row vectors and transpose
- for (i = 0; i < n2; ++i) {
- IHT_16x32[tx_type].rows(input, outtmp);
- for (j = 0; j < n; ++j)
- tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
- input += n;
- }
-
- // inverse transform column vectors
- for (i = 0; i < n; ++i) IHT_16x32[tx_type].cols(tmp[i], out[i]);
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
-#endif
-
- // Sum with the destination
- for (i = 0; i < n2; ++i) {
- for (j = 0; j < n; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
- }
- }
-}
-
-void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_32x16[] = {
- { aom_idct16_c, aom_idct32_c }, // DCT_DCT
- { aom_iadst16_c, aom_idct32_c }, // ADST_DCT
- { aom_idct16_c, ihalfright32_c }, // DCT_ADST
- { aom_iadst16_c, ihalfright32_c }, // ADST_ADST
-#if CONFIG_EXT_TX
- { aom_iadst16_c, aom_idct32_c }, // FLIPADST_DCT
- { aom_idct16_c, ihalfright32_c }, // DCT_FLIPADST
- { aom_iadst16_c, ihalfright32_c }, // FLIPADST_FLIPADST
- { aom_iadst16_c, ihalfright32_c }, // ADST_FLIPADST
- { aom_iadst16_c, ihalfright32_c }, // FLIPADST_ADST
- { iidtx16_c, iidtx32_c }, // IDTX
- { aom_idct16_c, iidtx32_c }, // V_DCT
- { iidtx16_c, aom_idct32_c }, // H_DCT
- { aom_iadst16_c, iidtx32_c }, // V_ADST
- { iidtx16_c, ihalfright32_c }, // H_ADST
- { aom_iadst16_c, iidtx32_c }, // V_FLIPADST
- { iidtx16_c, ihalfright32_c }, // H_FLIPADST
-#endif
- };
- const int n = 16;
- const int n2 = 32;
-
- int i, j;
- tran_low_t out[32][16], tmp[32][16], outtmp[32];
- tran_low_t *outp = &out[0][0];
- int outstride = n;
-
- // inverse transform row vectors and transpose
- for (i = 0; i < n; ++i) {
- IHT_32x16[tx_type].rows(input, outtmp);
- for (j = 0; j < n2; ++j)
- tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
- input += n2;
- }
-
- // inverse transform column vectors
- for (i = 0; i < n2; ++i) IHT_32x16[tx_type].cols(tmp[i], out[i]);
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
-#endif
-
- // Sum with the destination
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n2; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
- }
- }
-}
-
-void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_8[] = {
-#if CONFIG_DAALA_DCT8
- { daala_idct8, daala_idct8 }, // DCT_DCT = 0
- { daala_idst8, daala_idct8 }, // ADST_DCT = 1
- { daala_idct8, daala_idst8 }, // DCT_ADST = 2
- { daala_idst8, daala_idst8 }, // ADST_ADST = 3
-#if CONFIG_EXT_TX
- { daala_idst8, daala_idct8 }, // FLIPADST_DCT
- { daala_idct8, daala_idst8 }, // DCT_FLIPADST
- { daala_idst8, daala_idst8 }, // FLIPADST_FLIPADST
- { daala_idst8, daala_idst8 }, // ADST_FLIPADST
- { daala_idst8, daala_idst8 }, // FLIPADST_ADST
- { daala_idtx8, daala_idtx8 }, // IDTX
- { daala_idct8, daala_idtx8 }, // V_DCT
- { daala_idtx8, daala_idct8 }, // H_DCT
- { daala_idst8, daala_idtx8 }, // V_ADST
- { daala_idtx8, daala_idst8 }, // H_ADST
- { daala_idst8, daala_idtx8 }, // V_FLIPADST
- { daala_idtx8, daala_idst8 }, // H_FLIPADST
-#endif
-#else
- { aom_idct8_c, aom_idct8_c }, // DCT_DCT = 0
- { aom_iadst8_c, aom_idct8_c }, // ADST_DCT = 1
- { aom_idct8_c, aom_iadst8_c }, // DCT_ADST = 2
- { aom_iadst8_c, aom_iadst8_c }, // ADST_ADST = 3
-#if CONFIG_EXT_TX
- { aom_iadst8_c, aom_idct8_c }, // FLIPADST_DCT
- { aom_idct8_c, aom_iadst8_c }, // DCT_FLIPADST
- { aom_iadst8_c, aom_iadst8_c }, // FLIPADST_FLIPADST
- { aom_iadst8_c, aom_iadst8_c }, // ADST_FLIPADST
- { aom_iadst8_c, aom_iadst8_c }, // FLIPADST_ADST
- { iidtx8_c, iidtx8_c }, // IDTX
- { aom_idct8_c, iidtx8_c }, // V_DCT
- { iidtx8_c, aom_idct8_c }, // H_DCT
- { aom_iadst8_c, iidtx8_c }, // V_ADST
- { iidtx8_c, aom_iadst8_c }, // H_ADST
- { aom_iadst8_c, iidtx8_c }, // V_FLIPADST
- { iidtx8_c, aom_iadst8_c }, // H_FLIPADST
-#endif
-#endif
- };
-
- int i, j;
- tran_low_t tmp[8][8];
- tran_low_t out[8][8];
- tran_low_t *outp = &out[0][0];
- int outstride = 8;
-
-#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[1];
- const tran_high_t *lgtmtx_row[1];
- int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
- int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
-#endif
-
- // inverse transform row vectors
- for (i = 0; i < 8; ++i) {
-#if CONFIG_DAALA_DCT8
- tran_low_t temp_in[8];
- for (j = 0; j < 8; j++) temp_in[j] = input[j] * 2;
- IHT_8[tx_type].rows(temp_in, out[i]);
-#else
-#if CONFIG_LGT
- if (use_lgt_row)
- ilgt8(input, out[i], lgtmtx_row[0]);
- else
-#endif
- IHT_8[tx_type].rows(input, out[i]);
-#endif
- input += 8;
- }
-
- // transpose
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- tmp[j][i] = out[i][j];
- }
- }
-
- // inverse transform column vectors
- for (i = 0; i < 8; ++i) {
-#if CONFIG_LGT
- if (use_lgt_col)
- ilgt8(tmp[i], out[i], lgtmtx_col[0]);
- else
-#endif
- IHT_8[tx_type].cols(tmp[i], out[i]);
- }
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
-#endif
-
- // Sum with the destination
- for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
-#if CONFIG_DAALA_DCT8
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
-#else
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
-#endif
- }
- }
-}
-
-void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_16[] = {
-#if CONFIG_DAALA_DCT16
- { daala_idct16, daala_idct16 }, // DCT_DCT = 0
- { daala_idst16, daala_idct16 }, // ADST_DCT = 1
- { daala_idct16, daala_idst16 }, // DCT_ADST = 2
- { daala_idst16, daala_idst16 }, // ADST_ADST = 3
-#if CONFIG_EXT_TX
- { daala_idst16, daala_idct16 }, // FLIPADST_DCT
- { daala_idct16, daala_idst16 }, // DCT_FLIPADST
- { daala_idst16, daala_idst16 }, // FLIPADST_FLIPADST
- { daala_idst16, daala_idst16 }, // ADST_FLIPADST
- { daala_idst16, daala_idst16 }, // FLIPADST_ADST
- { daala_idtx16, daala_idtx16 }, // IDTX
- { daala_idct16, daala_idtx16 }, // V_DCT
- { daala_idtx16, daala_idct16 }, // H_DCT
- { daala_idst16, daala_idtx16 }, // V_ADST
- { daala_idtx16, daala_idst16 }, // H_ADST
- { daala_idst16, daala_idtx16 }, // V_FLIPADST
- { daala_idtx16, daala_idst16 }, // H_FLIPADST
-#endif
-#else
- { aom_idct16_c, aom_idct16_c }, // DCT_DCT = 0
- { aom_iadst16_c, aom_idct16_c }, // ADST_DCT = 1
- { aom_idct16_c, aom_iadst16_c }, // DCT_ADST = 2
- { aom_iadst16_c, aom_iadst16_c }, // ADST_ADST = 3
-#if CONFIG_EXT_TX
- { aom_iadst16_c, aom_idct16_c }, // FLIPADST_DCT
- { aom_idct16_c, aom_iadst16_c }, // DCT_FLIPADST
- { aom_iadst16_c, aom_iadst16_c }, // FLIPADST_FLIPADST
- { aom_iadst16_c, aom_iadst16_c }, // ADST_FLIPADST
- { aom_iadst16_c, aom_iadst16_c }, // FLIPADST_ADST
- { iidtx16_c, iidtx16_c }, // IDTX
- { aom_idct16_c, iidtx16_c }, // V_DCT
- { iidtx16_c, aom_idct16_c }, // H_DCT
- { aom_iadst16_c, iidtx16_c }, // V_ADST
- { iidtx16_c, aom_iadst16_c }, // H_ADST
- { aom_iadst16_c, iidtx16_c }, // V_FLIPADST
- { iidtx16_c, aom_iadst16_c }, // H_FLIPADST
-#endif
-#endif
- };
-
- int i, j;
- tran_low_t tmp[16][16];
- tran_low_t out[16][16];
- tran_low_t *outp = &out[0][0];
- int outstride = 16;
-
- // inverse transform row vectors
- for (i = 0; i < 16; ++i) {
-#if CONFIG_DAALA_DCT16
- tran_low_t temp_in[16];
- for (j = 0; j < 16; j++) temp_in[j] = input[j] * 2;
- IHT_16[tx_type].rows(temp_in, out[i]);
-#else
- IHT_16[tx_type].rows(input, out[i]);
-#endif
- input += 16;
- }
-
- // transpose
- for (i = 0; i < 16; i++) {
- for (j = 0; j < 16; j++) {
- tmp[j][i] = out[i][j];
- }
- }
-
- // inverse transform column vectors
- for (i = 0; i < 16; ++i) IHT_16[tx_type].cols(tmp[i], out[i]);
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
-#endif
-
- // Sum with the destination
- for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
-#if CONFIG_DAALA_DCT16
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
-#else
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
-#endif
- }
- }
-}
-
-#if CONFIG_EXT_TX || CONFIG_DAALA_DCT32
-void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_32[] = {
-#if CONFIG_DAALA_DCT32
- { daala_idct32, daala_idct32 }, // DCT_DCT
-#if CONFIG_EXT_TX
- { daala_idst32, daala_idct32 }, // ADST_DCT
- { daala_idct32, daala_idst32 }, // DCT_ADST
- { daala_idst32, daala_idst32 }, // ADST_ADST
- { daala_idst32, daala_idct32 }, // FLIPADST_DCT
- { daala_idct32, daala_idst32 }, // DCT_FLIPADST
- { daala_idst32, daala_idst32 }, // FLIPADST_FLIPADST
- { daala_idst32, daala_idst32 }, // ADST_FLIPADST
- { daala_idst32, daala_idst32 }, // FLIPADST_ADST
- { daala_idtx32, daala_idtx32 }, // IDTX
- { daala_idct32, daala_idtx32 }, // V_DCT
- { daala_idtx32, daala_idct32 }, // H_DCT
- { daala_idst32, daala_idtx32 }, // V_ADST
- { daala_idtx32, daala_idst32 }, // H_ADST
- { daala_idst32, daala_idtx32 }, // V_FLIPADST
- { daala_idtx32, daala_idst32 }, // H_FLIPADST
-#endif
-#else
- { aom_idct32_c, aom_idct32_c }, // DCT_DCT
-#if CONFIG_EXT_TX
- { ihalfright32_c, aom_idct32_c }, // ADST_DCT
- { aom_idct32_c, ihalfright32_c }, // DCT_ADST
- { ihalfright32_c, ihalfright32_c }, // ADST_ADST
- { ihalfright32_c, aom_idct32_c }, // FLIPADST_DCT
- { aom_idct32_c, ihalfright32_c }, // DCT_FLIPADST
- { ihalfright32_c, ihalfright32_c }, // FLIPADST_FLIPADST
- { ihalfright32_c, ihalfright32_c }, // ADST_FLIPADST
- { ihalfright32_c, ihalfright32_c }, // FLIPADST_ADST
- { iidtx32_c, iidtx32_c }, // IDTX
- { aom_idct32_c, iidtx32_c }, // V_DCT
- { iidtx32_c, aom_idct32_c }, // H_DCT
- { ihalfright32_c, iidtx32_c }, // V_ADST
- { iidtx32_c, ihalfright32_c }, // H_ADST
- { ihalfright32_c, iidtx32_c }, // V_FLIPADST
- { iidtx32_c, ihalfright32_c }, // H_FLIPADST
-#endif
-#endif
- };
-
- int i, j;
- tran_low_t tmp[32][32];
- tran_low_t out[32][32];
- tran_low_t *outp = &out[0][0];
- int outstride = 32;
-
- // inverse transform row vectors
- for (i = 0; i < 32; ++i) {
-#if CONFIG_DAALA_DCT32
- tran_low_t temp_in[32];
- for (j = 0; j < 32; j++) temp_in[j] = input[j] * 2;
- IHT_32[tx_type].rows(temp_in, out[i]);
-#else
- IHT_32[tx_type].rows(input, out[i]);
-#endif
- input += 32;
- }
-
- // transpose
- for (i = 0; i < 32; i++) {
- for (j = 0; j < 32; j++) {
-#if CONFIG_DAALA_DCT32
- tmp[j][i] = out[i][j] * 4;
-#else
- tmp[j][i] = out[i][j];
-#endif
- }
- }
-
- // inverse transform column vectors
- for (i = 0; i < 32; ++i) IHT_32[tx_type].cols(tmp[i], out[i]);
-
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32);
-
- // Sum with the destination
- for (i = 0; i < 32; ++i) {
- for (j = 0; j < 32; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
-#if CONFIG_DAALA_DCT32
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
-#else
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
-#endif
- }
- }
-}
-#endif // CONFIG_EXT_TX || CONFIG_DAALA_DCT32
-
-#if CONFIG_TX64X64
-void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_64[] = {
-#if CONFIG_DAALA_DCT64
- { daala_idct64, daala_idct64 }, // DCT_DCT
- { daala_idst64, daala_idct64 }, // ADST_DCT
- { daala_idct64, daala_idst64 }, // DCT_ADST
- { daala_idst64, daala_idst64 }, // ADST_ADST
-#if CONFIG_EXT_TX
- { daala_idst64, daala_idct64 }, // FLIPADST_DCT
- { daala_idct64, daala_idst64 }, // DCT_FLIPADST
- { daala_idst64, daala_idst64 }, // FLIPADST_FLIPADST
- { daala_idst64, daala_idst64 }, // ADST_FLIPADST
- { daala_idst64, daala_idst64 }, // FLIPADST_ADST
- { daala_idtx64, daala_idtx64 }, // IDTX
- { daala_idct64, daala_idtx64 }, // V_DCT
- { daala_idtx64, daala_idct64 }, // H_DCT
- { daala_idst64, daala_idtx64 }, // V_ADST
- { daala_idtx64, daala_idst64 }, // H_ADST
- { daala_idst64, daala_idtx64 }, // V_FLIPADST
- { daala_idtx64, daala_idst64 }, // H_FLIPADST
-#endif
-#else
- { idct64_col_c, idct64_row_c }, // DCT_DCT
- { ihalfright64_c, idct64_row_c }, // ADST_DCT
- { idct64_col_c, ihalfright64_c }, // DCT_ADST
- { ihalfright64_c, ihalfright64_c }, // ADST_ADST
-#if CONFIG_EXT_TX
- { ihalfright64_c, idct64_row_c }, // FLIPADST_DCT
- { idct64_col_c, ihalfright64_c }, // DCT_FLIPADST
- { ihalfright64_c, ihalfright64_c }, // FLIPADST_FLIPADST
- { ihalfright64_c, ihalfright64_c }, // ADST_FLIPADST
- { ihalfright64_c, ihalfright64_c }, // FLIPADST_ADST
- { iidtx64_c, iidtx64_c }, // IDTX
- { idct64_col_c, iidtx64_c }, // V_DCT
- { iidtx64_c, idct64_row_c }, // H_DCT
- { ihalfright64_c, iidtx64_c }, // V_ADST
- { iidtx64_c, ihalfright64_c }, // H_ADST
- { ihalfright64_c, iidtx64_c }, // V_FLIPADST
- { iidtx64_c, ihalfright64_c }, // H_FLIPADST
-#endif
-#endif
- };
-
- int i, j;
- tran_low_t tmp[64][64];
- tran_low_t out[64][64];
- tran_low_t *outp = &out[0][0];
- int outstride = 64;
-
- // inverse transform row vectors
- for (i = 0; i < 64; ++i) {
-#if CONFIG_DAALA_DCT64
- tran_low_t temp_in[64];
- for (j = 0; j < 64; j++) temp_in[j] = input[j] * 2;
- IHT_64[tx_type].rows(temp_in, out[i]);
-// Do not rescale intermediate for Daala
-#else
- IHT_64[tx_type].rows(input, out[i]);
- for (j = 0; j < 64; ++j) out[i][j] = ROUND_POWER_OF_TWO(out[i][j], 1);
-#endif
- input += 64;
- }
-
- // transpose
- for (i = 0; i < 64; i++) {
- for (j = 0; j < 64; j++) {
- tmp[j][i] = out[i][j];
- }
- }
-
- // inverse transform column vectors
- for (i = 0; i < 64; ++i) IHT_64[tx_type].cols(tmp[i], out[i]);
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
-#endif // CONFIG_EXT_TX
-
- // Sum with the destination
- for (i = 0; i < 64; ++i) {
- for (j = 0; j < 64; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
-#if CONFIG_DAALA_DCT64
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 2));
-#else
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
-#endif
- }
- }
-}
-
-void av1_iht64x32_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_64x32[] = {
- { aom_idct32_c, idct64_row_c }, // DCT_DCT
- { ihalfright32_c, idct64_row_c }, // ADST_DCT
- { aom_idct32_c, ihalfright64_c }, // DCT_ADST
- { ihalfright32_c, ihalfright64_c }, // ADST_ADST
-#if CONFIG_EXT_TX
- { ihalfright32_c, idct64_row_c }, // FLIPADST_DCT
- { aom_idct32_c, ihalfright64_c }, // DCT_FLIPADST
- { ihalfright32_c, ihalfright64_c }, // FLIPADST_FLIPADST
- { ihalfright32_c, ihalfright64_c }, // ADST_FLIPADST
- { ihalfright32_c, ihalfright64_c }, // FLIPADST_ADST
- { iidtx32_c, iidtx64_c }, // IDTX
- { aom_idct32_c, iidtx64_c }, // V_DCT
- { iidtx32_c, idct64_row_c }, // H_DCT
- { ihalfright32_c, iidtx64_c }, // V_ADST
- { iidtx32_c, ihalfright64_c }, // H_ADST
- { ihalfright32_c, iidtx64_c }, // V_FLIPADST
- { iidtx32_c, ihalfright64_c }, // H_FLIPADST
-#endif
- };
- const int n = 32;
- const int n2 = 64;
-
- int i, j;
- tran_low_t out[64][32], tmp[64][32], outtmp[64];
- tran_low_t *outp = &out[0][0];
- int outstride = n;
-
- // inverse transform row vectors and transpose
- for (i = 0; i < n; ++i) {
- IHT_64x32[tx_type].rows(input, outtmp);
- for (j = 0; j < n2; ++j)
- tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2);
- input += n2;
- }
-
- // inverse transform column vectors
- for (i = 0; i < n2; ++i) IHT_64x32[tx_type].cols(tmp[i], out[i]);
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
-#endif
-
- // Sum with the destination
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n2; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
- }
- }
-}
-
-void av1_iht32x64_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
-#if CONFIG_MRC_TX
- assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
-#if CONFIG_DCT_ONLY
- assert(tx_type == DCT_DCT);
-#endif
- static const transform_2d IHT_32x64[] = {
- { idct64_col_c, aom_idct32_c }, // DCT_DCT
- { ihalfright64_c, aom_idct32_c }, // ADST_DCT
- { idct64_col_c, ihalfright32_c }, // DCT_ADST
- { ihalfright64_c, ihalfright32_c }, // ADST_ADST
-#if CONFIG_EXT_TX
- { ihalfright64_c, aom_idct32_c }, // FLIPADST_DCT
- { idct64_col_c, ihalfright32_c }, // DCT_FLIPADST
- { ihalfright64_c, ihalfright32_c }, // FLIPADST_FLIPADST
- { ihalfright64_c, ihalfright32_c }, // ADST_FLIPADST
- { ihalfright64_c, ihalfright32_c }, // FLIPADST_ADST
- { iidtx64_c, iidtx32_c }, // IDTX
- { idct64_col_c, iidtx32_c }, // V_DCT
- { iidtx64_c, aom_idct32_c }, // H_DCT
- { ihalfright64_c, iidtx32_c }, // V_ADST
- { iidtx64_c, ihalfright32_c }, // H_ADST
- { ihalfright64_c, iidtx32_c }, // V_FLIPADST
- { iidtx64_c, ihalfright32_c }, // H_FLIPADST
-#endif
- };
-
- const int n = 32;
- const int n2 = 64;
- int i, j;
- tran_low_t out[32][64], tmp[32][64], outtmp[32];
- tran_low_t *outp = &out[0][0];
- int outstride = n2;
-
- // inverse transform row vectors and transpose
- for (i = 0; i < n2; ++i) {
- IHT_32x64[tx_type].rows(input, outtmp);
- for (j = 0; j < n; ++j)
- tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2);
- input += n;
- }
-
- // inverse transform column vectors
- for (i = 0; i < n; ++i) IHT_32x64[tx_type].cols(tmp[i], out[i]);
-
-#if CONFIG_EXT_TX
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
-#endif
-
- // Sum with the destination
- for (i = 0; i < n2; ++i) {
- for (j = 0; j < n; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
- }
- }
-}
-
-#endif // CONFIG_TX64X64
-
// idct
-void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const int eob = txfm_param->eob;
- if (eob > 1)
- av1_iht4x4_16_add(input, dest, stride, txfm_param);
- else
- aom_idct4x4_1_add(input, dest, stride);
-}
-
-void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const int eob = txfm_param->eob;
- if (eob > 1)
- aom_iwht4x4_16_add(input, dest, stride);
- else
- aom_iwht4x4_1_add(input, dest, stride);
-}
-
-#if !CONFIG_DAALA_DCT8
-static void idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
-// If dc is 1, then input[0] is the reconstructed value and does not need
-// dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >= 1.
-
-// The calculation can be simplified if there are not many non-zero dct
-// coefficients. Use eobs to decide what to do.
-// TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
-// Combine that with code here.
-#if CONFIG_ADAPT_SCAN
- const int16_t half = txfm_param->eob_threshold[0];
-#else
- const int16_t half = 12;
-#endif
-
- const int eob = txfm_param->eob;
- if (eob == 1)
- // DC only DCT coefficient
- aom_idct8x8_1_add(input, dest, stride);
- else if (eob <= half)
- aom_idct8x8_12_add(input, dest, stride);
- else
- aom_idct8x8_64_add(input, dest, stride);
-}
-#endif
-
-#if !CONFIG_DAALA_DCT16
-static void idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
-// The calculation can be simplified if there are not many non-zero dct
-// coefficients. Use eobs to separate different cases.
-#if CONFIG_ADAPT_SCAN
- const int16_t half = txfm_param->eob_threshold[0];
- const int16_t quarter = txfm_param->eob_threshold[1];
-#else
- const int16_t half = 38;
- const int16_t quarter = 10;
-#endif
-
- const int eob = txfm_param->eob;
- if (eob == 1) /* DC only DCT coefficient. */
- aom_idct16x16_1_add(input, dest, stride);
- else if (eob <= quarter)
- aom_idct16x16_10_add(input, dest, stride);
- else if (eob <= half)
- aom_idct16x16_38_add(input, dest, stride);
- else
- aom_idct16x16_256_add(input, dest, stride);
-}
-#endif
-
-#if CONFIG_MRC_TX
-static void imrc32x32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
-#if CONFIG_ADAPT_SCAN
- const int16_t half = txfm_param->eob_threshold[0];
- const int16_t quarter = txfm_param->eob_threshold[1];
-#else
- const int16_t half = 135;
- const int16_t quarter = 34;
-#endif
-
- const int eob = txfm_param->eob;
- int n_masked_vals = 0;
- uint8_t *mask;
- uint8_t mask_tmp[32 * 32];
- if (eob == 1) {
- aom_idct32x32_1_add_c(input, dest, stride);
- } else {
- if ((txfm_param->is_inter && SIGNAL_MRC_MASK_INTER) ||
- (!txfm_param->is_inter && SIGNAL_MRC_MASK_INTRA)) {
- mask = txfm_param->mask;
- } else {
- n_masked_vals =
- get_mrc_pred_mask(txfm_param->dst, txfm_param->stride, mask_tmp, 32,
- 32, 32, txfm_param->is_inter);
- if (!is_valid_mrc_mask(n_masked_vals, 32, 32))
- assert(0 && "Invalid MRC mask");
- mask = mask_tmp;
- }
- if (eob <= quarter)
- // non-zero coeff only in upper-left 8x8
- aom_imrc32x32_34_add_c(input, dest, stride, mask);
- else if (eob <= half)
- // non-zero coeff only in upper-left 16x16
- aom_imrc32x32_135_add_c(input, dest, stride, mask);
- else
- aom_imrc32x32_1024_add_c(input, dest, stride, mask);
- }
-}
-#endif // CONFIG_MRC_TX
-
-#if !CONFIG_DAALA_DCT32
-static void idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
-#if CONFIG_ADAPT_SCAN
- const int16_t half = txfm_param->eob_threshold[0];
- const int16_t quarter = txfm_param->eob_threshold[1];
-#else
- const int16_t half = 135;
- const int16_t quarter = 34;
-#endif
-
- const int eob = txfm_param->eob;
- if (eob == 1)
- aom_idct32x32_1_add(input, dest, stride);
- else if (eob <= quarter)
- // non-zero coeff only in upper-left 8x8
- aom_idct32x32_34_add(input, dest, stride);
- else if (eob <= half)
- // non-zero coeff only in upper-left 16x16
- aom_idct32x32_135_add(input, dest, stride);
- else
- aom_idct32x32_1024_add(input, dest, stride);
-}
-#endif
-
-#if CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
-static void idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- (void)txfm_param;
- av1_iht64x64_4096_add(input, dest, stride, txfm_param);
-}
-#endif // CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
-
-#if CONFIG_CHROMA_2X2
-static void inv_txfm_add_2x2(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- tran_high_t a1 = input[0] >> UNIT_QUANT_SHIFT;
- tran_high_t b1 = input[1] >> UNIT_QUANT_SHIFT;
- tran_high_t c1 = input[2] >> UNIT_QUANT_SHIFT;
- tran_high_t d1 = input[3] >> UNIT_QUANT_SHIFT;
-
- tran_high_t a2 = a1 + c1;
- tran_high_t b2 = b1 + d1;
- tran_high_t c2 = a1 - c1;
- tran_high_t d2 = b1 - d1;
-
- (void)txfm_param;
-
- a1 = (a2 + b2) >> 2;
- b1 = (a2 - b2) >> 2;
- c1 = (c2 + d2) >> 2;
- d1 = (c2 - d2) >> 2;
-
- dest[0] = clip_pixel_add(dest[0], WRAPLOW(a1));
- dest[1] = clip_pixel_add(dest[1], WRAPLOW(b1));
- dest[stride] = clip_pixel_add(dest[stride], WRAPLOW(c1));
- dest[stride + 1] = clip_pixel_add(dest[stride + 1], WRAPLOW(d1));
-}
-#endif
-
-static void inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- if (txfm_param->lossless) {
- assert(tx_type == DCT_DCT);
- av1_iwht4x4_add(input, dest, stride, txfm_param);
- return;
- }
-
- switch (tx_type) {
-#if !CONFIG_DAALA_DCT4
- case DCT_DCT: av1_idct4x4_add(input, dest, stride, txfm_param); break;
-#else
- case DCT_DCT:
-#endif
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
-#if CONFIG_LGT || CONFIG_DAALA_DCT4
- // LGT only exists in C version
- av1_iht4x4_16_add_c(input, dest, stride, txfm_param);
- break;
-#else
- av1_iht4x4_16_add(input, dest, stride, txfm_param);
- break;
-#endif
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
-#if CONFIG_LGT || CONFIG_DAALA_DCT4
- av1_iht4x4_16_add_c(input, dest, stride, txfm_param);
- break;
-#else
- av1_iht4x4_16_add(input, dest, stride, txfm_param);
- break;
-#endif
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- // Use C version since DST only exists in C code
- av1_iht4x4_16_add_c(input, dest, stride, txfm_param);
- break;
- case IDTX: inv_idtx_add_c(input, dest, stride, 4, 4, tx_type); break;
-#endif // CONFIG_EXT_TX
- default: assert(0); break;
- }
-}
-
-static void inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
-#if CONFIG_LGT
- av1_iht4x8_32_add_c(input, dest, stride, txfm_param);
-#else
- av1_iht4x8_32_add(input, dest, stride, txfm_param);
-#endif
-}
-
-static void inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
-#if CONFIG_LGT
- av1_iht8x4_32_add_c(input, dest, stride, txfm_param);
-#else
- av1_iht8x4_32_add(input, dest, stride, txfm_param);
-#endif
-}
-
-// These will be used by the masked-tx experiment in the future.
-#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
-static void inv_txfm_add_4x16(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
-#if CONFIG_LGT
- av1_iht4x16_64_add_c(input, dest, stride, txfm_param);
-#else
- av1_iht4x16_64_add(input, dest, stride, txfm_param);
-#endif
-}
-
-static void inv_txfm_add_16x4(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
-#if CONFIG_LGT
- av1_iht16x4_64_add_c(input, dest, stride, txfm_param);
-#else
- av1_iht16x4_64_add(input, dest, stride, txfm_param);
-#endif
-}
-
-static void inv_txfm_add_8x32(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
-#if CONFIG_LGT
- av1_iht8x32_256_add_c(input, dest, stride, txfm_param);
-#else
- av1_iht8x32_256_add(input, dest, stride, txfm_param);
-#endif
-}
-
-static void inv_txfm_add_32x8(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
-#if CONFIG_LGT
- av1_iht32x8_256_add_c(input, dest, stride, txfm_param);
-#else
- av1_iht32x8_256_add(input, dest, stride, txfm_param);
-#endif
-}
-#endif
-
-static void inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
-#if CONFIG_LGT
- av1_iht8x16_128_add_c(input, dest, stride, txfm_param);
-#else
- av1_iht8x16_128_add(input, dest, stride, txfm_param);
-#endif
-}
-
-static void inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
-#if CONFIG_LGT
- av1_iht16x8_128_add_c(input, dest, stride, txfm_param);
-#else
- av1_iht16x8_128_add(input, dest, stride, txfm_param);
-#endif
-}
-
-static void inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- av1_iht16x32_512_add(input, dest, stride, txfm_param);
-}
-
-static void inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- av1_iht32x16_512_add(input, dest, stride, txfm_param);
-}
-
-#if CONFIG_TX64X64
-static void inv_txfm_add_32x64(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- av1_iht32x64_2048_add(input, dest, stride, txfm_param);
-}
-
-static void inv_txfm_add_64x32(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- av1_iht64x32_2048_add(input, dest, stride, txfm_param);
-}
-#endif // CONFIG_TX64X64
-
-static void inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- switch (tx_type) {
-#if !CONFIG_DAALA_DCT8
- case DCT_DCT: idct8x8_add(input, dest, stride, txfm_param); break;
-#else
- case DCT_DCT:
-#endif
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
-#if CONFIG_LGT || CONFIG_DAALA_DCT8
- av1_iht8x8_64_add_c(input, dest, stride, txfm_param);
- break;
-#else
- av1_iht8x8_64_add(input, dest, stride, txfm_param);
- break;
-#endif
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
-#if CONFIG_LGT || CONFIG_DAALA_DCT8
- av1_iht8x8_64_add_c(input, dest, stride, txfm_param);
- break;
-#else
- av1_iht8x8_64_add(input, dest, stride, txfm_param);
- break;
-#endif
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- // Use C version since DST only exists in C code
- av1_iht8x8_64_add_c(input, dest, stride, txfm_param);
- break;
- case IDTX: inv_idtx_add_c(input, dest, stride, 8, 8, tx_type); break;
-#endif // CONFIG_EXT_TX
- default: assert(0); break;
- }
-}
-
-static void inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- switch (tx_type) {
-#if !CONFIG_DAALA_DCT16
- case DCT_DCT: idct16x16_add(input, dest, stride, txfm_param); break;
-#else
- case DCT_DCT:
-#endif
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
-#if CONFIG_DAALA_DCT16
- av1_iht16x16_256_add_c(input, dest, stride, txfm_param);
-#else
- av1_iht16x16_256_add(input, dest, stride, txfm_param);
-#endif // CONFIG_DAALA_DCT16
- break;
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
-#if CONFIG_DAALA_DCT16
- av1_iht16x16_256_add_c(input, dest, stride, txfm_param);
-#else
- av1_iht16x16_256_add(input, dest, stride, txfm_param);
-#endif // CONFIG_DAALA_DCT16
- break;
- case IDTX: inv_idtx_add_c(input, dest, stride, 16, 16, tx_type); break;
-#endif // CONFIG_EXT_TX
-#if CONFIG_MRC_TX
- case MRC_DCT: assert(0 && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
- default: assert(0); break;
- }
-}
-
-static void inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- switch (tx_type) {
-#if !CONFIG_DAALA_DCT32
- case DCT_DCT: idct32x32_add(input, dest, stride, txfm_param); break;
-#else
- case DCT_DCT:
- av1_iht32x32_1024_add_c(input, dest, stride, txfm_param);
- break;
-#endif
-#if CONFIG_EXT_TX
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- case FLIPADST_DCT:
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- av1_iht32x32_1024_add_c(input, dest, stride, txfm_param);
- break;
- case IDTX: inv_idtx_add_c(input, dest, stride, 32, 32, tx_type); break;
-#endif // CONFIG_EXT_TX
-#if CONFIG_MRC_TX
- case MRC_DCT: imrc32x32_add_c(input, dest, stride, txfm_param); break;
-#endif // CONFIG_MRC_TX
- default: assert(0); break;
- }
-}
-
-#if CONFIG_TX64X64
-static void inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- assert(tx_type == DCT_DCT);
- switch (tx_type) {
-#if !CONFIG_DAALA_DCT64
- case DCT_DCT: idct64x64_add(input, dest, stride, txfm_param); break;
-#else
- case DCT_DCT:
-#endif
-#if CONFIG_EXT_TX
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- case FLIPADST_DCT:
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- av1_iht64x64_4096_add_c(input, dest, stride, txfm_param);
- break;
- case IDTX: inv_idtx_add_c(input, dest, stride, 64, 64, tx_type); break;
-#endif // CONFIG_EXT_TX
-#if CONFIG_MRC_TX
- case MRC_DCT: assert(0 && "Invalid tx type for tx size");
-#endif // CONFIG_MRC_TX
- default: assert(0); break;
- }
-}
-#endif // CONFIG_TX64X64
-
-void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
- int eob, int bd) {
+static void highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, int bd) {
if (eob > 1)
- aom_highbd_iwht4x4_16_add(input, dest, stride, bd);
+ av1_highbd_iwht4x4_16_add(input, dest, stride, bd);
else
- aom_highbd_iwht4x4_1_add(input, dest, stride, bd);
+ av1_highbd_iwht4x4_1_add(input, dest, stride, bd);
}
-#if CONFIG_CHROMA_2X2
-static void highbd_inv_txfm_add_2x2(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- int eob = txfm_param->eob;
- int bd = txfm_param->bd;
- int lossless = txfm_param->lossless;
- const TX_TYPE tx_type = txfm_param->tx_type;
- tran_high_t a1 = input[0] >> UNIT_QUANT_SHIFT;
- tran_high_t b1 = input[1] >> UNIT_QUANT_SHIFT;
- tran_high_t c1 = input[2] >> UNIT_QUANT_SHIFT;
- tran_high_t d1 = input[3] >> UNIT_QUANT_SHIFT;
-
- tran_high_t a2 = a1 + c1;
- tran_high_t b2 = b1 + d1;
- tran_high_t c2 = a1 - c1;
- tran_high_t d2 = b1 - d1;
-
- uint16_t *dst = CONVERT_TO_SHORTPTR(dest);
-
- (void)tx_type;
- (void)lossless;
- (void)eob;
-
- a1 = (a2 + b2) >> 2;
- b1 = (a2 - b2) >> 2;
- c1 = (c2 + d2) >> 2;
- d1 = (c2 - d2) >> 2;
-
- dst[0] = highbd_clip_pixel_add(dst[0], a1, bd);
- dst[1] = highbd_clip_pixel_add(dst[1], b1, bd);
- dst[stride] = highbd_clip_pixel_add(dst[stride], c1, bd);
- dst[stride + 1] = highbd_clip_pixel_add(dst[stride + 1], d1, bd);
-}
-#endif
-
static const int32_t *cast_to_int32(const tran_low_t *input) {
assert(sizeof(int32_t) == sizeof(tran_low_t));
return (const int32_t *)input;
@@ -2636,6 +46,7 @@ static const int32_t *cast_to_int32(const tran_low_t *input) {
void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
+ assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
int eob = txfm_param->eob;
int bd = txfm_param->bd;
int lossless = txfm_param->lossless;
@@ -2643,27 +54,12 @@ void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
const TX_TYPE tx_type = txfm_param->tx_type;
if (lossless) {
assert(tx_type == DCT_DCT);
- av1_highbd_iwht4x4_add(input, dest, stride, eob, bd);
+ highbd_iwht4x4_add(input, dest, stride, eob, bd);
return;
}
switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- av1_inv_txfm2d_add_4x4(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
- bd);
- break;
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- av1_inv_txfm2d_add_4x4(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
- bd);
- break;
- // use the c version for anything including identity for now
+ // Assembly version doesn't support some transform types, so use C version
+ // for those.
case V_DCT:
case H_DCT:
case V_ADST:
@@ -2674,68 +70,112 @@ void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
av1_inv_txfm2d_add_4x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
bd);
break;
-#endif // CONFIG_EXT_TX
- default: assert(0); break;
+ default:
+ av1_inv_txfm2d_add_4x4(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
+ bd);
+ break;
}
}
-void av1_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
+static void highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
+ int stride, const TxfmParam *txfm_param) {
+ assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_4x8_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
+ av1_inv_txfm2d_add_4x8(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
}
-void av1_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
+static void highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
+ int stride, const TxfmParam *txfm_param) {
+ assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_8x4_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
+ av1_inv_txfm2d_add_8x4(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
}
static void highbd_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_8x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
+ av1_inv_txfm2d_add_8x16(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
}
static void highbd_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_16x8_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
+ av1_inv_txfm2d_add_16x8(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
}
static void highbd_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_16x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
+ av1_inv_txfm2d_add_16x32(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
}
static void highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_32x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
+ av1_inv_txfm2d_add_32x16(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
+}
+
+static void highbd_inv_txfm_add_16x4(const tran_low_t *input, uint8_t *dest,
+ int stride, const TxfmParam *txfm_param) {
+ const int32_t *src = cast_to_int32(input);
+ av1_inv_txfm2d_add_16x4(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
+}
+
+static void highbd_inv_txfm_add_4x16(const tran_low_t *input, uint8_t *dest,
+ int stride, const TxfmParam *txfm_param) {
+ const int32_t *src = cast_to_int32(input);
+ av1_inv_txfm2d_add_4x16(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
+}
+
+static void highbd_inv_txfm_add_32x8(const tran_low_t *input, uint8_t *dest,
+ int stride, const TxfmParam *txfm_param) {
+ const int32_t *src = cast_to_int32(input);
+ av1_inv_txfm2d_add_32x8(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
+}
+
+static void highbd_inv_txfm_add_8x32(const tran_low_t *input, uint8_t *dest,
+ int stride, const TxfmParam *txfm_param) {
+ const int32_t *src = cast_to_int32(input);
+ av1_inv_txfm2d_add_8x32(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
}
-#if CONFIG_TX64X64
static void highbd_inv_txfm_add_32x64(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_32x64_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
+ av1_inv_txfm2d_add_32x64(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
}
static void highbd_inv_txfm_add_64x32(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_64x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
+ av1_inv_txfm2d_add_64x32(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
+}
+
+static void highbd_inv_txfm_add_16x64(const tran_low_t *input, uint8_t *dest,
+ int stride, const TxfmParam *txfm_param) {
+ const int32_t *src = cast_to_int32(input);
+ av1_inv_txfm2d_add_16x64(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
+}
+
+static void highbd_inv_txfm_add_64x16(const tran_low_t *input, uint8_t *dest,
+ int stride, const TxfmParam *txfm_param) {
+ const int32_t *src = cast_to_int32(input);
+ av1_inv_txfm2d_add_64x16(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
}
-#endif // CONFIG_TX64X64
static void highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
@@ -2743,23 +183,8 @@ static void highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
const TX_TYPE tx_type = txfm_param->tx_type;
const int32_t *src = cast_to_int32(input);
switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- av1_inv_txfm2d_add_8x8(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
- bd);
- break;
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- av1_inv_txfm2d_add_8x8(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
- bd);
- break;
- // use the c version for anything including identity for now
+ // Assembly version doesn't support some transform types, so use C version
+ // for those.
case V_DCT:
case H_DCT:
case V_ADST:
@@ -2770,8 +195,10 @@ static void highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
av1_inv_txfm2d_add_8x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
bd);
break;
-#endif // CONFIG_EXT_TX
- default: assert(0);
+ default:
+ av1_inv_txfm2d_add_8x8(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
+ bd);
+ break;
}
}
@@ -2781,23 +208,8 @@ static void highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
const TX_TYPE tx_type = txfm_param->tx_type;
const int32_t *src = cast_to_int32(input);
switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- av1_inv_txfm2d_add_16x16(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
- bd);
- break;
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- av1_inv_txfm2d_add_16x16(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
- bd);
- break;
- // use the c version for anything including identity for now
+ // Assembly version doesn't support some transform types, so use C version
+ // for those.
case V_DCT:
case H_DCT:
case V_ADST:
@@ -2808,14 +220,16 @@ static void highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
av1_inv_txfm2d_add_16x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
tx_type, bd);
break;
-#endif // CONFIG_EXT_TX
- default: assert(0);
+ default:
+ av1_inv_txfm2d_add_16x16(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
+ bd);
+ break;
}
}
static void highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
+ const int bd = txfm_param->bd;
const TX_TYPE tx_type = txfm_param->tx_type;
const int32_t *src = cast_to_int32(input);
switch (tx_type) {
@@ -2823,26 +237,8 @@ static void highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
av1_inv_txfm2d_add_32x32(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
bd);
break;
-
- // The optimised version only supports DCT_DCT, so force use of
- // the C version for all other transform types.
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
+ // Assembly version doesn't support IDTX, so use C version for it.
case IDTX:
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
-#endif // CONFIG_EXT_TX
av1_inv_txfm2d_add_32x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
tx_type, bd);
break;
@@ -2851,225 +247,34 @@ static void highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
}
}
-#if CONFIG_TX64X64
static void highbd_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
+ const int bd = txfm_param->bd;
const TX_TYPE tx_type = txfm_param->tx_type;
const int32_t *src = cast_to_int32(input);
- switch (tx_type) {
- case DCT_DCT:
- av1_inv_txfm2d_add_64x64(src, CONVERT_TO_SHORTPTR(dest), stride, DCT_DCT,
- bd);
- break;
-#if CONFIG_EXT_TX
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- case FLIPADST_DCT:
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- // TODO(sarahparker)
- // I've deleted the 64x64 implementations that existed in lieu
- // of adst, flipadst and identity for simplicity but will bring back
- // in a later change. This shouldn't impact performance since
- // DCT_DCT is the only extended type currently allowed for 64x64,
- // as dictated by get_ext_tx_set_type in blockd.h.
- av1_inv_txfm2d_add_64x64_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- DCT_DCT, bd);
- break;
- case IDTX:
- highbd_inv_idtx_add_c(input, dest, stride, 64, 64, tx_type, bd);
- break;
-#endif // CONFIG_EXT_TX
- default: assert(0); break;
- }
-}
-#endif // CONFIG_TX64X64
-
-void av1_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
- TxfmParam *txfm_param) {
- const TX_SIZE tx_size = txfm_param->tx_size;
-#if CONFIG_LGT_FROM_PRED
- if (txfm_param->use_lgt) {
- assert(is_lgt_allowed(txfm_param->mode, tx_size));
- ilgt2d_from_pred_add(input, dest, stride, txfm_param);
- return;
- }
-#endif // CONFIG_LGT_FROM_PRED
- switch (tx_size) {
-#if CONFIG_TX64X64
- case TX_64X64: inv_txfm_add_64x64(input, dest, stride, txfm_param); break;
-#endif // CONFIG_TX64X64
- case TX_32X32: inv_txfm_add_32x32(input, dest, stride, txfm_param); break;
- case TX_16X16: inv_txfm_add_16x16(input, dest, stride, txfm_param); break;
- case TX_8X8: inv_txfm_add_8x8(input, dest, stride, txfm_param); break;
- case TX_4X8: inv_txfm_add_4x8(input, dest, stride, txfm_param); break;
- case TX_8X4: inv_txfm_add_8x4(input, dest, stride, txfm_param); break;
- case TX_8X16: inv_txfm_add_8x16(input, dest, stride, txfm_param); break;
- case TX_16X8: inv_txfm_add_16x8(input, dest, stride, txfm_param); break;
- case TX_16X32: inv_txfm_add_16x32(input, dest, stride, txfm_param); break;
- case TX_32X16: inv_txfm_add_32x16(input, dest, stride, txfm_param); break;
-#if CONFIG_TX64X64
- case TX_64X32: inv_txfm_add_64x32(input, dest, stride, txfm_param); break;
- case TX_32X64: inv_txfm_add_32x64(input, dest, stride, txfm_param); break;
-#endif // CONFIG_TX64X64
- case TX_4X4:
- // this is like av1_short_idct4x4 but has a special case around eob<=1
- // which is significant (not just an optimization) for the lossless
- // case.
- inv_txfm_add_4x4(input, dest, stride, txfm_param);
- break;
-#if CONFIG_CHROMA_2X2
- case TX_2X2: inv_txfm_add_2x2(input, dest, stride, txfm_param); break;
-#endif
-#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- case TX_32X8: inv_txfm_add_32x8(input, dest, stride, txfm_param); break;
- case TX_8X32: inv_txfm_add_8x32(input, dest, stride, txfm_param); break;
- case TX_16X4: inv_txfm_add_16x4(input, dest, stride, txfm_param); break;
- case TX_4X16: inv_txfm_add_4x16(input, dest, stride, txfm_param); break;
-#endif
- default: assert(0 && "Invalid transform size"); break;
- }
+ assert(tx_type == DCT_DCT);
+ av1_inv_txfm2d_add_64x64(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, bd);
}
-static void init_txfm_param(const MACROBLOCKD *xd, TX_SIZE tx_size,
- TX_TYPE tx_type, int eob, TxfmParam *txfm_param) {
+static void init_txfm_param(const MACROBLOCKD *xd, int plane, TX_SIZE tx_size,
+ TX_TYPE tx_type, int eob, int reduced_tx_set,
+ TxfmParam *txfm_param) {
+ (void)plane;
txfm_param->tx_type = tx_type;
txfm_param->tx_size = tx_size;
txfm_param->eob = eob;
- txfm_param->lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+ txfm_param->lossless = xd->lossless[xd->mi[0]->segment_id];
txfm_param->bd = xd->bd;
-#if CONFIG_LGT
- txfm_param->is_inter = is_inter_block(&xd->mi[0]->mbmi);
-#endif
-#if CONFIG_LGT_FROM_PRED
- txfm_param->use_lgt = xd->mi[0]->mbmi.use_lgt;
-#endif
-#if CONFIG_ADAPT_SCAN
- txfm_param->eob_threshold =
- (const int16_t *)&xd->eob_threshold_md[tx_size][tx_type][0];
-#endif
-}
-
-#if !CONFIG_TXMG
-typedef void (*InvTxfmFunc)(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
- TxfmParam *txfm_param);
-
-static InvTxfmFunc inv_txfm_func[2] = { av1_inv_txfm_add,
- av1_highbd_inv_txfm_add };
-#endif
-
-void av1_inverse_transform_block(const MACROBLOCKD *xd,
- const tran_low_t *dqcoeff,
-#if CONFIG_LGT_FROM_PRED
- PREDICTION_MODE mode,
-#endif
-#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
- uint8_t *mrc_mask,
-#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
- TX_TYPE tx_type, TX_SIZE tx_size, uint8_t *dst,
- int stride, int eob) {
- if (!eob) return;
-#if CONFIG_PVQ
- const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
- const int txb_width = block_size_wide[tx_bsize];
- const int txb_height = block_size_high[tx_bsize];
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (int r = 0; r < txb_height; r++)
- for (int c = 0; c < txb_width; c++)
- CONVERT_TO_SHORTPTR(dst)[r * stride + c] = 0;
- } else {
- for (int r = 0; r < txb_height; r++)
- for (int c = 0; c < txb_width; c++) dst[r * stride + c] = 0;
- }
-#endif // CONFIG_PVQ
- TxfmParam txfm_param;
- init_txfm_param(xd, tx_size, tx_type, eob, &txfm_param);
-#if CONFIG_LGT || CONFIG_MRC_TX
- txfm_param.is_inter = is_inter_block(&xd->mi[0]->mbmi);
-#endif // CONFIG_LGT || CONFIG_MRC_TX
-#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
- txfm_param.mask = mrc_mask;
-#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
-#if CONFIG_LGT_FROM_PRED || CONFIG_MRC_TX
- txfm_param.dst = dst;
- txfm_param.stride = stride;
-#if CONFIG_LGT_FROM_PRED
- txfm_param.mode = mode;
-#endif // CONFIG_LGT_FROM_PRED
-#endif // CONFIG_LGT_FROM_PRED || CONFIG_MRC_TX
-
- const int is_hbd = get_bitdepth_data_path_index(xd);
-#if CONFIG_TXMG
- if (is_hbd) {
- av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
- } else {
- DECLARE_ALIGNED(16, uint16_t, tmp[MAX_TX_SQUARE]);
- int tmp_stride = MAX_TX_SIZE;
- int w = tx_size_wide[tx_size];
- int h = tx_size_high[tx_size];
- for (int r = 0; r < h; ++r) {
- for (int c = 0; c < w; ++c) {
- tmp[r * tmp_stride + c] = dst[r * stride + c];
- }
- }
-
- av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride,
- &txfm_param);
-
- for (int r = 0; r < h; ++r) {
- for (int c = 0; c < w; ++c) {
- dst[r * stride + c] = (uint8_t)tmp[r * tmp_stride + c];
- }
- }
- }
-#else // CONFIG_TXMG
- inv_txfm_func[is_hbd](dqcoeff, dst, stride, &txfm_param);
-#endif // CONFIG_TXMG
-}
-
-void av1_inverse_transform_block_facade(MACROBLOCKD *xd, int plane, int block,
- int blk_row, int blk_col, int eob) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
- uint8_t *mrc_mask = BLOCK_OFFSET(xd->mrc_mask, block);
-#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
- const PLANE_TYPE plane_type = get_plane_type(plane);
- const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
- const TX_TYPE tx_type =
- av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
- const int dst_stride = pd->dst.stride;
- uint8_t *dst =
- &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
- av1_inverse_transform_block(xd, dqcoeff,
-#if CONFIG_LGT_FROM_PRED
- xd->mi[0]->mbmi.mode,
-#endif // CONFIG_LGT_FROM_PRED
-#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
- mrc_mask,
-#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
- tx_type, tx_size, dst, dst_stride, eob);
+ txfm_param->is_hbd = get_bitdepth_data_path_index(xd);
+ txfm_param->tx_set_type = av1_get_ext_tx_set_type(
+ txfm_param->tx_size, is_inter_block(xd->mi[0]), reduced_tx_set);
}
-void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
- TxfmParam *txfm_param) {
+static void highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest,
+ int stride, const TxfmParam *txfm_param) {
+ assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
const TX_SIZE tx_size = txfm_param->tx_size;
switch (tx_size) {
-#if CONFIG_TX64X64
- case TX_64X64:
- highbd_inv_txfm_add_64x64(input, dest, stride, txfm_param);
- break;
-#endif // CONFIG_TX64X64
case TX_32X32:
highbd_inv_txfm_add_32x32(input, dest, stride, txfm_param);
break;
@@ -3080,10 +285,10 @@ void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
highbd_inv_txfm_add_8x8(input, dest, stride, txfm_param);
break;
case TX_4X8:
- av1_highbd_inv_txfm_add_4x8(input, dest, stride, txfm_param);
+ highbd_inv_txfm_add_4x8(input, dest, stride, txfm_param);
break;
case TX_8X4:
- av1_highbd_inv_txfm_add_8x4(input, dest, stride, txfm_param);
+ highbd_inv_txfm_add_8x4(input, dest, stride, txfm_param);
break;
case TX_8X16:
highbd_inv_txfm_add_8x16(input, dest, stride, txfm_param);
@@ -3097,25 +302,81 @@ void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
case TX_32X16:
highbd_inv_txfm_add_32x16(input, dest, stride, txfm_param);
break;
-#if CONFIG_TX64X64
- case TX_64X32:
- highbd_inv_txfm_add_64x32(input, dest, stride, txfm_param);
+ case TX_64X64:
+ highbd_inv_txfm_add_64x64(input, dest, stride, txfm_param);
break;
case TX_32X64:
highbd_inv_txfm_add_32x64(input, dest, stride, txfm_param);
break;
-#endif // CONFIG_TX64X64
+ case TX_64X32:
+ highbd_inv_txfm_add_64x32(input, dest, stride, txfm_param);
+ break;
+ case TX_16X64:
+ highbd_inv_txfm_add_16x64(input, dest, stride, txfm_param);
+ break;
+ case TX_64X16:
+ highbd_inv_txfm_add_64x16(input, dest, stride, txfm_param);
+ break;
case TX_4X4:
// this is like av1_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
av1_highbd_inv_txfm_add_4x4(input, dest, stride, txfm_param);
break;
-#if CONFIG_CHROMA_2X2
- case TX_2X2:
- highbd_inv_txfm_add_2x2(input, dest, stride, txfm_param);
+ case TX_16X4:
+ highbd_inv_txfm_add_16x4(input, dest, stride, txfm_param);
+ break;
+ case TX_4X16:
+ highbd_inv_txfm_add_4x16(input, dest, stride, txfm_param);
+ break;
+ case TX_8X32:
+ highbd_inv_txfm_add_8x32(input, dest, stride, txfm_param);
+ break;
+ case TX_32X8:
+ highbd_inv_txfm_add_32x8(input, dest, stride, txfm_param);
break;
-#endif
default: assert(0 && "Invalid transform size"); break;
}
}
+
+void av1_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
+ const TxfmParam *txfm_param) {
+ const TX_SIZE tx_size = txfm_param->tx_size;
+ DECLARE_ALIGNED(32, uint16_t, tmp[MAX_TX_SQUARE]);
+ int tmp_stride = MAX_TX_SIZE;
+ int w = tx_size_wide[tx_size];
+ int h = tx_size_high[tx_size];
+ for (int r = 0; r < h; ++r) {
+ for (int c = 0; c < w; ++c) {
+ tmp[r * tmp_stride + c] = dst[r * stride + c];
+ }
+ }
+
+ highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride, txfm_param);
+
+ for (int r = 0; r < h; ++r) {
+ for (int c = 0; c < w; ++c) {
+ dst[r * stride + c] = (uint8_t)tmp[r * tmp_stride + c];
+ }
+ }
+}
+
+void av1_inverse_transform_block(const MACROBLOCKD *xd,
+ const tran_low_t *dqcoeff, int plane,
+ TX_TYPE tx_type, TX_SIZE tx_size, uint8_t *dst,
+ int stride, int eob, int reduced_tx_set) {
+ if (!eob) return;
+
+ assert(eob <= av1_get_max_eob(tx_size));
+
+ TxfmParam txfm_param;
+ init_txfm_param(xd, plane, tx_size, tx_type, eob, reduced_tx_set,
+ &txfm_param);
+ assert(av1_ext_tx_used[txfm_param.tx_set_type][txfm_param.tx_type]);
+
+ if (txfm_param.is_hbd) {
+ highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
+ } else {
+ av1_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
+ }
+}
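
The new av1_inv_txfm_add_c() in the hunk above keeps a single real reconstruction path: the 8-bit destination block is widened into a 16-bit scratch buffer, the high-bit-depth routine runs on that buffer, and the samples are narrowed back. The snippet below is a minimal standalone sketch of that widen/process/narrow pattern, not the library routine itself; add_one_16bit() is a hypothetical stand-in for highbd_inv_txfm_add() (which also takes the dequantized coefficients and a TxfmParam), used only so the example compiles and runs on its own.

#include <stdint.h>
#include <stdio.h>

#define MAX_TX_SIZE 64
#define MAX_TX_SQUARE (MAX_TX_SIZE * MAX_TX_SIZE)

/* Hypothetical stand-in for the high-bit-depth reconstruction call:
 * it just adds 1 to every sample so the round trip is observable. */
static void add_one_16bit(uint16_t *dst, int stride, int w, int h) {
  for (int r = 0; r < h; ++r)
    for (int c = 0; c < w; ++c) dst[r * stride + c] += 1;
}

/* The widen -> process -> narrow pattern of av1_inv_txfm_add_c(): copy the
 * 8-bit destination into a 16-bit scratch block, run the 16-bit path on it,
 * then copy the result back into the 8-bit destination. */
static void lowbd_via_highbd(uint8_t *dst, int stride, int w, int h) {
  uint16_t tmp[MAX_TX_SQUARE];
  const int tmp_stride = MAX_TX_SIZE;
  for (int r = 0; r < h; ++r)
    for (int c = 0; c < w; ++c)
      tmp[r * tmp_stride + c] = dst[r * stride + c];

  add_one_16bit(tmp, tmp_stride, w, h);

  for (int r = 0; r < h; ++r)
    for (int c = 0; c < w; ++c)
      dst[r * stride + c] = (uint8_t)tmp[r * tmp_stride + c];
}

int main(void) {
  uint8_t dst[4 * 4] = { 0 };
  lowbd_via_highbd(dst, 4, 4, 4);
  printf("%d\n", dst[0]); /* prints 1 */
  return 0;
}
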
diff --git a/third_party/aom/av1/common/idct.h b/third_party/aom/av1/common/idct.h
index e4e4ad671..50032a167 100644
--- a/third_party/aom/av1/common/idct.h
+++ b/third_party/aom/av1/common/idct.h
@@ -12,15 +12,12 @@
#ifndef AV1_COMMON_IDCT_H_
#define AV1_COMMON_IDCT_H_
-#include <assert.h>
+#include "config/aom_config.h"
-#include "./aom_config.h"
#include "av1/common/blockd.h"
#include "av1/common/common.h"
#include "av1/common/enums.h"
-#include "aom_dsp/inv_txfm.h"
#include "aom_dsp/txfm_common.h"
-#include "aom_ports/mem.h"
#ifdef __cplusplus
extern "C" {
@@ -32,64 +29,16 @@ typedef struct {
transform_1d cols, rows; // vertical and horizontal
} transform_2d;
-#if CONFIG_LGT
-int get_lgt4(const TxfmParam *txfm_param, int is_col,
- const tran_high_t **lgtmtx);
-int get_lgt8(const TxfmParam *txfm_param, int is_col,
- const tran_high_t **lgtmtx);
-#endif // CONFIG_LGT
-
-#if CONFIG_LGT_FROM_PRED
-void get_lgt4_from_pred(const TxfmParam *txfm_param, int is_col,
- const tran_high_t **lgtmtx, int ntx);
-void get_lgt8_from_pred(const TxfmParam *txfm_param, int is_col,
- const tran_high_t **lgtmtx, int ntx);
-void get_lgt16up_from_pred(const TxfmParam *txfm_param, int is_col,
- const tran_high_t **lgtmtx, int ntx);
-#endif // CONFIG_LGT_FROM_PRED
-
-#if CONFIG_HIGHBITDEPTH
-typedef void (*highbd_transform_1d)(const tran_low_t *, tran_low_t *, int bd);
-
-typedef struct {
- highbd_transform_1d cols, rows; // vertical and horizontal
-} highbd_transform_2d;
-#endif // CONFIG_HIGHBITDEPTH
-
#define MAX_TX_SCALE 1
int av1_get_tx_scale(const TX_SIZE tx_size);
-void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param);
-void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param);
-
-void av1_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
- TxfmParam *txfm_param);
void av1_inverse_transform_block(const MACROBLOCKD *xd,
- const tran_low_t *dqcoeff,
-#if CONFIG_LGT_FROM_PRED
- PREDICTION_MODE mode,
-#endif
-#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
- uint8_t *mrc_mask,
-#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ const tran_low_t *dqcoeff, int plane,
TX_TYPE tx_type, TX_SIZE tx_size, uint8_t *dst,
- int stride, int eob);
-void av1_inverse_transform_block_facade(MACROBLOCKD *xd, int plane, int block,
- int blk_row, int blk_col, int eob);
+ int stride, int eob, int reduced_tx_set);
-void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
- int eob, int bd);
void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *param);
-void av1_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *param);
-void av1_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *param);
-void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
- TxfmParam *txfm_param);
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/laplace_tables.c b/third_party/aom/av1/common/laplace_tables.c
deleted file mode 100644
index ab8784895..000000000
--- a/third_party/aom/av1/common/laplace_tables.c
+++ /dev/null
@@ -1,657 +0,0 @@
-/* This file is auto-generated using "gen_laplace_tables 128 7" */
-
-/* clang-format off */
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include "aom_dsp/prob.h"
-#include "pvq.h"
-
-const uint16_t EXP_CDF_TABLE[128][16] = {
- {AOM_ICDF(32753), AOM_ICDF(32754), AOM_ICDF(32755), AOM_ICDF(32756),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(32499), AOM_ICDF(32753), AOM_ICDF(32755), AOM_ICDF(32756),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(32243), AOM_ICDF(32747), AOM_ICDF(32755), AOM_ICDF(32756),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(31987), AOM_ICDF(32737), AOM_ICDF(32755), AOM_ICDF(32756),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(31732), AOM_ICDF(32724), AOM_ICDF(32755), AOM_ICDF(32756),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(31476), AOM_ICDF(32706), AOM_ICDF(32754), AOM_ICDF(32756),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(31220), AOM_ICDF(32684), AOM_ICDF(32753), AOM_ICDF(32756),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(30964), AOM_ICDF(32658), AOM_ICDF(32751), AOM_ICDF(32756),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(30708), AOM_ICDF(32628), AOM_ICDF(32748), AOM_ICDF(32756),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(30452), AOM_ICDF(32594), AOM_ICDF(32745), AOM_ICDF(32756),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(30198), AOM_ICDF(32558), AOM_ICDF(32742), AOM_ICDF(32756),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(29941), AOM_ICDF(32515), AOM_ICDF(32736), AOM_ICDF(32755),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(29686), AOM_ICDF(32470), AOM_ICDF(32731), AOM_ICDF(32755),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(29429), AOM_ICDF(32419), AOM_ICDF(32723), AOM_ICDF(32754),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(29174), AOM_ICDF(32366), AOM_ICDF(32715), AOM_ICDF(32753),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(28918), AOM_ICDF(32308), AOM_ICDF(32705), AOM_ICDF(32752),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(28662), AOM_ICDF(32246), AOM_ICDF(32694), AOM_ICDF(32750),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(28406), AOM_ICDF(32180), AOM_ICDF(32681), AOM_ICDF(32748),
- AOM_ICDF(32757), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(28150), AOM_ICDF(32110), AOM_ICDF(32667), AOM_ICDF(32745),
- AOM_ICDF(32756), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(27894), AOM_ICDF(32036), AOM_ICDF(32651), AOM_ICDF(32742),
- AOM_ICDF(32756), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(27639), AOM_ICDF(31959), AOM_ICDF(32634), AOM_ICDF(32739),
- AOM_ICDF(32755), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(27383), AOM_ICDF(31877), AOM_ICDF(32614), AOM_ICDF(32735),
- AOM_ICDF(32755), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(27126), AOM_ICDF(31790), AOM_ICDF(32592), AOM_ICDF(32730),
- AOM_ICDF(32754), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(26871), AOM_ICDF(31701), AOM_ICDF(32569), AOM_ICDF(32725),
- AOM_ICDF(32753), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(26615), AOM_ICDF(31607), AOM_ICDF(32543), AOM_ICDF(32719),
- AOM_ICDF(32752), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(26361), AOM_ICDF(31511), AOM_ICDF(32517), AOM_ICDF(32713),
- AOM_ICDF(32751), AOM_ICDF(32758), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(26104), AOM_ICDF(31408), AOM_ICDF(32485), AOM_ICDF(32704),
- AOM_ICDF(32748), AOM_ICDF(32757), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(25848), AOM_ICDF(31302), AOM_ICDF(32452), AOM_ICDF(32695),
- AOM_ICDF(32746), AOM_ICDF(32757), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(25591), AOM_ICDF(31191), AOM_ICDF(32416), AOM_ICDF(32684),
- AOM_ICDF(32743), AOM_ICDF(32756), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(25336), AOM_ICDF(31078), AOM_ICDF(32379), AOM_ICDF(32674),
- AOM_ICDF(32741), AOM_ICDF(32756), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(25080), AOM_ICDF(30960), AOM_ICDF(32338), AOM_ICDF(32661),
- AOM_ICDF(32737), AOM_ICDF(32755), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(24824), AOM_ICDF(30838), AOM_ICDF(32295), AOM_ICDF(32648),
- AOM_ICDF(32733), AOM_ICDF(32754), AOM_ICDF(32759), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(24568), AOM_ICDF(30712), AOM_ICDF(32248), AOM_ICDF(32632),
- AOM_ICDF(32728), AOM_ICDF(32752), AOM_ICDF(32758), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(24313), AOM_ICDF(30583), AOM_ICDF(32199), AOM_ICDF(32616),
- AOM_ICDF(32723), AOM_ICDF(32751), AOM_ICDF(32758), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(24057), AOM_ICDF(30449), AOM_ICDF(32147), AOM_ICDF(32598),
- AOM_ICDF(32718), AOM_ICDF(32750), AOM_ICDF(32758), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(23801), AOM_ICDF(30311), AOM_ICDF(32091), AOM_ICDF(32578),
- AOM_ICDF(32711), AOM_ICDF(32747), AOM_ICDF(32757), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(23546), AOM_ICDF(30170), AOM_ICDF(32033), AOM_ICDF(32557),
- AOM_ICDF(32704), AOM_ICDF(32745), AOM_ICDF(32757), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(23288), AOM_ICDF(30022), AOM_ICDF(31969), AOM_ICDF(32532),
- AOM_ICDF(32695), AOM_ICDF(32742), AOM_ICDF(32756), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(23033), AOM_ICDF(29873), AOM_ICDF(31904), AOM_ICDF(32507),
- AOM_ICDF(32686), AOM_ICDF(32739), AOM_ICDF(32755), AOM_ICDF(32760),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(22778), AOM_ICDF(29720), AOM_ICDF(31835), AOM_ICDF(32479),
- AOM_ICDF(32675), AOM_ICDF(32735), AOM_ICDF(32753), AOM_ICDF(32759),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(22521), AOM_ICDF(29561), AOM_ICDF(31761), AOM_ICDF(32449),
- AOM_ICDF(32664), AOM_ICDF(32731), AOM_ICDF(32752), AOM_ICDF(32759),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(22267), AOM_ICDF(29401), AOM_ICDF(31686), AOM_ICDF(32418),
- AOM_ICDF(32652), AOM_ICDF(32727), AOM_ICDF(32751), AOM_ICDF(32759),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(22011), AOM_ICDF(29235), AOM_ICDF(31605), AOM_ICDF(32383),
- AOM_ICDF(32638), AOM_ICDF(32722), AOM_ICDF(32749), AOM_ICDF(32758),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(21754), AOM_ICDF(29064), AOM_ICDF(31520), AOM_ICDF(32345),
- AOM_ICDF(32622), AOM_ICDF(32715), AOM_ICDF(32746), AOM_ICDF(32757),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(21501), AOM_ICDF(28893), AOM_ICDF(31434), AOM_ICDF(32307),
- AOM_ICDF(32607), AOM_ICDF(32710), AOM_ICDF(32745), AOM_ICDF(32757),
- AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(21243), AOM_ICDF(28713), AOM_ICDF(31339), AOM_ICDF(32262),
- AOM_ICDF(32587), AOM_ICDF(32701), AOM_ICDF(32741), AOM_ICDF(32755),
- AOM_ICDF(32760), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(20988), AOM_ICDF(28532), AOM_ICDF(31243), AOM_ICDF(32217),
- AOM_ICDF(32567), AOM_ICDF(32693), AOM_ICDF(32738), AOM_ICDF(32754),
- AOM_ICDF(32760), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(20730), AOM_ICDF(28344), AOM_ICDF(31140), AOM_ICDF(32167),
- AOM_ICDF(32544), AOM_ICDF(32682), AOM_ICDF(32733), AOM_ICDF(32752),
- AOM_ICDF(32759), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(20476), AOM_ICDF(28156), AOM_ICDF(31036), AOM_ICDF(32116),
- AOM_ICDF(32521), AOM_ICDF(32673), AOM_ICDF(32730), AOM_ICDF(32751),
- AOM_ICDF(32759), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(20220), AOM_ICDF(27962), AOM_ICDF(30926), AOM_ICDF(32061),
- AOM_ICDF(32495), AOM_ICDF(32661), AOM_ICDF(32725), AOM_ICDF(32749),
- AOM_ICDF(32758), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(19963), AOM_ICDF(27763), AOM_ICDF(30810), AOM_ICDF(32000),
- AOM_ICDF(32465), AOM_ICDF(32647), AOM_ICDF(32718), AOM_ICDF(32746),
- AOM_ICDF(32757), AOM_ICDF(32761), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(19708), AOM_ICDF(27562), AOM_ICDF(30691), AOM_ICDF(31938),
- AOM_ICDF(32435), AOM_ICDF(32633), AOM_ICDF(32712), AOM_ICDF(32743),
- AOM_ICDF(32756), AOM_ICDF(32761), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(19454), AOM_ICDF(27358), AOM_ICDF(30569), AOM_ICDF(31873),
- AOM_ICDF(32403), AOM_ICDF(32618), AOM_ICDF(32705), AOM_ICDF(32741),
- AOM_ICDF(32755), AOM_ICDF(32761), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(19196), AOM_ICDF(27146), AOM_ICDF(30438), AOM_ICDF(31801),
- AOM_ICDF(32365), AOM_ICDF(32599), AOM_ICDF(32696), AOM_ICDF(32736),
- AOM_ICDF(32753), AOM_ICDF(32760), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(18942), AOM_ICDF(26934), AOM_ICDF(30306), AOM_ICDF(31728),
- AOM_ICDF(32328), AOM_ICDF(32581), AOM_ICDF(32688), AOM_ICDF(32733),
- AOM_ICDF(32752), AOM_ICDF(32760), AOM_ICDF(32763), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(18684), AOM_ICDF(26714), AOM_ICDF(30164), AOM_ICDF(31647),
- AOM_ICDF(32284), AOM_ICDF(32558), AOM_ICDF(32676), AOM_ICDF(32727),
- AOM_ICDF(32749), AOM_ICDF(32758), AOM_ICDF(32762), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(18429), AOM_ICDF(26493), AOM_ICDF(30021), AOM_ICDF(31565),
- AOM_ICDF(32240), AOM_ICDF(32535), AOM_ICDF(32664), AOM_ICDF(32721),
- AOM_ICDF(32746), AOM_ICDF(32757), AOM_ICDF(32762), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(18174), AOM_ICDF(26268), AOM_ICDF(29872), AOM_ICDF(31477),
- AOM_ICDF(32192), AOM_ICDF(32510), AOM_ICDF(32652), AOM_ICDF(32715),
- AOM_ICDF(32743), AOM_ICDF(32756), AOM_ICDF(32762), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(17920), AOM_ICDF(26040), AOM_ICDF(29719), AOM_ICDF(31386),
- AOM_ICDF(32141), AOM_ICDF(32483), AOM_ICDF(32638), AOM_ICDF(32708),
- AOM_ICDF(32740), AOM_ICDF(32754), AOM_ICDF(32761), AOM_ICDF(32764),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(17661), AOM_ICDF(25803), AOM_ICDF(29556), AOM_ICDF(31286),
- AOM_ICDF(32083), AOM_ICDF(32451), AOM_ICDF(32620), AOM_ICDF(32698),
- AOM_ICDF(32734), AOM_ICDF(32751), AOM_ICDF(32759), AOM_ICDF(32763),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(17406), AOM_ICDF(25566), AOM_ICDF(29391), AOM_ICDF(31184),
- AOM_ICDF(32024), AOM_ICDF(32418), AOM_ICDF(32603), AOM_ICDF(32690),
- AOM_ICDF(32731), AOM_ICDF(32750), AOM_ICDF(32759), AOM_ICDF(32763),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(17151), AOM_ICDF(25325), AOM_ICDF(29220), AOM_ICDF(31076),
- AOM_ICDF(31961), AOM_ICDF(32383), AOM_ICDF(32584), AOM_ICDF(32680),
- AOM_ICDF(32726), AOM_ICDF(32748), AOM_ICDF(32758), AOM_ICDF(32763),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(16896), AOM_ICDF(25080), AOM_ICDF(29044), AOM_ICDF(30964),
- AOM_ICDF(31894), AOM_ICDF(32344), AOM_ICDF(32562), AOM_ICDF(32668),
- AOM_ICDF(32719), AOM_ICDF(32744), AOM_ICDF(32756), AOM_ICDF(32762),
- AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(16639), AOM_ICDF(24829), AOM_ICDF(28860), AOM_ICDF(30844),
- AOM_ICDF(31821), AOM_ICDF(32302), AOM_ICDF(32539), AOM_ICDF(32655),
- AOM_ICDF(32712), AOM_ICDF(32740), AOM_ICDF(32754), AOM_ICDF(32761),
- AOM_ICDF(32764), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(28672), AOM_ICDF(30720),
- AOM_ICDF(31744), AOM_ICDF(32256), AOM_ICDF(32512), AOM_ICDF(32640),
- AOM_ICDF(32704), AOM_ICDF(32736), AOM_ICDF(32752), AOM_ICDF(32760),
- AOM_ICDF(32764), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(16130), AOM_ICDF(24320), AOM_ICDF(28479), AOM_ICDF(30591),
- AOM_ICDF(31663), AOM_ICDF(32208), AOM_ICDF(32485), AOM_ICDF(32625),
- AOM_ICDF(32696), AOM_ICDF(32732), AOM_ICDF(32750), AOM_ICDF(32759),
- AOM_ICDF(32764), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768)},
- {AOM_ICDF(15872), AOM_ICDF(24056), AOM_ICDF(28276), AOM_ICDF(30452),
- AOM_ICDF(31574), AOM_ICDF(32152), AOM_ICDF(32450), AOM_ICDF(32604),
- AOM_ICDF(32683), AOM_ICDF(32724), AOM_ICDF(32745), AOM_ICDF(32756),
- AOM_ICDF(32762), AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32768)},
- {AOM_ICDF(15615), AOM_ICDF(23789), AOM_ICDF(28068), AOM_ICDF(30308),
- AOM_ICDF(31480), AOM_ICDF(32094), AOM_ICDF(32415), AOM_ICDF(32583),
- AOM_ICDF(32671), AOM_ICDF(32717), AOM_ICDF(32741), AOM_ICDF(32754),
- AOM_ICDF(32761), AOM_ICDF(32764), AOM_ICDF(32766), AOM_ICDF(32768)},
- {AOM_ICDF(15361), AOM_ICDF(23521), AOM_ICDF(27856), AOM_ICDF(30159),
- AOM_ICDF(31382), AOM_ICDF(32032), AOM_ICDF(32377), AOM_ICDF(32560),
- AOM_ICDF(32657), AOM_ICDF(32709), AOM_ICDF(32737), AOM_ICDF(32752),
- AOM_ICDF(32760), AOM_ICDF(32764), AOM_ICDF(32766), AOM_ICDF(32768)},
- {AOM_ICDF(15103), AOM_ICDF(23245), AOM_ICDF(27634), AOM_ICDF(30000),
- AOM_ICDF(31275), AOM_ICDF(31963), AOM_ICDF(32334), AOM_ICDF(32534),
- AOM_ICDF(32642), AOM_ICDF(32700), AOM_ICDF(32731), AOM_ICDF(32748),
- AOM_ICDF(32757), AOM_ICDF(32762), AOM_ICDF(32765), AOM_ICDF(32768)},
- {AOM_ICDF(14848), AOM_ICDF(22968), AOM_ICDF(27409), AOM_ICDF(29837),
- AOM_ICDF(31165), AOM_ICDF(31891), AOM_ICDF(32288), AOM_ICDF(32505),
- AOM_ICDF(32624), AOM_ICDF(32689), AOM_ICDF(32725), AOM_ICDF(32744),
- AOM_ICDF(32755), AOM_ICDF(32761), AOM_ICDF(32764), AOM_ICDF(32768)},
- {AOM_ICDF(14592), AOM_ICDF(22686), AOM_ICDF(27176), AOM_ICDF(29666),
- AOM_ICDF(31047), AOM_ICDF(31813), AOM_ICDF(32238), AOM_ICDF(32474),
- AOM_ICDF(32605), AOM_ICDF(32678), AOM_ICDF(32718), AOM_ICDF(32740),
- AOM_ICDF(32752), AOM_ICDF(32759), AOM_ICDF(32763), AOM_ICDF(32768)},
- {AOM_ICDF(14336), AOM_ICDF(22400), AOM_ICDF(26936), AOM_ICDF(29488),
- AOM_ICDF(30923), AOM_ICDF(31730), AOM_ICDF(32184), AOM_ICDF(32439),
- AOM_ICDF(32583), AOM_ICDF(32664), AOM_ICDF(32709), AOM_ICDF(32735),
- AOM_ICDF(32749), AOM_ICDF(32757), AOM_ICDF(32762), AOM_ICDF(32768)},
- {AOM_ICDF(14079), AOM_ICDF(22109), AOM_ICDF(26689), AOM_ICDF(29301),
- AOM_ICDF(30791), AOM_ICDF(31641), AOM_ICDF(32125), AOM_ICDF(32401),
- AOM_ICDF(32559), AOM_ICDF(32649), AOM_ICDF(32700), AOM_ICDF(32729),
- AOM_ICDF(32746), AOM_ICDF(32756), AOM_ICDF(32761), AOM_ICDF(32768)},
- {AOM_ICDF(13825), AOM_ICDF(21817), AOM_ICDF(26437), AOM_ICDF(29108),
- AOM_ICDF(30652), AOM_ICDF(31545), AOM_ICDF(32061), AOM_ICDF(32359),
- AOM_ICDF(32532), AOM_ICDF(32632), AOM_ICDF(32690), AOM_ICDF(32723),
- AOM_ICDF(32742), AOM_ICDF(32753), AOM_ICDF(32759), AOM_ICDF(32768)},
- {AOM_ICDF(13568), AOM_ICDF(21518), AOM_ICDF(26176), AOM_ICDF(28905),
- AOM_ICDF(30504), AOM_ICDF(31441), AOM_ICDF(31990), AOM_ICDF(32312),
- AOM_ICDF(32501), AOM_ICDF(32611), AOM_ICDF(32676), AOM_ICDF(32714),
- AOM_ICDF(32736), AOM_ICDF(32749), AOM_ICDF(32757), AOM_ICDF(32768)},
- {AOM_ICDF(13314), AOM_ICDF(21218), AOM_ICDF(25911), AOM_ICDF(28697),
- AOM_ICDF(30351), AOM_ICDF(31333), AOM_ICDF(31916), AOM_ICDF(32262),
- AOM_ICDF(32468), AOM_ICDF(32590), AOM_ICDF(32662), AOM_ICDF(32705),
- AOM_ICDF(32731), AOM_ICDF(32746), AOM_ICDF(32755), AOM_ICDF(32768)},
- {AOM_ICDF(13054), AOM_ICDF(20908), AOM_ICDF(25633), AOM_ICDF(28475),
- AOM_ICDF(30185), AOM_ICDF(31214), AOM_ICDF(31833), AOM_ICDF(32205),
- AOM_ICDF(32429), AOM_ICDF(32564), AOM_ICDF(32645), AOM_ICDF(32694),
- AOM_ICDF(32723), AOM_ICDF(32741), AOM_ICDF(32752), AOM_ICDF(32768)},
- {AOM_ICDF(12803), AOM_ICDF(20603), AOM_ICDF(25356), AOM_ICDF(28252),
- AOM_ICDF(30017), AOM_ICDF(31093), AOM_ICDF(31748), AOM_ICDF(32147),
- AOM_ICDF(32390), AOM_ICDF(32538), AOM_ICDF(32628), AOM_ICDF(32683),
- AOM_ICDF(32717), AOM_ICDF(32737), AOM_ICDF(32749), AOM_ICDF(32768)},
- {AOM_ICDF(12544), AOM_ICDF(20286), AOM_ICDF(25064), AOM_ICDF(28013),
- AOM_ICDF(29833), AOM_ICDF(30956), AOM_ICDF(31649), AOM_ICDF(32077),
- AOM_ICDF(32341), AOM_ICDF(32504), AOM_ICDF(32605), AOM_ICDF(32667),
- AOM_ICDF(32705), AOM_ICDF(32729), AOM_ICDF(32744), AOM_ICDF(32768)},
- {AOM_ICDF(12288), AOM_ICDF(19968), AOM_ICDF(24768), AOM_ICDF(27768),
- AOM_ICDF(29643), AOM_ICDF(30815), AOM_ICDF(31547), AOM_ICDF(32005),
- AOM_ICDF(32291), AOM_ICDF(32470), AOM_ICDF(32582), AOM_ICDF(32652),
- AOM_ICDF(32696), AOM_ICDF(32723), AOM_ICDF(32740), AOM_ICDF(32768)},
- {AOM_ICDF(12033), AOM_ICDF(19647), AOM_ICDF(24465), AOM_ICDF(27514),
- AOM_ICDF(29443), AOM_ICDF(30664), AOM_ICDF(31437), AOM_ICDF(31926),
- AOM_ICDF(32235), AOM_ICDF(32431), AOM_ICDF(32555), AOM_ICDF(32633),
- AOM_ICDF(32683), AOM_ICDF(32714), AOM_ICDF(32734), AOM_ICDF(32768)},
- {AOM_ICDF(11777), AOM_ICDF(19321), AOM_ICDF(24154), AOM_ICDF(27250),
- AOM_ICDF(29233), AOM_ICDF(30504), AOM_ICDF(31318), AOM_ICDF(31839),
- AOM_ICDF(32173), AOM_ICDF(32387), AOM_ICDF(32524), AOM_ICDF(32612),
- AOM_ICDF(32668), AOM_ICDF(32704), AOM_ICDF(32727), AOM_ICDF(32768)},
- {AOM_ICDF(11521), AOM_ICDF(18991), AOM_ICDF(23835), AOM_ICDF(26976),
- AOM_ICDF(29013), AOM_ICDF(30334), AOM_ICDF(31190), AOM_ICDF(31745),
- AOM_ICDF(32105), AOM_ICDF(32338), AOM_ICDF(32489), AOM_ICDF(32587),
- AOM_ICDF(32651), AOM_ICDF(32692), AOM_ICDF(32719), AOM_ICDF(32768)},
- {AOM_ICDF(11265), AOM_ICDF(18657), AOM_ICDF(23508), AOM_ICDF(26691),
- AOM_ICDF(28780), AOM_ICDF(30151), AOM_ICDF(31051), AOM_ICDF(31641),
- AOM_ICDF(32028), AOM_ICDF(32282), AOM_ICDF(32449), AOM_ICDF(32559),
- AOM_ICDF(32631), AOM_ICDF(32678), AOM_ICDF(32709), AOM_ICDF(32768)},
- {AOM_ICDF(11006), AOM_ICDF(18316), AOM_ICDF(23170), AOM_ICDF(26394),
- AOM_ICDF(28535), AOM_ICDF(29957), AOM_ICDF(30901), AOM_ICDF(31528),
- AOM_ICDF(31944), AOM_ICDF(32220), AOM_ICDF(32404), AOM_ICDF(32526),
- AOM_ICDF(32607), AOM_ICDF(32661), AOM_ICDF(32697), AOM_ICDF(32768)},
- {AOM_ICDF(10752), AOM_ICDF(17976), AOM_ICDF(22830), AOM_ICDF(26091),
- AOM_ICDF(28282), AOM_ICDF(29754), AOM_ICDF(30743), AOM_ICDF(31408),
- AOM_ICDF(31854), AOM_ICDF(32154), AOM_ICDF(32356), AOM_ICDF(32491),
- AOM_ICDF(32582), AOM_ICDF(32643), AOM_ICDF(32684), AOM_ICDF(32768)},
- {AOM_ICDF(10496), AOM_ICDF(17630), AOM_ICDF(22479), AOM_ICDF(25775),
- AOM_ICDF(28015), AOM_ICDF(29538), AOM_ICDF(30573), AOM_ICDF(31276),
- AOM_ICDF(31754), AOM_ICDF(32079), AOM_ICDF(32300), AOM_ICDF(32450),
- AOM_ICDF(32552), AOM_ICDF(32621), AOM_ICDF(32668), AOM_ICDF(32768)},
- {AOM_ICDF(10240), AOM_ICDF(17280), AOM_ICDF(22120), AOM_ICDF(25448),
- AOM_ICDF(27736), AOM_ICDF(29309), AOM_ICDF(30390), AOM_ICDF(31133),
- AOM_ICDF(31644), AOM_ICDF(31995), AOM_ICDF(32237), AOM_ICDF(32403),
- AOM_ICDF(32517), AOM_ICDF(32595), AOM_ICDF(32649), AOM_ICDF(32768)},
- { AOM_ICDF(9984), AOM_ICDF(16926), AOM_ICDF(21753), AOM_ICDF(25109),
- AOM_ICDF(27443), AOM_ICDF(29066), AOM_ICDF(30194), AOM_ICDF(30978),
- AOM_ICDF(31523), AOM_ICDF(31902), AOM_ICDF(32166), AOM_ICDF(32349),
- AOM_ICDF(32476), AOM_ICDF(32565), AOM_ICDF(32627), AOM_ICDF(32768)},
- { AOM_ICDF(9728), AOM_ICDF(16568), AOM_ICDF(21377), AOM_ICDF(24759),
- AOM_ICDF(27137), AOM_ICDF(28809), AOM_ICDF(29984), AOM_ICDF(30811),
- AOM_ICDF(31392), AOM_ICDF(31801), AOM_ICDF(32088), AOM_ICDF(32290),
- AOM_ICDF(32432), AOM_ICDF(32532), AOM_ICDF(32602), AOM_ICDF(32768)},
- { AOM_ICDF(9474), AOM_ICDF(16208), AOM_ICDF(20995), AOM_ICDF(24399),
- AOM_ICDF(26819), AOM_ICDF(28539), AOM_ICDF(29762), AOM_ICDF(30631),
- AOM_ICDF(31249), AOM_ICDF(31688), AOM_ICDF(32000), AOM_ICDF(32222),
- AOM_ICDF(32380), AOM_ICDF(32492), AOM_ICDF(32572), AOM_ICDF(32768)},
- { AOM_ICDF(9216), AOM_ICDF(15840), AOM_ICDF(20601), AOM_ICDF(24023),
- AOM_ICDF(26483), AOM_ICDF(28251), AOM_ICDF(29522), AOM_ICDF(30435),
- AOM_ICDF(31091), AOM_ICDF(31563), AOM_ICDF(31902), AOM_ICDF(32146),
- AOM_ICDF(32321), AOM_ICDF(32447), AOM_ICDF(32537), AOM_ICDF(32768)},
- { AOM_ICDF(8959), AOM_ICDF(15469), AOM_ICDF(20199), AOM_ICDF(23636),
- AOM_ICDF(26133), AOM_ICDF(27947), AOM_ICDF(29265), AOM_ICDF(30223),
- AOM_ICDF(30919), AOM_ICDF(31425), AOM_ICDF(31792), AOM_ICDF(32059),
- AOM_ICDF(32253), AOM_ICDF(32394), AOM_ICDF(32496), AOM_ICDF(32768)},
- { AOM_ICDF(8705), AOM_ICDF(15097), AOM_ICDF(19791), AOM_ICDF(23238),
- AOM_ICDF(25770), AOM_ICDF(27629), AOM_ICDF(28994), AOM_ICDF(29997),
- AOM_ICDF(30733), AOM_ICDF(31274), AOM_ICDF(31671), AOM_ICDF(31963),
- AOM_ICDF(32177), AOM_ICDF(32334), AOM_ICDF(32449), AOM_ICDF(32768)},
- { AOM_ICDF(8449), AOM_ICDF(14719), AOM_ICDF(19373), AOM_ICDF(22827),
- AOM_ICDF(25390), AOM_ICDF(27292), AOM_ICDF(28704), AOM_ICDF(29752),
- AOM_ICDF(30530), AOM_ICDF(31107), AOM_ICDF(31535), AOM_ICDF(31853),
- AOM_ICDF(32089), AOM_ICDF(32264), AOM_ICDF(32394), AOM_ICDF(32768)},
- { AOM_ICDF(8192), AOM_ICDF(14336), AOM_ICDF(18944), AOM_ICDF(22400),
- AOM_ICDF(24992), AOM_ICDF(26936), AOM_ICDF(28394), AOM_ICDF(29488),
- AOM_ICDF(30308), AOM_ICDF(30923), AOM_ICDF(31384), AOM_ICDF(31730),
- AOM_ICDF(31989), AOM_ICDF(32184), AOM_ICDF(32330), AOM_ICDF(32768)},
- { AOM_ICDF(7936), AOM_ICDF(13950), AOM_ICDF(18507), AOM_ICDF(21961),
- AOM_ICDF(24578), AOM_ICDF(26561), AOM_ICDF(28064), AOM_ICDF(29203),
- AOM_ICDF(30066), AOM_ICDF(30720), AOM_ICDF(31216), AOM_ICDF(31592),
- AOM_ICDF(31877), AOM_ICDF(32093), AOM_ICDF(32256), AOM_ICDF(32768)},
- { AOM_ICDF(7678), AOM_ICDF(13558), AOM_ICDF(18060), AOM_ICDF(21507),
- AOM_ICDF(24146), AOM_ICDF(26166), AOM_ICDF(27713), AOM_ICDF(28897),
- AOM_ICDF(29804), AOM_ICDF(30498), AOM_ICDF(31030), AOM_ICDF(31437),
- AOM_ICDF(31749), AOM_ICDF(31988), AOM_ICDF(32171), AOM_ICDF(32768)},
- { AOM_ICDF(7423), AOM_ICDF(13165), AOM_ICDF(17606), AOM_ICDF(21041),
- AOM_ICDF(23698), AOM_ICDF(25753), AOM_ICDF(27342), AOM_ICDF(28571),
- AOM_ICDF(29522), AOM_ICDF(30257), AOM_ICDF(30826), AOM_ICDF(31266),
- AOM_ICDF(31606), AOM_ICDF(31869), AOM_ICDF(32073), AOM_ICDF(32768)},
- { AOM_ICDF(7168), AOM_ICDF(12768), AOM_ICDF(17143), AOM_ICDF(20561),
- AOM_ICDF(23231), AOM_ICDF(25317), AOM_ICDF(26947), AOM_ICDF(28220),
- AOM_ICDF(29215), AOM_ICDF(29992), AOM_ICDF(30599), AOM_ICDF(31073),
- AOM_ICDF(31444), AOM_ICDF(31734), AOM_ICDF(31960), AOM_ICDF(32768)},
- { AOM_ICDF(6911), AOM_ICDF(12365), AOM_ICDF(16669), AOM_ICDF(20065),
- AOM_ICDF(22744), AOM_ICDF(24858), AOM_ICDF(26526), AOM_ICDF(27842),
- AOM_ICDF(28881), AOM_ICDF(29701), AOM_ICDF(30348), AOM_ICDF(30858),
- AOM_ICDF(31261), AOM_ICDF(31579), AOM_ICDF(31830), AOM_ICDF(32768)},
- { AOM_ICDF(6657), AOM_ICDF(11961), AOM_ICDF(16188), AOM_ICDF(19556),
- AOM_ICDF(22240), AOM_ICDF(24379), AOM_ICDF(26083), AOM_ICDF(27441),
- AOM_ICDF(28523), AOM_ICDF(29385), AOM_ICDF(30072), AOM_ICDF(30620),
- AOM_ICDF(31056), AOM_ICDF(31404), AOM_ICDF(31681), AOM_ICDF(32768)},
- { AOM_ICDF(6400), AOM_ICDF(11550), AOM_ICDF(15694), AOM_ICDF(19029),
- AOM_ICDF(21712), AOM_ICDF(23871), AOM_ICDF(25609), AOM_ICDF(27007),
- AOM_ICDF(28132), AOM_ICDF(29037), AOM_ICDF(29766), AOM_ICDF(30352),
- AOM_ICDF(30824), AOM_ICDF(31204), AOM_ICDF(31509), AOM_ICDF(32768)},
- { AOM_ICDF(6142), AOM_ICDF(11134), AOM_ICDF(15190), AOM_ICDF(18486),
- AOM_ICDF(21164), AOM_ICDF(23340), AOM_ICDF(25108), AOM_ICDF(26544),
- AOM_ICDF(27711), AOM_ICDF(28659), AOM_ICDF(29429), AOM_ICDF(30055),
- AOM_ICDF(30564), AOM_ICDF(30977), AOM_ICDF(31313), AOM_ICDF(32768)},
- { AOM_ICDF(5890), AOM_ICDF(10720), AOM_ICDF(14682), AOM_ICDF(17932),
- AOM_ICDF(20598), AOM_ICDF(22785), AOM_ICDF(24579), AOM_ICDF(26051),
- AOM_ICDF(27258), AOM_ICDF(28248), AOM_ICDF(29060), AOM_ICDF(29726),
- AOM_ICDF(30273), AOM_ICDF(30721), AOM_ICDF(31089), AOM_ICDF(32768)},
- { AOM_ICDF(5631), AOM_ICDF(10295), AOM_ICDF(14157), AOM_ICDF(17356),
- AOM_ICDF(20005), AOM_ICDF(22199), AOM_ICDF(24016), AOM_ICDF(25520),
- AOM_ICDF(26766), AOM_ICDF(27798), AOM_ICDF(28652), AOM_ICDF(29359),
- AOM_ICDF(29945), AOM_ICDF(30430), AOM_ICDF(30832), AOM_ICDF(32768)},
- { AOM_ICDF(5377), AOM_ICDF(9871), AOM_ICDF(13628), AOM_ICDF(16768),
- AOM_ICDF(19393), AOM_ICDF(21587), AOM_ICDF(23421), AOM_ICDF(24954),
- AOM_ICDF(26236), AOM_ICDF(27308), AOM_ICDF(28204), AOM_ICDF(28953),
- AOM_ICDF(29579), AOM_ICDF(30102), AOM_ICDF(30539), AOM_ICDF(32768)},
- { AOM_ICDF(5121), AOM_ICDF(9441), AOM_ICDF(13086), AOM_ICDF(16161),
- AOM_ICDF(18756), AOM_ICDF(20945), AOM_ICDF(22792), AOM_ICDF(24351),
- AOM_ICDF(25666), AOM_ICDF(26776), AOM_ICDF(27712), AOM_ICDF(28502),
- AOM_ICDF(29169), AOM_ICDF(29731), AOM_ICDF(30206), AOM_ICDF(32768)},
- { AOM_ICDF(4865), AOM_ICDF(9007), AOM_ICDF(12534), AOM_ICDF(15538),
- AOM_ICDF(18096), AOM_ICDF(20274), AOM_ICDF(22129), AOM_ICDF(23708),
- AOM_ICDF(25053), AOM_ICDF(26198), AOM_ICDF(27173), AOM_ICDF(28004),
- AOM_ICDF(28711), AOM_ICDF(29313), AOM_ICDF(29826), AOM_ICDF(32768)},
- { AOM_ICDF(4608), AOM_ICDF(8568), AOM_ICDF(11971), AOM_ICDF(14896),
- AOM_ICDF(17409), AOM_ICDF(19569), AOM_ICDF(21425), AOM_ICDF(23020),
- AOM_ICDF(24391), AOM_ICDF(25569), AOM_ICDF(26581), AOM_ICDF(27451),
- AOM_ICDF(28199), AOM_ICDF(28842), AOM_ICDF(29394), AOM_ICDF(32768)},
- { AOM_ICDF(4351), AOM_ICDF(8125), AOM_ICDF(11398), AOM_ICDF(14236),
- AOM_ICDF(16697), AOM_ICDF(18831), AOM_ICDF(20682), AOM_ICDF(22287),
- AOM_ICDF(23679), AOM_ICDF(24886), AOM_ICDF(25933), AOM_ICDF(26841),
- AOM_ICDF(27628), AOM_ICDF(28311), AOM_ICDF(28903), AOM_ICDF(32768)},
- { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
- AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(21508),
- AOM_ICDF(22915), AOM_ICDF(24146), AOM_ICDF(25224), AOM_ICDF(26167),
- AOM_ICDF(26992), AOM_ICDF(27714), AOM_ICDF(28346), AOM_ICDF(32768)},
- { AOM_ICDF(3840), AOM_ICDF(7230), AOM_ICDF(10223), AOM_ICDF(12865),
- AOM_ICDF(15197), AOM_ICDF(17256), AOM_ICDF(19074), AOM_ICDF(20679),
- AOM_ICDF(22096), AOM_ICDF(23347), AOM_ICDF(24451), AOM_ICDF(25426),
- AOM_ICDF(26287), AOM_ICDF(27047), AOM_ICDF(27718), AOM_ICDF(32768)},
- { AOM_ICDF(3584), AOM_ICDF(6776), AOM_ICDF(9619), AOM_ICDF(12151),
- AOM_ICDF(14406), AOM_ICDF(16414), AOM_ICDF(18203), AOM_ICDF(19796),
- AOM_ICDF(21215), AOM_ICDF(22479), AOM_ICDF(23604), AOM_ICDF(24606),
- AOM_ICDF(25499), AOM_ICDF(26294), AOM_ICDF(27002), AOM_ICDF(32768)},
- { AOM_ICDF(3328), AOM_ICDF(6318), AOM_ICDF(9004), AOM_ICDF(11417),
- AOM_ICDF(13585), AOM_ICDF(15533), AOM_ICDF(17283), AOM_ICDF(18856),
- AOM_ICDF(20269), AOM_ICDF(21538), AOM_ICDF(22678), AOM_ICDF(23703),
- AOM_ICDF(24624), AOM_ICDF(25451), AOM_ICDF(26194), AOM_ICDF(32768)},
- { AOM_ICDF(3072), AOM_ICDF(5856), AOM_ICDF(8379), AOM_ICDF(10665),
- AOM_ICDF(12737), AOM_ICDF(14615), AOM_ICDF(16317), AOM_ICDF(17859),
- AOM_ICDF(19257), AOM_ICDF(20524), AOM_ICDF(21672), AOM_ICDF(22712),
- AOM_ICDF(23655), AOM_ICDF(24509), AOM_ICDF(25283), AOM_ICDF(32768)},
- { AOM_ICDF(2816), AOM_ICDF(5390), AOM_ICDF(7743), AOM_ICDF(9894),
- AOM_ICDF(11860), AOM_ICDF(13657), AOM_ICDF(15299), AOM_ICDF(16800),
- AOM_ICDF(18172), AOM_ICDF(19426), AOM_ICDF(20573), AOM_ICDF(21621),
- AOM_ICDF(22579), AOM_ICDF(23455), AOM_ICDF(24255), AOM_ICDF(32768)},
- { AOM_ICDF(2560), AOM_ICDF(4920), AOM_ICDF(7096), AOM_ICDF(9102),
- AOM_ICDF(10951), AOM_ICDF(12656), AOM_ICDF(14227), AOM_ICDF(15676),
- AOM_ICDF(17011), AOM_ICDF(18242), AOM_ICDF(19377), AOM_ICDF(20423),
- AOM_ICDF(21388), AOM_ICDF(22277), AOM_ICDF(23097), AOM_ICDF(32768)},
- { AOM_ICDF(2304), AOM_ICDF(4446), AOM_ICDF(6437), AOM_ICDF(8288),
- AOM_ICDF(10009), AOM_ICDF(11609), AOM_ICDF(13097), AOM_ICDF(14480),
- AOM_ICDF(15766), AOM_ICDF(16961), AOM_ICDF(18072), AOM_ICDF(19105),
- AOM_ICDF(20066), AOM_ICDF(20959), AOM_ICDF(21789), AOM_ICDF(32768)},
- { AOM_ICDF(2048), AOM_ICDF(3968), AOM_ICDF(5768), AOM_ICDF(7456),
- AOM_ICDF(9038), AOM_ICDF(10521), AOM_ICDF(11911), AOM_ICDF(13215),
- AOM_ICDF(14437), AOM_ICDF(15583), AOM_ICDF(16657), AOM_ICDF(17664),
- AOM_ICDF(18608), AOM_ICDF(19493), AOM_ICDF(20323), AOM_ICDF(32768)},
- { AOM_ICDF(1792), AOM_ICDF(3486), AOM_ICDF(5087), AOM_ICDF(6601),
- AOM_ICDF(8032), AOM_ICDF(9385), AOM_ICDF(10664), AOM_ICDF(11873),
- AOM_ICDF(13016), AOM_ICDF(14096), AOM_ICDF(15117), AOM_ICDF(16082),
- AOM_ICDF(16995), AOM_ICDF(17858), AOM_ICDF(18673), AOM_ICDF(32768)},
- { AOM_ICDF(1536), AOM_ICDF(3000), AOM_ICDF(4395), AOM_ICDF(5725),
- AOM_ICDF(6993), AOM_ICDF(8201), AOM_ICDF(9353), AOM_ICDF(10451),
- AOM_ICDF(11497), AOM_ICDF(12494), AOM_ICDF(13444), AOM_ICDF(14350),
- AOM_ICDF(15213), AOM_ICDF(16036), AOM_ICDF(16820), AOM_ICDF(32768)},
- { AOM_ICDF(1280), AOM_ICDF(2510), AOM_ICDF(3692), AOM_ICDF(4828),
- AOM_ICDF(5919), AOM_ICDF(6968), AOM_ICDF(7976), AOM_ICDF(8944),
- AOM_ICDF(9875), AOM_ICDF(10769), AOM_ICDF(11628), AOM_ICDF(12454),
- AOM_ICDF(13248), AOM_ICDF(14011), AOM_ICDF(14744), AOM_ICDF(32768)},
- { AOM_ICDF(1024), AOM_ICDF(2016), AOM_ICDF(2977), AOM_ICDF(3908),
- AOM_ICDF(4810), AOM_ICDF(5684), AOM_ICDF(6530), AOM_ICDF(7350),
- AOM_ICDF(8144), AOM_ICDF(8913), AOM_ICDF(9658), AOM_ICDF(10380),
- AOM_ICDF(11080), AOM_ICDF(11758), AOM_ICDF(12415), AOM_ICDF(32768)},
- { AOM_ICDF(768), AOM_ICDF(1518), AOM_ICDF(2250), AOM_ICDF(2965),
- AOM_ICDF(3663), AOM_ICDF(4345), AOM_ICDF(5011), AOM_ICDF(5662),
- AOM_ICDF(6297), AOM_ICDF(6917), AOM_ICDF(7523), AOM_ICDF(8115),
- AOM_ICDF(8693), AOM_ICDF(9257), AOM_ICDF(9808), AOM_ICDF(32768)},
- { AOM_ICDF(512), AOM_ICDF(1016), AOM_ICDF(1512), AOM_ICDF(2000),
- AOM_ICDF(2481), AOM_ICDF(2954), AOM_ICDF(3420), AOM_ICDF(3879),
- AOM_ICDF(4330), AOM_ICDF(4774), AOM_ICDF(5211), AOM_ICDF(5642),
- AOM_ICDF(6066), AOM_ICDF(6483), AOM_ICDF(6894), AOM_ICDF(32768)},
- { AOM_ICDF(256), AOM_ICDF(510), AOM_ICDF(762), AOM_ICDF(1012),
- AOM_ICDF(1260), AOM_ICDF(1506), AOM_ICDF(1750), AOM_ICDF(1992),
- AOM_ICDF(2232), AOM_ICDF(2471), AOM_ICDF(2708), AOM_ICDF(2943),
- AOM_ICDF(3176), AOM_ICDF(3407), AOM_ICDF(3636), AOM_ICDF(32768)},
-};
-
-
-const uint16_t LAPLACE_OFFSET[128] = {
- 0,
- 29871,
- 28672,
- 27751,
- 26975,
- 26291,
- 25673,
- 25105,
- 24576,
- 24079,
- 23609,
- 23162,
- 22734,
- 22325,
- 21931,
- 21550,
- 21182,
- 20826,
- 20480,
- 20143,
- 19815,
- 19495,
- 19183,
- 18877,
- 18579,
- 18286,
- 17999,
- 17718,
- 17442,
- 17170,
- 16904,
- 16642,
- 16384,
- 16129,
- 15879,
- 15633,
- 15390,
- 15150,
- 14913,
- 14680,
- 14450,
- 14222,
- 13997,
- 13775,
- 13556,
- 13338,
- 13124,
- 12911,
- 12701,
- 12493,
- 12288,
- 12084,
- 11882,
- 11682,
- 11484,
- 11288,
- 11094,
- 10901,
- 10710,
- 10521,
- 10333,
- 10147,
- 9962,
- 9779,
- 9597,
- 9417,
- 9238,
- 9060,
- 8884,
- 8709,
- 8535,
- 8363,
- 8192,
- 8021,
- 7853,
- 7685,
- 7518,
- 7352,
- 7188,
- 7025,
- 6862,
- 6701,
- 6540,
- 6381,
- 6222,
- 6065,
- 5908,
- 5753,
- 5598,
- 5444,
- 5291,
- 5138,
- 4987,
- 4837,
- 4687,
- 4538,
- 4390,
- 4242,
- 4096,
- 3950,
- 3804,
- 3660,
- 3516,
- 3373,
- 3231,
- 3089,
- 2948,
- 2808,
- 2668,
- 2529,
- 2391,
- 2253,
- 2116,
- 1979,
- 1843,
- 1708,
- 1573,
- 1439,
- 1306,
- 1172,
- 1040,
- 908,
- 777,
- 646,
- 516,
- 386,
- 257,
- 128,
-};
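
The deleted EXP_CDF_TABLE above is the auto-generated output of "gen_laplace_tables 128 7": each row is a 16-entry CDF of a geometric (discretized Laplace) distribution, scaled so the final entry is 32768, with the decay factor growing from row to row. The sketch below is an assumed reconstruction of the underlying formula, offered only to make the shape of the data readable; the actual generator is not part of this diff and additionally nudges entries so every row stays strictly increasing, which the plain formula does not.

#include <math.h>
#include <stdint.h>
#include <stdio.h>

#define CDF_TOP 32768

/* One 16-entry row of a geometric CDF with decay factor r in (0, 1):
 * cdf[k] ~= CDF_TOP * (1 - r^(k+1)), with the final entry pinned to
 * CDF_TOP. Assumed reconstruction of what gen_laplace_tables emits. */
static void laplace_cdf_row(double r, uint16_t cdf[16]) {
  double tail = 1.0;
  for (int k = 0; k < 16; ++k) {
    tail *= r;
    long v = lround(CDF_TOP * (1.0 - tail));
    if (v > CDF_TOP) v = CDF_TOP;
    cdf[k] = (uint16_t)v;
  }
  cdf[15] = CDF_TOP;
}

int main(void) {
  uint16_t cdf[16];
  laplace_cdf_row(0.5, cdf); /* reproduces the 16384, 24576, 28672, ... row */
  for (int k = 0; k < 16; ++k) printf("%u ", cdf[k]);
  printf("\n");
  return 0;
}
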
diff --git a/third_party/aom/av1/common/mips/msa/av1_idct16x16_msa.c b/third_party/aom/av1/common/mips/msa/av1_idct16x16_msa.c
deleted file mode 100644
index ff461b914..000000000
--- a/third_party/aom/av1/common/mips/msa/av1_idct16x16_msa.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "av1/common/enums.h"
-#include "aom_dsp/mips/inv_txfm_msa.h"
-
-void av1_iht16x16_256_add_msa(const int16_t *input, uint8_t *dst,
- int32_t dst_stride, TxfmParam *txfm_param) {
- int32_t i;
- DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
- int16_t *out_ptr = &out[0];
- const TX_TYPE tx_type = txfm_param->tx_type;
-
- switch (tx_type) {
- case DCT_DCT:
- /* transform rows */
- for (i = 0; i < 2; ++i) {
- /* process 16 * 8 block */
- aom_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
- }
-
- /* transform columns */
- for (i = 0; i < 2; ++i) {
- /* process 8 * 16 block */
- aom_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
- dst_stride);
- }
- break;
- case ADST_DCT:
- /* transform rows */
- for (i = 0; i < 2; ++i) {
- /* process 16 * 8 block */
- aom_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
- }
-
- /* transform columns */
- for (i = 0; i < 2; ++i) {
- aom_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),
- (dst + (i << 3)), dst_stride);
- }
- break;
- case DCT_ADST:
- /* transform rows */
- for (i = 0; i < 2; ++i) {
- /* process 16 * 8 block */
- aom_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
- }
-
- /* transform columns */
- for (i = 0; i < 2; ++i) {
- /* process 8 * 16 block */
- aom_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
- dst_stride);
- }
- break;
- case ADST_ADST:
- /* transform rows */
- for (i = 0; i < 2; ++i) {
- /* process 16 * 8 block */
- aom_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
- }
-
- /* transform columns */
- for (i = 0; i < 2; ++i) {
- aom_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),
- (dst + (i << 3)), dst_stride);
- }
- break;
- default: assert(0); break;
- }
-}
diff --git a/third_party/aom/av1/common/mips/msa/av1_idct4x4_msa.c b/third_party/aom/av1/common/mips/msa/av1_idct4x4_msa.c
deleted file mode 100644
index 37f7fd77b..000000000
--- a/third_party/aom/av1/common/mips/msa/av1_idct4x4_msa.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "av1/common/enums.h"
-#include "aom_dsp/mips/inv_txfm_msa.h"
-
-void av1_iht4x4_16_add_msa(const int16_t *input, uint8_t *dst,
- int32_t dst_stride, TxfmParam *txfm_param) {
- v8i16 in0, in1, in2, in3;
- const TX_TYPE tx_type = txfm_param->tx_type;
-
- /* load vector elements of 4x4 block */
- LD4x4_SH(input, in0, in1, in2, in3);
- TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
-
- switch (tx_type) {
- case DCT_DCT:
- /* DCT in horizontal */
- AOM_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
- /* DCT in vertical */
- TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
- AOM_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
- break;
- case ADST_DCT:
- /* DCT in horizontal */
- AOM_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
- /* ADST in vertical */
- TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
- AOM_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
- break;
- case DCT_ADST:
- /* ADST in horizontal */
- AOM_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
- /* DCT in vertical */
- TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
- AOM_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
- break;
- case ADST_ADST:
- /* ADST in horizontal */
- AOM_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
- /* ADST in vertical */
- TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
- AOM_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
- break;
- default: assert(0); break;
- }
-
- /* final rounding (add 2^3, divide by 2^4) and shift */
- SRARI_H4_SH(in0, in1, in2, in3, 4);
- /* add block and store 4x4 */
- ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride);
-}
diff --git a/third_party/aom/av1/common/mips/msa/av1_idct8x8_msa.c b/third_party/aom/av1/common/mips/msa/av1_idct8x8_msa.c
deleted file mode 100644
index 7410f7b98..000000000
--- a/third_party/aom/av1/common/mips/msa/av1_idct8x8_msa.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "av1/common/enums.h"
-#include "aom_dsp/mips/inv_txfm_msa.h"
-
-void av1_iht8x8_64_add_msa(const int16_t *input, uint8_t *dst,
- int32_t dst_stride, TxfmParam *txfm_param) {
- v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
- const TX_TYPE tx_type = txfm_param->tx_type;
-
- /* load vector elements of 8x8 block */
- LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
-
- TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
- in4, in5, in6, in7);
-
- switch (tx_type) {
- case DCT_DCT:
- /* DCT in horizontal */
- AOM_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
- in4, in5, in6, in7);
- /* DCT in vertical */
- TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
- in3, in4, in5, in6, in7);
- AOM_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
- in4, in5, in6, in7);
- break;
- case ADST_DCT:
- /* DCT in horizontal */
- AOM_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
- in4, in5, in6, in7);
- /* ADST in vertical */
- TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
- in3, in4, in5, in6, in7);
- AOM_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
- in5, in6, in7);
- break;
- case DCT_ADST:
- /* ADST in horizontal */
- AOM_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
- in5, in6, in7);
- /* DCT in vertical */
- TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
- in3, in4, in5, in6, in7);
- AOM_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
- in4, in5, in6, in7);
- break;
- case ADST_ADST:
- /* ADST in horizontal */
- AOM_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
- in5, in6, in7);
- /* ADST in vertical */
- TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
- in3, in4, in5, in6, in7);
- AOM_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
- in5, in6, in7);
- break;
- default: assert(0); break;
- }
-
- /* final rounding (add 2^4, divide by 2^5) and shift */
- SRARI_H4_SH(in0, in1, in2, in3, 5);
- SRARI_H4_SH(in4, in5, in6, in7, 5);
-
- /* add block and store 8x8 */
- AOM_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
- dst += (4 * dst_stride);
- AOM_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
-}
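
Both MSA files removed above implement the 2-D inverse hybrid transform the same way: a 1-D pass in one direction, a transpose, a 1-D pass in the other direction (DCT or ADST per tx_type), a rounding shift, and an add of the residual into the destination block. The following is a minimal scalar C sketch of that separable structure for the 4x4 case; inv_txfm_4x4_add_sketch and the row_txfm/col_txfm callbacks are illustrative stand-ins for the AOM_IDCT4x4 / AOM_IADST4x4 macros and are not libaom API, and the sketch is not bit-exact with the deleted kernels.

#include <stdint.h>
#include <string.h>

/* Structural sketch only: row transform -> transpose -> column transform ->
 * rounding -> add block and clamp, mirroring the flow of the deleted code. */
static void inv_txfm_4x4_add_sketch(const int16_t *input, uint8_t *dst,
                                    int dst_stride,
                                    void (*row_txfm)(int16_t row[4]),
                                    void (*col_txfm)(int16_t row[4])) {
  int16_t buf[4 * 4];
  memcpy(buf, input, sizeof(buf));

  for (int i = 0; i < 4; ++i) row_txfm(&buf[i * 4]); /* horizontal pass */

  for (int r = 0; r < 4; ++r) /* transpose between the two passes */
    for (int c = r + 1; c < 4; ++c) {
      const int16_t t = buf[r * 4 + c];
      buf[r * 4 + c] = buf[c * 4 + r];
      buf[c * 4 + r] = t;
    }

  for (int i = 0; i < 4; ++i) col_txfm(&buf[i * 4]); /* vertical pass */

  for (int r = 0; r < 4; ++r)
    for (int c = 0; c < 4; ++c) {
      /* final rounding (add 2^3, divide by 2^4), then add block and clamp */
      const int v = dst[r * dst_stride + c] + ((buf[r * 4 + c] + 8) >> 4);
      dst[r * dst_stride + c] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }
}
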
diff --git a/third_party/aom/av1/common/mv.h b/third_party/aom/av1/common/mv.h
index 65f0f7eda..a6227f18f 100644
--- a/third_party/aom/av1/common/mv.h
+++ b/third_party/aom/av1/common/mv.h
@@ -27,6 +27,8 @@ typedef struct mv {
int16_t col;
} MV;
+static const MV kZeroMv = { 0, 0 };
+
typedef union int_mv {
uint32_t as_int;
MV as_mv;
@@ -37,11 +39,6 @@ typedef struct mv32 {
int32_t col;
} MV32;
-#if CONFIG_WARPED_MOTION
-#define WARPED_MOTION_SORT_SAMPLES 1
-#endif // CONFIG_WARPED_MOTION
-
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
// Bits of precision used for the model
#define WARPEDMODEL_PREC_BITS 16
#define WARPEDMODEL_ROW3HOMO_PREC_BITS 16
@@ -54,19 +51,8 @@ typedef struct mv32 {
#define WARPEDPIXEL_PREC_BITS 6
#define WARPEDPIXEL_PREC_SHIFTS (1 << WARPEDPIXEL_PREC_BITS)
-// Taps for ntap filter
-#define WARPEDPIXEL_FILTER_TAPS 6
-
-// Precision of filter taps
-#define WARPEDPIXEL_FILTER_BITS 7
-
#define WARP_PARAM_REDUCE_BITS 6
-// Precision bits reduction after horizontal shear
-#define HORSHEAR_REDUCE_PREC_BITS 5
-#define VERSHEAR_REDUCE_PREC_BITS \
- (2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS)
-
#define WARPEDDIFF_PREC_BITS (WARPEDMODEL_PREC_BITS - WARPEDPIXEL_PREC_BITS)
/* clang-format off */
@@ -75,10 +61,7 @@ typedef enum {
TRANSLATION = 1, // translational motion 2-parameter
ROTZOOM = 2, // simplified affine with rotation + zoom only, 4-parameter
AFFINE = 3, // affine, 6-parameter
- HORTRAPEZOID = 4, // constrained homography, hor trapezoid, 6-parameter
- VERTRAPEZOID = 5, // constrained homography, ver trapezoid, 6-parameter
- HOMOGRAPHY = 6, // homography, 8-parameter
- TRANS_TYPES = 7,
+ TRANS_TYPES,
} TransformationType;
/* clang-format on */
@@ -90,24 +73,13 @@ typedef enum {
// GLOBAL_TRANS_TYPES 7 - up to full homography
#define GLOBAL_TRANS_TYPES 4
-#if GLOBAL_TRANS_TYPES > 4
-// First bit indicates whether using identity or not
-// GLOBAL_TYPE_BITS=ceiling(log2(GLOBAL_TRANS_TYPES-1)) is the
-// number of bits needed to cover the remaining possibilities
-#define GLOBAL_TYPE_BITS (get_msb(2 * GLOBAL_TRANS_TYPES - 3))
-#endif // GLOBAL_TRANS_TYPES > 4
-
typedef struct {
-#if CONFIG_GLOBAL_MOTION
int global_warp_allowed;
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_WARPED_MOTION
int local_warp_allowed;
-#endif // CONFIG_WARPED_MOTION
} WarpTypesAllowed;
// number of parameters used by each transformation in TransformationTypes
-static const int trans_model_params[TRANS_TYPES] = { 0, 2, 4, 6, 6, 6, 8 };
+static const int trans_model_params[TRANS_TYPES] = { 0, 2, 4, 6 };
// The order of values in the wmmat matrix below is best described
// by the homography:
@@ -118,6 +90,7 @@ typedef struct {
TransformationType wmtype;
int32_t wmmat[8];
int16_t alpha, beta, gamma, delta;
+ int8_t invalid;
} WarpedMotionParams;
/* clang-format off */
@@ -125,12 +98,11 @@ static const WarpedMotionParams default_warp_params = {
IDENTITY,
{ 0, 0, (1 << WARPEDMODEL_PREC_BITS), 0, 0, (1 << WARPEDMODEL_PREC_BITS), 0,
0 },
- 0, 0, 0, 0
+ 0, 0, 0, 0,
+ 0,
};
/* clang-format on */
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-#if CONFIG_GLOBAL_MOTION
// The following constants describe the various precisions
// of different parameters in the global motion experiment.
//
@@ -187,9 +159,6 @@ static const WarpedMotionParams default_warp_params = {
#define GM_ALPHA_MIN -GM_ALPHA_MAX
#define GM_ROW3HOMO_MIN -GM_ROW3HOMO_MAX
-// Use global motion parameters for sub8x8 blocks
-#define GLOBAL_SUB8X8_USED 0
-
static INLINE int block_center_x(int mi_col, BLOCK_SIZE bs) {
const int bw = block_size_wide[bs];
return mi_col * MI_SIZE + bw / 2 - 1;
@@ -206,7 +175,6 @@ static INLINE int convert_to_trans_prec(int allow_hp, int coor) {
else
return ROUND_POWER_OF_TWO_SIGNED(coor, WARPEDMODEL_PREC_BITS - 2) * 2;
}
-#if CONFIG_AMVR
static INLINE void integer_mv_precision(MV *mv) {
int mod = (mv->row % 8);
if (mod != 0) {
@@ -232,7 +200,6 @@ static INLINE void integer_mv_precision(MV *mv) {
}
}
}
-#endif
// Convert a global motion vector into a motion vector at the centre of the
// given block.
//
@@ -242,14 +209,15 @@ static INLINE void integer_mv_precision(MV *mv) {
// represents an integer)
static INLINE int_mv gm_get_motion_vector(const WarpedMotionParams *gm,
int allow_hp, BLOCK_SIZE bsize,
- int mi_col, int mi_row, int block_idx
-#if CONFIG_AMVR
- ,
- int is_integer
-#endif
- ) {
- const int unify_bsize = CONFIG_CB4X4;
+ int mi_col, int mi_row,
+ int is_integer) {
int_mv res;
+
+ if (gm->wmtype == IDENTITY) {
+ res.as_int = 0;
+ return res;
+ }
+
const int32_t *mat = gm->wmmat;
int x, y, tx, ty;
@@ -265,65 +233,37 @@ static INLINE int_mv gm_get_motion_vector(const WarpedMotionParams *gm,
res.as_mv.row = gm->wmmat[0] >> GM_TRANS_ONLY_PREC_DIFF;
res.as_mv.col = gm->wmmat[1] >> GM_TRANS_ONLY_PREC_DIFF;
assert(IMPLIES(1 & (res.as_mv.row | res.as_mv.col), allow_hp));
-#if CONFIG_AMVR
if (is_integer) {
integer_mv_precision(&res.as_mv);
}
-#endif
return res;
}
- if (bsize >= BLOCK_8X8 || unify_bsize) {
- x = block_center_x(mi_col, bsize);
- y = block_center_y(mi_row, bsize);
- } else {
- x = block_center_x(mi_col, bsize);
- y = block_center_y(mi_row, bsize);
- x += (block_idx & 1) * MI_SIZE / 2;
- y += (block_idx & 2) * MI_SIZE / 4;
- }
+ x = block_center_x(mi_col, bsize);
+ y = block_center_y(mi_row, bsize);
if (gm->wmtype == ROTZOOM) {
assert(gm->wmmat[5] == gm->wmmat[2]);
assert(gm->wmmat[4] == -gm->wmmat[3]);
}
- if (gm->wmtype > AFFINE) {
- int xc = (int)((int64_t)mat[2] * x + (int64_t)mat[3] * y + mat[0]);
- int yc = (int)((int64_t)mat[4] * x + (int64_t)mat[5] * y + mat[1]);
- const int Z = (int)((int64_t)mat[6] * x + (int64_t)mat[7] * y +
- (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
- xc *= 1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS - WARPEDMODEL_PREC_BITS);
- yc *= 1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS - WARPEDMODEL_PREC_BITS);
- xc = (int)(xc > 0 ? ((int64_t)xc + Z / 2) / Z : ((int64_t)xc - Z / 2) / Z);
- yc = (int)(yc > 0 ? ((int64_t)yc + Z / 2) / Z : ((int64_t)yc - Z / 2) / Z);
- tx = convert_to_trans_prec(allow_hp, xc) - (x << 3);
- ty = convert_to_trans_prec(allow_hp, yc) - (y << 3);
- } else {
- const int xc =
- (mat[2] - (1 << WARPEDMODEL_PREC_BITS)) * x + mat[3] * y + mat[0];
- const int yc =
- mat[4] * x + (mat[5] - (1 << WARPEDMODEL_PREC_BITS)) * y + mat[1];
- tx = convert_to_trans_prec(allow_hp, xc);
- ty = convert_to_trans_prec(allow_hp, yc);
- }
+
+ const int xc =
+ (mat[2] - (1 << WARPEDMODEL_PREC_BITS)) * x + mat[3] * y + mat[0];
+ const int yc =
+ mat[4] * x + (mat[5] - (1 << WARPEDMODEL_PREC_BITS)) * y + mat[1];
+ tx = convert_to_trans_prec(allow_hp, xc);
+ ty = convert_to_trans_prec(allow_hp, yc);
res.as_mv.row = ty;
res.as_mv.col = tx;
-#if CONFIG_AMVR
if (is_integer) {
integer_mv_precision(&res.as_mv);
}
-#endif
return res;
}
static INLINE TransformationType get_gmtype(const WarpedMotionParams *gm) {
- if (gm->wmmat[6] != 0 || gm->wmmat[7] != 0) {
- if (!gm->wmmat[6] && !gm->wmmat[4]) return HORTRAPEZOID;
- if (!gm->wmmat[7] && !gm->wmmat[3]) return VERTRAPEZOID;
- return HOMOGRAPHY;
- }
if (gm->wmmat[5] == (1 << WARPEDMODEL_PREC_BITS) && !gm->wmmat[4] &&
gm->wmmat[2] == (1 << WARPEDMODEL_PREC_BITS) && !gm->wmmat[3]) {
return ((!gm->wmmat[1] && !gm->wmmat[0]) ? IDENTITY : TRANSLATION);
@@ -333,12 +273,10 @@ static INLINE TransformationType get_gmtype(const WarpedMotionParams *gm) {
else
return AFFINE;
}
-#endif // CONFIG_GLOBAL_MOTION
typedef struct candidate_mv {
int_mv this_mv;
int_mv comp_mv;
- uint8_t pred_diff[2];
int weight;
} CANDIDATE_MV;
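
With the trapezoid and homography models dropped, gm_get_motion_vector() above reduces to three cases: IDENTITY returns a zero MV, TRANSLATION shifts wmmat[0..1] down to MV precision, and ROTZOOM/AFFINE evaluate the model at the block centre and convert the result with convert_to_trans_prec(). A hedged usage sketch follows; the struct fields and the argument order are taken from the hunks above, while get_global_candidate itself is purely illustrative and not a libaom function.

/* Illustrative only: derive the global-motion MV candidate for one block
 * via the gm_get_motion_vector() helper shown above. */
static int_mv get_global_candidate(const AV1_COMMON *cm,
                                   MV_REFERENCE_FRAME ref, BLOCK_SIZE bsize,
                                   int mi_row, int mi_col) {
  const WarpedMotionParams *gm = &cm->global_motion[ref];
  /* Note the argument order: column before row, as in the new signature. */
  return gm_get_motion_vector(gm, cm->allow_high_precision_mv, bsize, mi_col,
                              mi_row, cm->cur_frame_force_integer_mv);
}
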
diff --git a/third_party/aom/av1/common/mvref_common.c b/third_party/aom/av1/common/mvref_common.c
index 891396e9b..6939df335 100644
--- a/third_party/aom/av1/common/mvref_common.c
+++ b/third_party/aom/av1/common/mvref_common.c
@@ -9,68 +9,72 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+#include <stdlib.h>
+
#include "av1/common/mvref_common.h"
-#if CONFIG_WARPED_MOTION
#include "av1/common/warped_motion.h"
-#endif // CONFIG_WARPED_MOTION
-#if CONFIG_GLOBAL_MOTION
-#define USE_CUR_GM_REFMV 1
-#endif // CONFIG_GLOBAL_MOTION
+// Although we assign 32 bit integers, all the values are strictly under 14
+// bits.
+static int div_mult[32] = { 0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
+ 2048, 1820, 1638, 1489, 1365, 1260, 1170, 1092,
+ 1024, 963, 910, 862, 819, 780, 744, 712,
+ 682, 655, 630, 606, 585, 564, 546, 528 };
+
+// TODO(jingning): Consider the use of lookup table for (num / den)
+// altogether.
+static void get_mv_projection(MV *output, MV ref, int num, int den) {
+ den = AOMMIN(den, MAX_FRAME_DISTANCE);
+ num = num > 0 ? AOMMIN(num, MAX_FRAME_DISTANCE)
+ : AOMMAX(num, -MAX_FRAME_DISTANCE);
+ int mv_row = ROUND_POWER_OF_TWO_SIGNED(ref.row * num * div_mult[den], 14);
+ int mv_col = ROUND_POWER_OF_TWO_SIGNED(ref.col * num * div_mult[den], 14);
+ const int clamp_max = MV_UPP - 1;
+ const int clamp_min = MV_LOW + 1;
+ output->row = (int16_t)clamp(mv_row, clamp_min, clamp_max);
+ output->col = (int16_t)clamp(mv_col, clamp_min, clamp_max);
+}
-void av1_copy_frame_mvs(const AV1_COMMON *const cm, MODE_INFO *mi, int mi_row,
- int mi_col, int x_mis, int y_mis) {
-#if CONFIG_TMV
+void av1_copy_frame_mvs(const AV1_COMMON *const cm, MB_MODE_INFO *mi,
+ int mi_row, int mi_col, int x_mis, int y_mis) {
const int frame_mvs_stride = ROUND_POWER_OF_TWO(cm->mi_cols, 1);
- MV_REF *frame_mvs = cm->cur_frame->mvs +
- ((mi_row & 0xfffe) >> 1) * frame_mvs_stride +
- ((mi_col & 0xfffe) >> 1);
+ MV_REF *frame_mvs =
+ cm->cur_frame->mvs + (mi_row >> 1) * frame_mvs_stride + (mi_col >> 1);
x_mis = ROUND_POWER_OF_TWO(x_mis, 1);
y_mis = ROUND_POWER_OF_TWO(y_mis, 1);
-#else
- const int frame_mvs_stride = cm->mi_cols;
- MV_REF *frame_mvs = cm->cur_frame->mvs +
- (mi_row & 0xfffe) * frame_mvs_stride + (mi_col & 0xfffe);
- x_mis = AOMMAX(x_mis, 2);
- y_mis = AOMMAX(y_mis, 2);
-#endif // CONFIG_TMV
int w, h;
for (h = 0; h < y_mis; h++) {
- MV_REF *const frame_mv = frame_mvs + h * frame_mvs_stride;
+ MV_REF *mv = frame_mvs;
for (w = 0; w < x_mis; w++) {
- MV_REF *const mv = frame_mv + w;
- mv->ref_frame[0] = mi->mbmi.ref_frame[0];
- mv->ref_frame[1] = mi->mbmi.ref_frame[1];
- mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
- mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
- // (TODO:yunqing) The following 2 lines won't be used and can be removed.
- mv->pred_mv[0].as_int = mi->mbmi.pred_mv[0].as_int;
- mv->pred_mv[1].as_int = mi->mbmi.pred_mv[1].as_int;
+ mv->ref_frame = NONE_FRAME;
+ mv->mv.as_int = 0;
+
+ for (int idx = 0; idx < 2; ++idx) {
+ MV_REFERENCE_FRAME ref_frame = mi->ref_frame[idx];
+ if (ref_frame > INTRA_FRAME) {
+ int8_t ref_idx = cm->ref_frame_side[ref_frame];
+ if (ref_idx) continue;
+ if ((abs(mi->mv[idx].as_mv.row) > REFMVS_LIMIT) ||
+ (abs(mi->mv[idx].as_mv.col) > REFMVS_LIMIT))
+ continue;
+ mv->ref_frame = ref_frame;
+ mv->mv.as_int = mi->mv[idx].as_int;
+ }
+ }
+ mv++;
}
+ frame_mvs += frame_mvs_stride;
}
}
-static uint8_t add_ref_mv_candidate(
- const MODE_INFO *const candidate_mi, const MB_MODE_INFO *const candidate,
- const MV_REFERENCE_FRAME rf[2], uint8_t *refmv_count,
- CANDIDATE_MV *ref_mv_stack, const int use_hp, int len, int block,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- int_mv *gm_mv_candidates, const WarpedMotionParams *gm_params,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- int col, int weight
-#if CONFIG_AMVR
- ,
- int is_integer
-#endif
- ) {
+static void add_ref_mv_candidate(
+ const MB_MODE_INFO *const candidate, const MV_REFERENCE_FRAME rf[2],
+ uint8_t *refmv_count, uint8_t *ref_match_count, uint8_t *newmv_count,
+ CANDIDATE_MV *ref_mv_stack, int_mv *gm_mv_candidates,
+ const WarpedMotionParams *gm_params, int col, int weight) {
+ if (!is_inter_block(candidate)) return; // for intrabc
int index = 0, ref;
- int newmv_count = 0;
-#if CONFIG_CB4X4
- const int unify_bsize = 1;
-#else
- const int unify_bsize = 0;
-#endif
assert(weight % 2 == 0);
if (rf[1] == NONE_FRAME) {
@@ -78,60 +82,24 @@ static uint8_t add_ref_mv_candidate(
for (ref = 0; ref < 2; ++ref) {
if (candidate->ref_frame[ref] == rf[0]) {
int_mv this_refmv;
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- if (is_global_mv_block(candidate_mi, block, gm_params[rf[0]].wmtype))
+ if (is_global_mv_block(candidate, gm_params[rf[0]].wmtype))
this_refmv = gm_mv_candidates[0];
else
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- this_refmv = get_sub_block_mv(candidate_mi, ref, col, block);
-#if CONFIG_AMVR
- lower_mv_precision(&this_refmv.as_mv, use_hp, is_integer);
-#else
- lower_mv_precision(&this_refmv.as_mv, use_hp);
-#endif // CONFIG_AMVR
+ this_refmv = get_sub_block_mv(candidate, ref, col);
for (index = 0; index < *refmv_count; ++index)
if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) break;
- if (index < *refmv_count) ref_mv_stack[index].weight += weight * len;
+ if (index < *refmv_count) ref_mv_stack[index].weight += weight;
// Add a new item to the list.
- if (index == *refmv_count) {
+ if (index == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
ref_mv_stack[index].this_mv = this_refmv;
- ref_mv_stack[index].pred_diff[0] = av1_get_pred_diff_ctx(
- get_sub_block_pred_mv(candidate_mi, ref, col, block), this_refmv);
- ref_mv_stack[index].weight = weight * len;
+ ref_mv_stack[index].weight = weight;
++(*refmv_count);
-
- if (candidate->mode == NEWMV) ++newmv_count;
- }
-
- if (candidate_mi->mbmi.sb_type < BLOCK_8X8 && block >= 0 &&
- !unify_bsize) {
- int alt_block = 3 - block;
- this_refmv = get_sub_block_mv(candidate_mi, ref, col, alt_block);
-#if CONFIG_AMVR
- lower_mv_precision(&this_refmv.as_mv, use_hp, is_integer);
-#else
- lower_mv_precision(&this_refmv.as_mv, use_hp);
-#endif
- for (index = 0; index < *refmv_count; ++index)
- if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) break;
-
- if (index < *refmv_count) ref_mv_stack[index].weight += len;
-
- // Add a new item to the list.
- if (index == *refmv_count) {
- ref_mv_stack[index].this_mv = this_refmv;
- ref_mv_stack[index].pred_diff[0] = av1_get_pred_diff_ctx(
- get_sub_block_pred_mv(candidate_mi, ref, col, alt_block),
- this_refmv);
- ref_mv_stack[index].weight = len;
- ++(*refmv_count);
-
- if (candidate->mode == NEWMV) ++newmv_count;
- }
}
+ if (have_newmv_in_inter_mode(candidate->mode)) ++*newmv_count;
+ ++*ref_match_count;
}
}
} else {
@@ -140,17 +108,10 @@ static uint8_t add_ref_mv_candidate(
int_mv this_refmv[2];
for (ref = 0; ref < 2; ++ref) {
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- if (is_global_mv_block(candidate_mi, block, gm_params[rf[ref]].wmtype))
+ if (is_global_mv_block(candidate, gm_params[rf[ref]].wmtype))
this_refmv[ref] = gm_mv_candidates[ref];
else
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- this_refmv[ref] = get_sub_block_mv(candidate_mi, ref, col, block);
-#if CONFIG_AMVR
- lower_mv_precision(&this_refmv[ref].as_mv, use_hp, is_integer);
-#else
- lower_mv_precision(&this_refmv[ref].as_mv, use_hp);
-#endif
+ this_refmv[ref] = get_sub_block_mv(candidate, ref, col);
}
for (index = 0; index < *refmv_count; ++index)
@@ -158,94 +119,46 @@ static uint8_t add_ref_mv_candidate(
(ref_mv_stack[index].comp_mv.as_int == this_refmv[1].as_int))
break;
- if (index < *refmv_count) ref_mv_stack[index].weight += weight * len;
+ if (index < *refmv_count) ref_mv_stack[index].weight += weight;
// Add a new item to the list.
- if (index == *refmv_count) {
+ if (index == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
ref_mv_stack[index].this_mv = this_refmv[0];
ref_mv_stack[index].comp_mv = this_refmv[1];
- ref_mv_stack[index].pred_diff[0] = av1_get_pred_diff_ctx(
- get_sub_block_pred_mv(candidate_mi, 0, col, block), this_refmv[0]);
- ref_mv_stack[index].pred_diff[1] = av1_get_pred_diff_ctx(
- get_sub_block_pred_mv(candidate_mi, 1, col, block), this_refmv[1]);
- ref_mv_stack[index].weight = weight * len;
+ ref_mv_stack[index].weight = weight;
++(*refmv_count);
-
- if (candidate->mode == NEW_NEWMV) ++newmv_count;
- }
-
- if (candidate_mi->mbmi.sb_type < BLOCK_8X8 && block >= 0 &&
- !unify_bsize) {
- int alt_block = 3 - block;
- this_refmv[0] = get_sub_block_mv(candidate_mi, 0, col, alt_block);
- this_refmv[1] = get_sub_block_mv(candidate_mi, 1, col, alt_block);
-
- for (ref = 0; ref < 2; ++ref) {
-#if CONFIG_AMVR
- lower_mv_precision(&this_refmv[ref].as_mv, use_hp, is_integer);
-#else
- lower_mv_precision(&this_refmv[ref].as_mv, use_hp);
-#endif
- }
- for (index = 0; index < *refmv_count; ++index)
- if (ref_mv_stack[index].this_mv.as_int == this_refmv[0].as_int &&
- ref_mv_stack[index].comp_mv.as_int == this_refmv[1].as_int)
- break;
-
- if (index < *refmv_count) ref_mv_stack[index].weight += len;
-
- // Add a new item to the list.
- if (index == *refmv_count) {
- ref_mv_stack[index].this_mv = this_refmv[0];
- ref_mv_stack[index].comp_mv = this_refmv[1];
- ref_mv_stack[index].pred_diff[0] = av1_get_pred_diff_ctx(
- get_sub_block_pred_mv(candidate_mi, 0, col, block),
- this_refmv[0]);
- ref_mv_stack[index].pred_diff[0] = av1_get_pred_diff_ctx(
- get_sub_block_pred_mv(candidate_mi, 1, col, block),
- this_refmv[1]);
- ref_mv_stack[index].weight = len;
- ++(*refmv_count);
-
- if (candidate->mode == NEW_NEWMV) ++newmv_count;
- }
}
+ if (have_newmv_in_inter_mode(candidate->mode)) ++*newmv_count;
+ ++*ref_match_count;
}
}
- return newmv_count;
}
-static uint8_t scan_row_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- const int mi_col, int block,
- const MV_REFERENCE_FRAME rf[2], int row_offset,
- CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- int_mv *gm_mv_candidates,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- int max_row_offset, int *processed_rows) {
- const int end_mi = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
+static void scan_row_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ const MV_REFERENCE_FRAME rf[2], int row_offset,
+ CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count,
+ uint8_t *ref_match_count, uint8_t *newmv_count,
+ int_mv *gm_mv_candidates, int max_row_offset,
+ int *processed_rows) {
+ int end_mi = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
+ end_mi = AOMMIN(end_mi, mi_size_wide[BLOCK_64X64]);
const int n8_w_8 = mi_size_wide[BLOCK_8X8];
const int n8_w_16 = mi_size_wide[BLOCK_16X16];
int i;
- uint8_t newmv_count = 0;
int col_offset = 0;
-#if CONFIG_CB4X4
const int shift = 0;
// TODO(jingning): Revisit this part after cb4x4 is stable.
if (abs(row_offset) > 1) {
col_offset = 1;
- if (mi_col & 0x01 && xd->n8_w < n8_w_8) --col_offset;
+ if ((mi_col & 0x01) && xd->n8_w < n8_w_8) --col_offset;
}
const int use_step_16 = (xd->n8_w >= 16);
-#else
- const int shift = 1;
- const int use_step_16 = (xd->n8_w >= 8);
-#endif
- MODE_INFO **const candidate_mi0 = xd->mi + row_offset * xd->mi_stride;
-
- for (i = 0; i < end_mi && *refmv_count < MAX_REF_MV_STACK_SIZE;) {
- const MODE_INFO *const candidate_mi = candidate_mi0[col_offset + i];
- const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ MB_MODE_INFO **const candidate_mi0 = xd->mi + row_offset * xd->mi_stride;
+ (void)mi_row;
+
+ for (i = 0; i < end_mi;) {
+ const MB_MODE_INFO *const candidate = candidate_mi0[col_offset + i];
const int candidate_bsize = candidate->sb_type;
const int n8_w = mi_size_wide[candidate_bsize];
int len = AOMMIN(xd->n8_w, n8_w);
@@ -264,60 +177,38 @@ static uint8_t scan_row_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
*processed_rows = inc - row_offset - 1;
}
-#if CONFIG_AMVR
- newmv_count += add_ref_mv_candidate(
- candidate_mi, candidate, rf, refmv_count, ref_mv_stack,
- cm->allow_high_precision_mv, len, block,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- gm_mv_candidates, cm->global_motion,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- col_offset + i, weight, cm->cur_frame_mv_precision_level);
-#else
- newmv_count += add_ref_mv_candidate(candidate_mi, candidate, rf,
- refmv_count, ref_mv_stack,
- cm->allow_high_precision_mv, len, block,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- gm_mv_candidates, cm->global_motion,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- col_offset + i, weight);
-#endif
+ add_ref_mv_candidate(candidate, rf, refmv_count, ref_match_count,
+ newmv_count, ref_mv_stack, gm_mv_candidates,
+ cm->global_motion, col_offset + i, len * weight);
i += len;
}
-
- return newmv_count;
}
-static uint8_t scan_col_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- const int mi_row, int block,
- const MV_REFERENCE_FRAME rf[2], int col_offset,
- CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- int_mv *gm_mv_candidates,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- int max_col_offset, int *processed_cols) {
- const int end_mi = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
+static void scan_col_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ const MV_REFERENCE_FRAME rf[2], int col_offset,
+ CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count,
+ uint8_t *ref_match_count, uint8_t *newmv_count,
+ int_mv *gm_mv_candidates, int max_col_offset,
+ int *processed_cols) {
+ int end_mi = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
+ end_mi = AOMMIN(end_mi, mi_size_high[BLOCK_64X64]);
const int n8_h_8 = mi_size_high[BLOCK_8X8];
const int n8_h_16 = mi_size_high[BLOCK_16X16];
int i;
- uint8_t newmv_count = 0;
int row_offset = 0;
-#if CONFIG_CB4X4
const int shift = 0;
if (abs(col_offset) > 1) {
row_offset = 1;
- if (mi_row & 0x01 && xd->n8_h < n8_h_8) --row_offset;
+ if ((mi_row & 0x01) && xd->n8_h < n8_h_8) --row_offset;
}
const int use_step_16 = (xd->n8_h >= 16);
-#else
- const int shift = 1;
- const int use_step_16 = (xd->n8_h >= 8);
-#endif
+ (void)mi_col;
- for (i = 0; i < end_mi && *refmv_count < MAX_REF_MV_STACK_SIZE;) {
- const MODE_INFO *const candidate_mi =
+ for (i = 0; i < end_mi;) {
+ const MB_MODE_INFO *const candidate =
xd->mi[(row_offset + i) * xd->mi_stride + col_offset];
- const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
const int candidate_bsize = candidate->sb_type;
const int n8_h = mi_size_high[candidate_bsize];
int len = AOMMIN(xd->n8_h, n8_h);
@@ -336,79 +227,46 @@ static uint8_t scan_col_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
*processed_cols = inc - col_offset - 1;
}
-#if CONFIG_AMVR
- newmv_count += add_ref_mv_candidate(
- candidate_mi, candidate, rf, refmv_count, ref_mv_stack,
- cm->allow_high_precision_mv, len, block,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- gm_mv_candidates, cm->global_motion,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- col_offset, weight, cm->cur_frame_mv_precision_level);
-#else
- newmv_count += add_ref_mv_candidate(candidate_mi, candidate, rf,
- refmv_count, ref_mv_stack,
- cm->allow_high_precision_mv, len, block,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- gm_mv_candidates, cm->global_motion,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- col_offset, weight);
-#endif
+ add_ref_mv_candidate(candidate, rf, refmv_count, ref_match_count,
+ newmv_count, ref_mv_stack, gm_mv_candidates,
+ cm->global_motion, col_offset, len * weight);
+
i += len;
}
-
- return newmv_count;
}
-static uint8_t scan_blk_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- const int mi_row, const int mi_col, int block,
- const MV_REFERENCE_FRAME rf[2], int row_offset,
- int col_offset, CANDIDATE_MV *ref_mv_stack,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- int_mv *gm_mv_candidates,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- uint8_t *refmv_count) {
+static void scan_blk_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+ const int mi_row, const int mi_col,
+ const MV_REFERENCE_FRAME rf[2], int row_offset,
+ int col_offset, CANDIDATE_MV *ref_mv_stack,
+ uint8_t *ref_match_count, uint8_t *newmv_count,
+ int_mv *gm_mv_candidates,
+ uint8_t refmv_count[MODE_CTX_REF_FRAMES]) {
const TileInfo *const tile = &xd->tile;
POSITION mi_pos;
- uint8_t newmv_count = 0;
mi_pos.row = row_offset;
mi_pos.col = col_offset;
- if (is_inside(tile, mi_col, mi_row, cm->mi_rows, cm, &mi_pos) &&
- *refmv_count < MAX_REF_MV_STACK_SIZE) {
- const MODE_INFO *const candidate_mi =
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, &mi_pos)) {
+ const MB_MODE_INFO *const candidate =
xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col];
- const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
const int len = mi_size_wide[BLOCK_8X8];
-#if CONFIG_AMVR
- newmv_count += add_ref_mv_candidate(
- candidate_mi, candidate, rf, refmv_count, ref_mv_stack,
- cm->allow_high_precision_mv, len, block,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- gm_mv_candidates, cm->global_motion,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- mi_pos.col, 2, cm->cur_frame_mv_precision_level);
-#else
- newmv_count += add_ref_mv_candidate(candidate_mi, candidate, rf,
- refmv_count, ref_mv_stack,
- cm->allow_high_precision_mv, len, block,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- gm_mv_candidates, cm->global_motion,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- mi_pos.col, 2);
-#endif
+ add_ref_mv_candidate(candidate, rf, refmv_count, ref_match_count,
+ newmv_count, ref_mv_stack, gm_mv_candidates,
+ cm->global_motion, mi_pos.col, 2 * len);
} // Analyze a single 8x8 block motion information.
-
- return newmv_count;
}
static int has_top_right(const AV1_COMMON *cm, const MACROBLOCKD *xd,
int mi_row, int mi_col, int bs) {
- const int sb_mi_size = mi_size_wide[cm->sb_size];
+ const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size];
const int mask_row = mi_row & (sb_mi_size - 1);
const int mask_col = mi_col & (sb_mi_size - 1);
+ if (bs > mi_size_wide[BLOCK_64X64]) return 0;
+
// In a split partition all apart from the bottom right has a top right
int has_tr = !((mask_row & bs) && (mask_col & bs));
@@ -440,22 +298,20 @@ static int has_top_right(const AV1_COMMON *cm, const MACROBLOCKD *xd,
if (xd->n8_w > xd->n8_h)
if (xd->is_sec_rect) has_tr = 0;
-#if CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
// The bottom left square of a Vertical A (in the old format) does
// not have a top right as it is decoded before the right hand
// rectangle of the partition
- if (xd->mi[0]->mbmi.partition == PARTITION_VERT_A)
- if ((mask_row & bs) && !(mask_col & bs)) has_tr = 0;
-#endif // CONFIG_EXT_PARTITION_TYPES
+ if (xd->mi[0]->partition == PARTITION_VERT_A) {
+ if (xd->n8_w == xd->n8_h)
+ if (mask_row & bs) has_tr = 0;
+ }
return has_tr;
}
-#if CONFIG_MFMV
-static int check_sb_border(const AV1_COMMON *cm, const int mi_row,
- const int mi_col, const int row_offset,
- const int col_offset) {
- const int sb_mi_size = mi_size_wide[cm->sb_size];
+static int check_sb_border(const int mi_row, const int mi_col,
+ const int row_offset, const int col_offset) {
+ const int sb_mi_size = mi_size_wide[BLOCK_64X64];
const int row = mi_row & (sb_mi_size - 1);
const int col = mi_col & (sb_mi_size - 1);
@@ -466,513 +322,307 @@ static int check_sb_border(const AV1_COMMON *cm, const int mi_row,
return 1;
}
-static int add_tpl_ref_mv(const AV1_COMMON *cm,
- const MV_REF *prev_frame_mvs_base,
- const MACROBLOCKD *xd, int mi_row, int mi_col,
- MV_REFERENCE_FRAME ref_frame, int blk_row,
- int blk_col, uint8_t *refmv_count,
- CANDIDATE_MV *ref_mv_stack, int16_t *mode_context) {
- (void)prev_frame_mvs_base;
+static int add_tpl_ref_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+ int mi_row, int mi_col, MV_REFERENCE_FRAME ref_frame,
+ int blk_row, int blk_col, int_mv *gm_mv_candidates,
+ uint8_t refmv_count[MODE_CTX_REF_FRAMES],
+ CANDIDATE_MV ref_mv_stacks[][MAX_REF_MV_STACK_SIZE],
+ int16_t *mode_context) {
POSITION mi_pos;
int idx;
- int coll_blk_count = 0;
const int weight_unit = 1; // mi_size_wide[BLOCK_8X8];
-#if CONFIG_MV_COMPRESS
mi_pos.row = (mi_row & 0x01) ? blk_row : blk_row + 1;
mi_pos.col = (mi_col & 0x01) ? blk_col : blk_col + 1;
-#else
- mi_pos.row = blk_row;
- mi_pos.col = blk_col;
-#endif
- if (!is_inside(&xd->tile, mi_col, mi_row, cm->mi_rows, cm, &mi_pos))
- return coll_blk_count;
+ if (!is_inside(&xd->tile, mi_col, mi_row, cm->mi_rows, &mi_pos)) return 0;
- const TPL_MV_REF *prev_frame_mvs = cm->cur_frame->tpl_mvs +
- (mi_row + mi_pos.row) * cm->mi_stride +
- (mi_col + mi_pos.col);
+ const TPL_MV_REF *prev_frame_mvs =
+ cm->tpl_mvs + ((mi_row + mi_pos.row) >> 1) * (cm->mi_stride >> 1) +
+ ((mi_col + mi_pos.col) >> 1);
MV_REFERENCE_FRAME rf[2];
av1_set_ref_frame(rf, ref_frame);
if (rf[1] == NONE_FRAME) {
- for (int i = 0; i < MFMV_STACK_SIZE; ++i) {
- if (prev_frame_mvs->mfmv[ref_frame - LAST_FRAME][i].as_int !=
- INVALID_MV) {
- int_mv this_refmv = prev_frame_mvs->mfmv[ref_frame - LAST_FRAME][i];
- lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv);
-
- if (blk_row == 0 && blk_col == 0)
- if (abs(this_refmv.as_mv.row) >= 16 ||
- abs(this_refmv.as_mv.col) >= 16)
- mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
-
- for (idx = 0; idx < *refmv_count; ++idx)
- if (abs(this_refmv.as_mv.row - ref_mv_stack[idx].this_mv.as_mv.row) <
- 4 &&
- abs(this_refmv.as_mv.col - ref_mv_stack[idx].this_mv.as_mv.col) <
- 4)
- break;
-
- if (idx < *refmv_count) ref_mv_stack[idx].weight += 2 * weight_unit;
-
- if (idx == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
- ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int;
- // TODO(jingning): Hard coded context number. Need to make it better
- // sense.
- ref_mv_stack[idx].pred_diff[0] = 1;
- ref_mv_stack[idx].weight = 2 * weight_unit;
- ++(*refmv_count);
- }
+ int cur_frame_index = cm->cur_frame->cur_frame_offset;
+ int buf_idx_0 = cm->frame_refs[FWD_RF_OFFSET(rf[0])].idx;
+ int frame0_index = cm->buffer_pool->frame_bufs[buf_idx_0].cur_frame_offset;
+ int cur_offset_0 = get_relative_dist(cm, cur_frame_index, frame0_index);
+ CANDIDATE_MV *ref_mv_stack = ref_mv_stacks[rf[0]];
+
+ if (prev_frame_mvs->mfmv0.as_int != INVALID_MV) {
+ int_mv this_refmv;
+
+ get_mv_projection(&this_refmv.as_mv, prev_frame_mvs->mfmv0.as_mv,
+ cur_offset_0, prev_frame_mvs->ref_frame_offset);
+ lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv,
+ cm->cur_frame_force_integer_mv);
+
+ if (blk_row == 0 && blk_col == 0)
+ if (abs(this_refmv.as_mv.row - gm_mv_candidates[0].as_mv.row) >= 16 ||
+ abs(this_refmv.as_mv.col - gm_mv_candidates[0].as_mv.col) >= 16)
+ mode_context[ref_frame] |= (1 << GLOBALMV_OFFSET);
+
+ for (idx = 0; idx < refmv_count[rf[0]]; ++idx)
+ if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int) break;
- ++coll_blk_count;
+ if (idx < refmv_count[rf[0]]) ref_mv_stack[idx].weight += 2 * weight_unit;
+
+ if (idx == refmv_count[rf[0]] &&
+ refmv_count[rf[0]] < MAX_REF_MV_STACK_SIZE) {
+ ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int;
+ ref_mv_stack[idx].weight = 2 * weight_unit;
+ ++(refmv_count[rf[0]]);
}
+ return 1;
}
} else {
// Process compound inter mode
- for (int i = 0; i < MFMV_STACK_SIZE; ++i) {
- if (prev_frame_mvs->mfmv[rf[0] - LAST_FRAME][i].as_int != INVALID_MV &&
- prev_frame_mvs->mfmv[rf[1] - LAST_FRAME][i].as_int != INVALID_MV) {
- int_mv this_refmv = prev_frame_mvs->mfmv[rf[0] - LAST_FRAME][i];
- int_mv comp_refmv = prev_frame_mvs->mfmv[rf[1] - LAST_FRAME][i];
- lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv);
- lower_mv_precision(&comp_refmv.as_mv, cm->allow_high_precision_mv);
-
- if (blk_row == 0 && blk_col == 0)
- if (abs(this_refmv.as_mv.row) >= 16 ||
- abs(this_refmv.as_mv.col) >= 16 ||
- abs(comp_refmv.as_mv.row) >= 16 ||
- abs(comp_refmv.as_mv.col) >= 16)
- mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
-
- for (idx = 0; idx < *refmv_count; ++idx)
- if (abs(this_refmv.as_mv.row - ref_mv_stack[idx].this_mv.as_mv.row) <
- 4 &&
- abs(this_refmv.as_mv.col - ref_mv_stack[idx].this_mv.as_mv.col) <
- 4 &&
- abs(comp_refmv.as_mv.row - ref_mv_stack[idx].comp_mv.as_mv.row) <
- 4 &&
- abs(comp_refmv.as_mv.col - ref_mv_stack[idx].comp_mv.as_mv.col) <
- 4)
- break;
-
- if (idx < *refmv_count) ref_mv_stack[idx].weight += 2 * weight_unit;
-
- if (idx == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
- ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int;
- ref_mv_stack[idx].comp_mv.as_int = comp_refmv.as_int;
- // TODO(jingning): Hard coded context number. Need to make it better
- // sense.
- ref_mv_stack[idx].pred_diff[0] = 1;
- ref_mv_stack[idx].pred_diff[1] = 1;
- ref_mv_stack[idx].weight = 2 * weight_unit;
- ++(*refmv_count);
- }
+ int cur_frame_index = cm->cur_frame->cur_frame_offset;
+ int buf_idx_0 = cm->frame_refs[FWD_RF_OFFSET(rf[0])].idx;
+ int frame0_index = cm->buffer_pool->frame_bufs[buf_idx_0].cur_frame_offset;
+
+ int cur_offset_0 = get_relative_dist(cm, cur_frame_index, frame0_index);
+ int buf_idx_1 = cm->frame_refs[FWD_RF_OFFSET(rf[1])].idx;
+ int frame1_index = cm->buffer_pool->frame_bufs[buf_idx_1].cur_frame_offset;
+ int cur_offset_1 = get_relative_dist(cm, cur_frame_index, frame1_index);
+ CANDIDATE_MV *ref_mv_stack = ref_mv_stacks[ref_frame];
+
+ if (prev_frame_mvs->mfmv0.as_int != INVALID_MV) {
+ int_mv this_refmv;
+ int_mv comp_refmv;
+ get_mv_projection(&this_refmv.as_mv, prev_frame_mvs->mfmv0.as_mv,
+ cur_offset_0, prev_frame_mvs->ref_frame_offset);
+ get_mv_projection(&comp_refmv.as_mv, prev_frame_mvs->mfmv0.as_mv,
+ cur_offset_1, prev_frame_mvs->ref_frame_offset);
- ++coll_blk_count;
- }
- }
- }
-
- return coll_blk_count;
-}
-#else
-static int add_col_ref_mv(const AV1_COMMON *cm,
- const MV_REF *prev_frame_mvs_base,
- int prev_frame_mvs_stride, const MACROBLOCKD *xd,
- int mi_row, int mi_col, MV_REFERENCE_FRAME ref_frame,
- int blk_row, int blk_col, uint8_t *refmv_count,
- CANDIDATE_MV *ref_mv_stack, int16_t *mode_context) {
-#if CONFIG_TMV
- const MV_REF *prev_frame_mvs = prev_frame_mvs_base +
- ((blk_row + 1) >> 1) * prev_frame_mvs_stride +
- ((blk_col + 1) >> 1);
-#else
- const MV_REF *prev_frame_mvs =
- prev_frame_mvs_base + blk_row * prev_frame_mvs_stride + blk_col;
-#endif
- POSITION mi_pos;
- int ref, idx;
- int coll_blk_count = 0;
- const int weight_unit = mi_size_wide[BLOCK_8X8];
-
-#if CONFIG_TMV
- mi_pos.row = blk_row;
- mi_pos.col = blk_col;
-#else
-#if CONFIG_MV_COMPRESS
- mi_pos.row = (mi_row & 0x01) ? blk_row : blk_row + 1;
- mi_pos.col = (mi_col & 0x01) ? blk_col : blk_col + 1;
-#else
- mi_pos.row = blk_row;
- mi_pos.col = blk_col;
-#endif
-#endif // CONFIG_TMV
-
- if (!is_inside(&xd->tile, mi_col, mi_row, cm->mi_rows, cm, &mi_pos))
- return coll_blk_count;
- for (ref = 0; ref < 2; ++ref) {
- if (prev_frame_mvs->ref_frame[ref] == ref_frame) {
- int_mv this_refmv = prev_frame_mvs->mv[ref];
-#if CONFIG_AMVR
lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv,
- cm->cur_frame_mv_precision_level);
-#else
- lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv);
-#endif
+ cm->cur_frame_force_integer_mv);
+ lower_mv_precision(&comp_refmv.as_mv, cm->allow_high_precision_mv,
+ cm->cur_frame_force_integer_mv);
-#if CONFIG_OPT_REF_MV
if (blk_row == 0 && blk_col == 0)
-#endif
- {
- if (abs(this_refmv.as_mv.row) >= 16 || abs(this_refmv.as_mv.col) >= 16)
- mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
- }
-
- for (idx = 0; idx < *refmv_count; ++idx)
- if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int) break;
+ if (abs(this_refmv.as_mv.row - gm_mv_candidates[0].as_mv.row) >= 16 ||
+ abs(this_refmv.as_mv.col - gm_mv_candidates[0].as_mv.col) >= 16 ||
+ abs(comp_refmv.as_mv.row - gm_mv_candidates[1].as_mv.row) >= 16 ||
+ abs(comp_refmv.as_mv.col - gm_mv_candidates[1].as_mv.col) >= 16)
+ mode_context[ref_frame] |= (1 << GLOBALMV_OFFSET);
+
+ for (idx = 0; idx < refmv_count[ref_frame]; ++idx)
+ if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int &&
+ comp_refmv.as_int == ref_mv_stack[idx].comp_mv.as_int)
+ break;
- if (idx < *refmv_count) ref_mv_stack[idx].weight += 2 * weight_unit;
+ if (idx < refmv_count[ref_frame])
+ ref_mv_stack[idx].weight += 2 * weight_unit;
- if (idx == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
+ if (idx == refmv_count[ref_frame] &&
+ refmv_count[ref_frame] < MAX_REF_MV_STACK_SIZE) {
ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int;
- ref_mv_stack[idx].pred_diff[0] =
- av1_get_pred_diff_ctx(prev_frame_mvs->pred_mv[ref], this_refmv);
+ ref_mv_stack[idx].comp_mv.as_int = comp_refmv.as_int;
ref_mv_stack[idx].weight = 2 * weight_unit;
- ++(*refmv_count);
+ ++(refmv_count[ref_frame]);
}
-
- ++coll_blk_count;
+ return 1;
}
}
-
- return coll_blk_count;
+ return 0;
}
-#endif
-
-static void setup_ref_mv_list(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- MV_REFERENCE_FRAME ref_frame,
- uint8_t *refmv_count, CANDIDATE_MV *ref_mv_stack,
- int_mv *mv_ref_list,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- int_mv *gm_mv_candidates,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- int block, int mi_row, int mi_col,
- int16_t *mode_context) {
- int idx, nearest_refmv_count = 0;
- uint8_t newmv_count = 0;
- CANDIDATE_MV tmp_mv;
- int len, nr_len;
-
-#if CONFIG_TMV
- const int prev_frame_mvs_stride = ROUND_POWER_OF_TWO(cm->mi_cols, 1);
- const int tmi_row = mi_row & 0xfffe;
- const int tmi_col = mi_col & 0xfffe;
- const MV_REF *const prev_frame_mvs_base =
- cm->use_prev_frame_mvs
- ? cm->prev_frame->mvs + (tmi_row >> 1) * prev_frame_mvs_stride +
- (tmi_col >> 1)
- : NULL;
-#else
- const int prev_frame_mvs_stride = cm->mi_cols;
-#if CONFIG_MV_COMPRESS
- const MV_REF *const prev_frame_mvs_base =
- cm->use_prev_frame_mvs
- ? cm->prev_frame->mvs +
- (((mi_row >> 1) << 1) + 1) * prev_frame_mvs_stride +
- ((mi_col >> 1) << 1) + 1
- : NULL;
-#else
- const MV_REF *const prev_frame_mvs_base =
- cm->use_prev_frame_mvs
- ? cm->prev_frame->mvs + mi_row * prev_frame_mvs_stride + mi_col
- : NULL;
-#endif
-#endif // CONFIG_TMV
+static void setup_ref_mv_list(
+ const AV1_COMMON *cm, const MACROBLOCKD *xd, MV_REFERENCE_FRAME ref_frame,
+ uint8_t refmv_count[MODE_CTX_REF_FRAMES],
+ CANDIDATE_MV ref_mv_stack[][MAX_REF_MV_STACK_SIZE],
+ int_mv mv_ref_list[][MAX_MV_REF_CANDIDATES], int_mv *gm_mv_candidates,
+ int mi_row, int mi_col, int16_t *mode_context) {
const int bs = AOMMAX(xd->n8_w, xd->n8_h);
const int has_tr = has_top_right(cm, xd, mi_row, mi_col, bs);
MV_REFERENCE_FRAME rf[2];
const TileInfo *const tile = &xd->tile;
int max_row_offset = 0, max_col_offset = 0;
-#if CONFIG_CB4X4
const int row_adj = (xd->n8_h < mi_size_high[BLOCK_8X8]) && (mi_row & 0x01);
const int col_adj = (xd->n8_w < mi_size_wide[BLOCK_8X8]) && (mi_col & 0x01);
-#endif
int processed_rows = 0;
int processed_cols = 0;
- int row_offset, col_offset;
av1_set_ref_frame(rf, ref_frame);
mode_context[ref_frame] = 0;
- *refmv_count = 0;
+ refmv_count[ref_frame] = 0;
// Find valid maximum row/col offset.
if (xd->up_available) {
-#if CONFIG_CB4X4
- max_row_offset = -(MVREF_ROWS << 1) + row_adj;
-#else
- max_row_offset = -MVREF_ROWS;
-#endif
+ max_row_offset = -(MVREF_ROW_COLS << 1) + row_adj;
+
+ if (xd->n8_h < mi_size_high[BLOCK_8X8])
+ max_row_offset = -(2 << 1) + row_adj;
+
max_row_offset =
- find_valid_row_offset(tile, mi_row, cm->mi_rows, cm, max_row_offset);
+ find_valid_row_offset(tile, mi_row, cm->mi_rows, max_row_offset);
}
if (xd->left_available) {
-#if CONFIG_CB4X4
- max_col_offset = -(MVREF_COLS << 1) + col_adj;
-#else
- max_col_offset = -MVREF_COLS;
-#endif
+ max_col_offset = -(MVREF_ROW_COLS << 1) + col_adj;
+
+ if (xd->n8_w < mi_size_wide[BLOCK_8X8])
+ max_col_offset = -(2 << 1) + col_adj;
+
max_col_offset = find_valid_col_offset(tile, mi_col, max_col_offset);
}
+ uint8_t col_match_count = 0;
+ uint8_t row_match_count = 0;
+ uint8_t newmv_count = 0;
+
// Scan the first above row mode info. row_offset = -1;
if (abs(max_row_offset) >= 1)
- newmv_count +=
- scan_row_mbmi(cm, xd, mi_col, block, rf, -1, ref_mv_stack, refmv_count,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- gm_mv_candidates,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- max_row_offset, &processed_rows);
+ scan_row_mbmi(cm, xd, mi_row, mi_col, rf, -1, ref_mv_stack[ref_frame],
+ &refmv_count[ref_frame], &row_match_count, &newmv_count,
+ gm_mv_candidates, max_row_offset, &processed_rows);
// Scan the first left column mode info. col_offset = -1;
if (abs(max_col_offset) >= 1)
- newmv_count +=
- scan_col_mbmi(cm, xd, mi_row, block, rf, -1, ref_mv_stack, refmv_count,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- gm_mv_candidates,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- max_col_offset, &processed_cols);
+ scan_col_mbmi(cm, xd, mi_row, mi_col, rf, -1, ref_mv_stack[ref_frame],
+ &refmv_count[ref_frame], &col_match_count, &newmv_count,
+ gm_mv_candidates, max_col_offset, &processed_cols);
// Check top-right boundary
if (has_tr)
- newmv_count += scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, -1,
- xd->n8_w, ref_mv_stack,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- gm_mv_candidates,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- refmv_count);
-
- nearest_refmv_count = *refmv_count;
-
- for (idx = 0; idx < nearest_refmv_count; ++idx)
- ref_mv_stack[idx].weight += REF_CAT_LEVEL;
-
-#if CONFIG_MFMV
- int blk_row, blk_col;
- int coll_blk_count = 0;
- int voffset = AOMMAX(mi_size_high[BLOCK_8X8], xd->n8_h);
- int hoffset = AOMMAX(mi_size_wide[BLOCK_8X8], xd->n8_w);
-
- int tpl_sample_pos[9][2] = {
- { -2, hoffset }, { 0, hoffset }, { voffset, hoffset },
- { voffset, 0 }, { voffset, -2 }, { voffset, -4 },
- { -4, hoffset }, { voffset, 4 }, { 2, hoffset + 4 },
- };
- int i;
-
- for (blk_row = 0; blk_row < xd->n8_h; blk_row += mi_size_high[BLOCK_8X8]) {
- for (blk_col = 0; blk_col < xd->n8_w; blk_col += mi_size_wide[BLOCK_8X8]) {
- // (TODO: yunqing) prev_frame_mvs_base is not used here, tpl_mvs is used.
- // Can be modified the same way.
- int is_available = add_tpl_ref_mv(
- cm, prev_frame_mvs_base, xd, mi_row, mi_col, ref_frame, blk_row,
- blk_col, refmv_count, ref_mv_stack, mode_context);
- if (blk_row == 0 && blk_col == 0) coll_blk_count = is_available;
+ scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, xd->n8_w,
+ ref_mv_stack[ref_frame], &row_match_count, &newmv_count,
+ gm_mv_candidates, &refmv_count[ref_frame]);
+
+ uint8_t nearest_match = (row_match_count > 0) + (col_match_count > 0);
+ uint8_t nearest_refmv_count = refmv_count[ref_frame];
+
+ // TODO(yunqing): for comp_search, do it for all 3 cases.
+ for (int idx = 0; idx < nearest_refmv_count; ++idx)
+ ref_mv_stack[ref_frame][idx].weight += REF_CAT_LEVEL;
+
+ if (cm->allow_ref_frame_mvs) {
+ int is_available = 0;
+ const int voffset = AOMMAX(mi_size_high[BLOCK_8X8], xd->n8_h);
+ const int hoffset = AOMMAX(mi_size_wide[BLOCK_8X8], xd->n8_w);
+ const int blk_row_end = AOMMIN(xd->n8_h, mi_size_high[BLOCK_64X64]);
+ const int blk_col_end = AOMMIN(xd->n8_w, mi_size_wide[BLOCK_64X64]);
+
+ const int tpl_sample_pos[3][2] = {
+ { voffset, -2 },
+ { voffset, hoffset },
+ { voffset - 2, hoffset },
+ };
+ const int allow_extension = (xd->n8_h >= mi_size_high[BLOCK_8X8]) &&
+ (xd->n8_h < mi_size_high[BLOCK_64X64]) &&
+ (xd->n8_w >= mi_size_wide[BLOCK_8X8]) &&
+ (xd->n8_w < mi_size_wide[BLOCK_64X64]);
+
+ int step_h = (xd->n8_h >= mi_size_high[BLOCK_64X64])
+ ? mi_size_high[BLOCK_16X16]
+ : mi_size_high[BLOCK_8X8];
+ int step_w = (xd->n8_w >= mi_size_wide[BLOCK_64X64])
+ ? mi_size_wide[BLOCK_16X16]
+ : mi_size_wide[BLOCK_8X8];
+
+ for (int blk_row = 0; blk_row < blk_row_end; blk_row += step_h) {
+ for (int blk_col = 0; blk_col < blk_col_end; blk_col += step_w) {
+ int ret = add_tpl_ref_mv(cm, xd, mi_row, mi_col, ref_frame, blk_row,
+ blk_col, gm_mv_candidates, refmv_count,
+ ref_mv_stack, mode_context);
+ if (blk_row == 0 && blk_col == 0) is_available = ret;
+ }
}
- }
- if (coll_blk_count == 0) mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
+ if (is_available == 0) mode_context[ref_frame] |= (1 << GLOBALMV_OFFSET);
- for (i = 0; i < 9; ++i) {
- blk_row = tpl_sample_pos[i][0];
- blk_col = tpl_sample_pos[i][1];
+ for (int i = 0; i < 3 && allow_extension; ++i) {
+ const int blk_row = tpl_sample_pos[i][0];
+ const int blk_col = tpl_sample_pos[i][1];
- if (!check_sb_border(cm, mi_row, mi_col, blk_row, blk_col)) continue;
- // (TODO: yunqing) prev_frame_mvs_base is not used here, tpl_mvs is used.
- // Can be modified the same way.
- coll_blk_count += add_tpl_ref_mv(cm, prev_frame_mvs_base, xd, mi_row,
- mi_col, ref_frame, blk_row, blk_col,
- refmv_count, ref_mv_stack, mode_context);
- }
-#else
-#if CONFIG_TEMPMV_SIGNALING
- if (cm->use_prev_frame_mvs && rf[1] == NONE_FRAME)
-#else
- if (prev_frame_mvs_base && cm->show_frame && cm->last_show_frame &&
- rf[1] == NONE_FRAME)
-#endif
- {
- int blk_row, blk_col;
- int coll_blk_count = 0;
-#if CONFIG_CB4X4
- const int mi_step = (xd->n8_w == 1 || xd->n8_h == 1)
- ? mi_size_wide[BLOCK_8X8]
- : mi_size_wide[BLOCK_16X16];
-#else
- const int mi_step = mi_size_wide[BLOCK_16X16];
-#endif
-
-#if CONFIG_TPL_MV
- // Modified sample positions to be consistent with frame_mvs
- // spatial resolution.
- int tpl_sample_pos[5][2] = { { -1, xd->n8_w },
- { 0, xd->n8_w },
- { xd->n8_h, xd->n8_w },
- { xd->n8_h, 0 },
- { xd->n8_h, -1 } };
- int i;
-#endif
-
- for (blk_row = 0; blk_row < xd->n8_h; blk_row += mi_step) {
- for (blk_col = 0; blk_col < xd->n8_w; blk_col += mi_step) {
-#if CONFIG_TMV
- int is_available =
- add_col_ref_mv(cm, prev_frame_mvs_base, prev_frame_mvs_stride, xd,
- tmi_row, tmi_col, ref_frame, blk_row, blk_col,
- refmv_count, ref_mv_stack, mode_context);
-#else
- int is_available =
- add_col_ref_mv(cm, prev_frame_mvs_base, prev_frame_mvs_stride, xd,
- mi_row, mi_col, ref_frame, blk_row, blk_col,
- refmv_count, ref_mv_stack, mode_context);
-#endif // CONFIG_TMV
-#if CONFIG_OPT_REF_MV
- if (blk_row == 0 && blk_col == 0) coll_blk_count = is_available;
-#else
- coll_blk_count += is_available;
-#endif
- }
- }
-
-#if CONFIG_TPL_MV
- for (i = 0; i < 5; ++i) {
- blk_row = tpl_sample_pos[i][0];
- blk_col = tpl_sample_pos[i][1];
-#if CONFIG_TMV
- coll_blk_count += add_col_ref_mv(
- cm, prev_frame_mvs_base, prev_frame_mvs_stride, xd, tmi_row, tmi_col,
- ref_frame, blk_row, blk_col, refmv_count, ref_mv_stack, mode_context);
-#else
- coll_blk_count += add_col_ref_mv(
- cm, prev_frame_mvs_base, prev_frame_mvs_stride, xd, mi_row, mi_col,
- ref_frame, blk_row, blk_col, refmv_count, ref_mv_stack, mode_context);
-#endif // CONFIG_TMV
+ if (!check_sb_border(mi_row, mi_col, blk_row, blk_col)) continue;
+ add_tpl_ref_mv(cm, xd, mi_row, mi_col, ref_frame, blk_row, blk_col,
+ gm_mv_candidates, refmv_count, ref_mv_stack, mode_context);
}
-#endif
-
- if (coll_blk_count == 0) mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
- } else {
- mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
}
-#endif
+
+ uint8_t dummy_newmv_count = 0;
// Scan the second outer area.
- scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, -1, -1, ref_mv_stack,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- gm_mv_candidates,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- refmv_count);
- for (idx = 2; idx <= MVREF_ROWS; ++idx) {
-#if CONFIG_CB4X4
- row_offset = -(idx << 1) + 1 + row_adj;
- col_offset = -(idx << 1) + 1 + col_adj;
-#else
- row_offset = -idx;
- col_offset = -idx;
-#endif
+ scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, -1, ref_mv_stack[ref_frame],
+ &row_match_count, &dummy_newmv_count, gm_mv_candidates,
+ &refmv_count[ref_frame]);
+
+ for (int idx = 2; idx <= MVREF_ROW_COLS; ++idx) {
+ const int row_offset = -(idx << 1) + 1 + row_adj;
+ const int col_offset = -(idx << 1) + 1 + col_adj;
if (abs(row_offset) <= abs(max_row_offset) &&
abs(row_offset) > processed_rows)
- scan_row_mbmi(cm, xd, mi_col, block, rf, row_offset, ref_mv_stack,
- refmv_count,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- gm_mv_candidates,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ scan_row_mbmi(cm, xd, mi_row, mi_col, rf, row_offset,
+ ref_mv_stack[ref_frame], &refmv_count[ref_frame],
+ &row_match_count, &dummy_newmv_count, gm_mv_candidates,
max_row_offset, &processed_rows);
if (abs(col_offset) <= abs(max_col_offset) &&
abs(col_offset) > processed_cols)
- scan_col_mbmi(cm, xd, mi_row, block, rf, col_offset, ref_mv_stack,
- refmv_count,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- gm_mv_candidates,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ scan_col_mbmi(cm, xd, mi_row, mi_col, rf, col_offset,
+ ref_mv_stack[ref_frame], &refmv_count[ref_frame],
+ &col_match_count, &dummy_newmv_count, gm_mv_candidates,
max_col_offset, &processed_cols);
}
-#if CONFIG_CB4X4
- col_offset = -(MVREF_COLS << 1) + 1 + col_adj;
-#else
- col_offset = -MVREF_COLS;
-#endif
- if (abs(col_offset) <= abs(max_col_offset) &&
- abs(col_offset) > processed_cols)
- scan_col_mbmi(cm, xd, mi_row, block, rf, col_offset, ref_mv_stack,
- refmv_count,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- gm_mv_candidates,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- max_col_offset, &processed_cols);
-
- switch (nearest_refmv_count) {
- case 0: mode_context[ref_frame] |= 0;
-#if !CONFIG_OPT_REF_MV
- if (*refmv_count >= 1) mode_context[ref_frame] |= 1;
- if (*refmv_count == 1)
+ uint8_t ref_match_count = (row_match_count > 0) + (col_match_count > 0);
+
+ switch (nearest_match) {
+ case 0:
+ mode_context[ref_frame] |= 0;
+ if (ref_match_count >= 1) mode_context[ref_frame] |= 1;
+ if (ref_match_count == 1)
mode_context[ref_frame] |= (1 << REFMV_OFFSET);
- else if (*refmv_count >= 2)
+ else if (ref_match_count >= 2)
mode_context[ref_frame] |= (2 << REFMV_OFFSET);
-#endif
break;
- case 1: mode_context[ref_frame] |= (newmv_count > 0) ? 2 : 3;
-#if CONFIG_OPT_REF_MV
- mode_context[ref_frame] |= (3 << REFMV_OFFSET);
-#else
- if (*refmv_count == 1)
+ case 1:
+ mode_context[ref_frame] |= (newmv_count > 0) ? 2 : 3;
+ if (ref_match_count == 1)
mode_context[ref_frame] |= (3 << REFMV_OFFSET);
- else if (*refmv_count >= 2)
+ else if (ref_match_count >= 2)
mode_context[ref_frame] |= (4 << REFMV_OFFSET);
-#endif
break;
-
case 2:
default:
- if (newmv_count >= 2)
+ if (newmv_count >= 1)
mode_context[ref_frame] |= 4;
- else if (newmv_count == 1)
- mode_context[ref_frame] |= 5;
else
- mode_context[ref_frame] |= 6;
+ mode_context[ref_frame] |= 5;
mode_context[ref_frame] |= (5 << REFMV_OFFSET);
break;
}
// Rank the likelihood and assign nearest and near mvs.
- len = nearest_refmv_count;
+ int len = nearest_refmv_count;
while (len > 0) {
- nr_len = 0;
- for (idx = 1; idx < len; ++idx) {
- if (ref_mv_stack[idx - 1].weight < ref_mv_stack[idx].weight) {
- tmp_mv = ref_mv_stack[idx - 1];
- ref_mv_stack[idx - 1] = ref_mv_stack[idx];
- ref_mv_stack[idx] = tmp_mv;
+ int nr_len = 0;
+ for (int idx = 1; idx < len; ++idx) {
+ if (ref_mv_stack[ref_frame][idx - 1].weight <
+ ref_mv_stack[ref_frame][idx].weight) {
+ CANDIDATE_MV tmp_mv = ref_mv_stack[ref_frame][idx - 1];
+ ref_mv_stack[ref_frame][idx - 1] = ref_mv_stack[ref_frame][idx];
+ ref_mv_stack[ref_frame][idx] = tmp_mv;
nr_len = idx;
}
}
len = nr_len;
}
- len = *refmv_count;
+ len = refmv_count[ref_frame];
while (len > nearest_refmv_count) {
- nr_len = nearest_refmv_count;
- for (idx = nearest_refmv_count + 1; idx < len; ++idx) {
- if (ref_mv_stack[idx - 1].weight < ref_mv_stack[idx].weight) {
- tmp_mv = ref_mv_stack[idx - 1];
- ref_mv_stack[idx - 1] = ref_mv_stack[idx];
- ref_mv_stack[idx] = tmp_mv;
+ int nr_len = nearest_refmv_count;
+ for (int idx = nearest_refmv_count + 1; idx < len; ++idx) {
+ if (ref_mv_stack[ref_frame][idx - 1].weight <
+ ref_mv_stack[ref_frame][idx].weight) {
+ CANDIDATE_MV tmp_mv = ref_mv_stack[ref_frame][idx - 1];
+ ref_mv_stack[ref_frame][idx - 1] = ref_mv_stack[ref_frame][idx];
+ ref_mv_stack[ref_frame][idx] = tmp_mv;
nr_len = idx;
}
}
@@ -980,595 +630,324 @@ static void setup_ref_mv_list(const AV1_COMMON *cm, const MACROBLOCKD *xd,
}
if (rf[1] > NONE_FRAME) {
- for (idx = 0; idx < *refmv_count; ++idx) {
- clamp_mv_ref(&ref_mv_stack[idx].this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
- xd->n8_h << MI_SIZE_LOG2, xd);
- clamp_mv_ref(&ref_mv_stack[idx].comp_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
- xd->n8_h << MI_SIZE_LOG2, xd);
- }
- } else {
- for (idx = 0; idx < AOMMIN(MAX_MV_REF_CANDIDATES, *refmv_count); ++idx) {
- mv_ref_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int;
- clamp_mv_ref(&mv_ref_list[idx].as_mv, xd->n8_w << MI_SIZE_LOG2,
- xd->n8_h << MI_SIZE_LOG2, xd);
- }
- }
-}
+ // TODO(jingning, yunqing): Refactor and consolidate the compound and
+ // single reference frame modes. Reduce unnecessary redundancy.
+ if (refmv_count[ref_frame] < MAX_MV_REF_CANDIDATES) {
+ int_mv ref_id[2][2], ref_diff[2][2];
+ int ref_id_count[2] = { 0 }, ref_diff_count[2] = { 0 };
+
+ int mi_width = AOMMIN(mi_size_wide[BLOCK_64X64], xd->n8_w);
+ mi_width = AOMMIN(mi_width, cm->mi_cols - mi_col);
+ int mi_height = AOMMIN(mi_size_high[BLOCK_64X64], xd->n8_h);
+ mi_height = AOMMIN(mi_height, cm->mi_rows - mi_row);
+ int mi_size = AOMMIN(mi_width, mi_height);
+
+ for (int idx = 0; abs(max_row_offset) >= 1 && idx < mi_size;) {
+ const MB_MODE_INFO *const candidate = xd->mi[-xd->mi_stride + idx];
+ const int candidate_bsize = candidate->sb_type;
+
+ for (int rf_idx = 0; rf_idx < 2; ++rf_idx) {
+ MV_REFERENCE_FRAME can_rf = candidate->ref_frame[rf_idx];
+
+ for (int cmp_idx = 0; cmp_idx < 2; ++cmp_idx) {
+ if (can_rf == rf[cmp_idx] && ref_id_count[cmp_idx] < 2) {
+ ref_id[cmp_idx][ref_id_count[cmp_idx]] = candidate->mv[rf_idx];
+ ++ref_id_count[cmp_idx];
+ } else if (can_rf > INTRA_FRAME && ref_diff_count[cmp_idx] < 2) {
+ int_mv this_mv = candidate->mv[rf_idx];
+ if (cm->ref_frame_sign_bias[can_rf] !=
+ cm->ref_frame_sign_bias[rf[cmp_idx]]) {
+ this_mv.as_mv.row = -this_mv.as_mv.row;
+ this_mv.as_mv.col = -this_mv.as_mv.col;
+ }
+ ref_diff[cmp_idx][ref_diff_count[cmp_idx]] = this_mv;
+ ++ref_diff_count[cmp_idx];
+ }
+ }
+ }
+ idx += mi_size_wide[candidate_bsize];
+ }
-// This function searches the neighbourhood of a given MB/SB
-// to try and find candidate reference vectors.
-static void find_mv_refs_idx(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
- int_mv *mv_ref_list, int block, int mi_row,
- int mi_col, find_mv_refs_sync sync,
- void *const data, int16_t *mode_context,
- int_mv zeromv) {
- const int *ref_sign_bias = cm->ref_frame_sign_bias;
- const int sb_mi_size = mi_size_wide[cm->sb_size];
- int i, refmv_count = 0;
- int different_ref_found = 0;
- int context_counter = 0;
-
-#if CONFIG_TMV
- int tmi_row = mi_row & 0xfffe;
- int tmi_col = mi_col & 0xfffe;
- POSITION mi_pos = { 0, 0 };
- int inside = is_inside(&xd->tile, tmi_col, tmi_row, cm->mi_rows, cm, &mi_pos);
- const MV_REF *const prev_frame_mvs =
- cm->use_prev_frame_mvs && inside
- ? cm->prev_frame->mvs + (tmi_row >> 1) * ((cm->mi_cols + 1) >> 1) +
- (tmi_col >> 1)
- : NULL;
-#else
-#if CONFIG_MV_COMPRESS
- const TileInfo *const tile_ = &xd->tile;
- int mi_row_end = tile_->mi_row_end;
- int mi_col_end = tile_->mi_col_end;
- const MV_REF *const prev_frame_mvs =
- cm->use_prev_frame_mvs
- ? cm->prev_frame->mvs +
- AOMMIN(((mi_row >> 1) << 1) + 1 + (((xd->n8_h - 1) >> 1) << 1),
- mi_row_end - 1) *
- cm->mi_cols +
- AOMMIN(((mi_col >> 1) << 1) + 1 + (((xd->n8_w - 1) >> 1) << 1),
- mi_col_end - 1)
- : NULL;
-#else
- const MV_REF *const prev_frame_mvs =
- cm->use_prev_frame_mvs
- ? cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col
- : NULL;
-#endif
-#endif // CONFIG_TMV
-
-#if CONFIG_INTRABC
- assert(IMPLIES(ref_frame == INTRA_FRAME, cm->use_prev_frame_mvs == 0));
-#endif
- const TileInfo *const tile = &xd->tile;
- const BLOCK_SIZE bsize = mi->mbmi.sb_type;
- const int bw = block_size_wide[AOMMAX(bsize, BLOCK_8X8)];
- const int bh = block_size_high[AOMMAX(bsize, BLOCK_8X8)];
- POSITION mv_ref_search[MVREF_NEIGHBOURS];
- const int num_8x8_blocks_wide = num_8x8_blocks_wide_lookup[bsize];
- const int num_8x8_blocks_high = num_8x8_blocks_high_lookup[bsize];
- mv_ref_search[0].row = num_8x8_blocks_high - 1;
- mv_ref_search[0].col = -1;
- mv_ref_search[1].row = -1;
- mv_ref_search[1].col = num_8x8_blocks_wide - 1;
- mv_ref_search[2].row = -1;
- mv_ref_search[2].col = (num_8x8_blocks_wide - 1) >> 1;
- mv_ref_search[3].row = (num_8x8_blocks_high - 1) >> 1;
- mv_ref_search[3].col = -1;
- mv_ref_search[4].row = -1;
- mv_ref_search[4].col = -1;
-#if CONFIG_EXT_PARTITION_TYPES
- if (num_8x8_blocks_wide == num_8x8_blocks_high) {
- mv_ref_search[5].row = -1;
- mv_ref_search[5].col = 0;
- mv_ref_search[6].row = 0;
- mv_ref_search[6].col = -1;
- } else {
- mv_ref_search[5].row = -1;
- mv_ref_search[5].col = num_8x8_blocks_wide;
- mv_ref_search[6].row = num_8x8_blocks_high;
- mv_ref_search[6].col = -1;
- }
-#else
- mv_ref_search[5].row = -1;
- mv_ref_search[5].col = num_8x8_blocks_wide;
- mv_ref_search[6].row = num_8x8_blocks_high;
- mv_ref_search[6].col = -1;
-#endif // CONFIG_EXT_PARTITION_TYPES
- mv_ref_search[7].row = -1;
- mv_ref_search[7].col = -3;
- mv_ref_search[8].row = num_8x8_blocks_high - 1;
- mv_ref_search[8].col = -3;
-
-#if CONFIG_CB4X4
- for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
- mv_ref_search[i].row *= 2;
- mv_ref_search[i].col *= 2;
- }
-#endif // CONFIG_CB4X4
-
- // The nearest 2 blocks are treated differently
- // if the size < 8x8 we get the mv from the bmi substructure,
- // and we also need to keep a mode count.
- for (i = 0; i < 2; ++i) {
- const POSITION *const mv_ref = &mv_ref_search[i];
- if (is_inside(tile, mi_col, mi_row, cm->mi_rows, cm, mv_ref)) {
- const MODE_INFO *const candidate_mi =
- xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
- const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
- // Keep counts for entropy encoding.
- context_counter += mode_2_counter[candidate->mode];
- different_ref_found = 1;
-
- if (candidate->ref_frame[0] == ref_frame)
- ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block),
- refmv_count, mv_ref_list, bw, bh, xd, Done);
- else if (candidate->ref_frame[1] == ref_frame)
- ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block),
- refmv_count, mv_ref_list, bw, bh, xd, Done);
- }
- }
+ for (int idx = 0; abs(max_col_offset) >= 1 && idx < mi_size;) {
+ const MB_MODE_INFO *const candidate = xd->mi[idx * xd->mi_stride - 1];
+ const int candidate_bsize = candidate->sb_type;
+
+ for (int rf_idx = 0; rf_idx < 2; ++rf_idx) {
+ MV_REFERENCE_FRAME can_rf = candidate->ref_frame[rf_idx];
+
+ for (int cmp_idx = 0; cmp_idx < 2; ++cmp_idx) {
+ if (can_rf == rf[cmp_idx] && ref_id_count[cmp_idx] < 2) {
+ ref_id[cmp_idx][ref_id_count[cmp_idx]] = candidate->mv[rf_idx];
+ ++ref_id_count[cmp_idx];
+ } else if (can_rf > INTRA_FRAME && ref_diff_count[cmp_idx] < 2) {
+ int_mv this_mv = candidate->mv[rf_idx];
+ if (cm->ref_frame_sign_bias[can_rf] !=
+ cm->ref_frame_sign_bias[rf[cmp_idx]]) {
+ this_mv.as_mv.row = -this_mv.as_mv.row;
+ this_mv.as_mv.col = -this_mv.as_mv.col;
+ }
+ ref_diff[cmp_idx][ref_diff_count[cmp_idx]] = this_mv;
+ ++ref_diff_count[cmp_idx];
+ }
+ }
+ }
+ idx += mi_size_high[candidate_bsize];
+ }
- // Check the rest of the neighbors in much the same way
- // as before except we don't need to keep track of sub blocks or
- // mode counts.
- for (; i < MVREF_NEIGHBOURS; ++i) {
- const POSITION *const mv_ref = &mv_ref_search[i];
- if (is_inside(tile, mi_col, mi_row, cm->mi_rows, cm, mv_ref)) {
- const MB_MODE_INFO *const candidate =
- !xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]
- ? NULL
- : &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
- if (candidate == NULL) continue;
- if ((mi_row & (sb_mi_size - 1)) + mv_ref->row >= sb_mi_size ||
- (mi_col & (sb_mi_size - 1)) + mv_ref->col >= sb_mi_size)
- continue;
- different_ref_found = 1;
-
- if (candidate->ref_frame[0] == ref_frame)
- ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, bw, bh, xd,
- Done);
- else if (candidate->ref_frame[1] == ref_frame)
- ADD_MV_REF_LIST(candidate->mv[1], refmv_count, mv_ref_list, bw, bh, xd,
- Done);
+ // Build up the compound mv predictor
+ int_mv comp_list[3][2];
+
+ for (int idx = 0; idx < 2; ++idx) {
+ int comp_idx = 0;
+ for (int list_idx = 0; list_idx < ref_id_count[idx] && comp_idx < 2;
+ ++list_idx, ++comp_idx)
+ comp_list[comp_idx][idx] = ref_id[idx][list_idx];
+ for (int list_idx = 0; list_idx < ref_diff_count[idx] && comp_idx < 2;
+ ++list_idx, ++comp_idx)
+ comp_list[comp_idx][idx] = ref_diff[idx][list_idx];
+ for (; comp_idx < 3; ++comp_idx)
+ comp_list[comp_idx][idx] = gm_mv_candidates[idx];
+ }
+
+ if (refmv_count[ref_frame]) {
+ assert(refmv_count[ref_frame] == 1);
+ if (comp_list[0][0].as_int ==
+ ref_mv_stack[ref_frame][0].this_mv.as_int &&
+ comp_list[0][1].as_int ==
+ ref_mv_stack[ref_frame][0].comp_mv.as_int) {
+ ref_mv_stack[ref_frame][refmv_count[ref_frame]].this_mv =
+ comp_list[1][0];
+ ref_mv_stack[ref_frame][refmv_count[ref_frame]].comp_mv =
+ comp_list[1][1];
+ } else {
+ ref_mv_stack[ref_frame][refmv_count[ref_frame]].this_mv =
+ comp_list[0][0];
+ ref_mv_stack[ref_frame][refmv_count[ref_frame]].comp_mv =
+ comp_list[0][1];
+ }
+ ref_mv_stack[ref_frame][refmv_count[ref_frame]].weight = 2;
+ ++refmv_count[ref_frame];
+ } else {
+ for (int idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx) {
+ ref_mv_stack[ref_frame][refmv_count[ref_frame]].this_mv =
+ comp_list[idx][0];
+ ref_mv_stack[ref_frame][refmv_count[ref_frame]].comp_mv =
+ comp_list[idx][1];
+ ref_mv_stack[ref_frame][refmv_count[ref_frame]].weight = 2;
+ ++refmv_count[ref_frame];
+ }
+ }
}
- }
-// TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast
-// on windows platform. The sync here is unnecessary if use_prev_frame_mvs
-// is 0. But after removing it, there will be a hang in the unit test on windows
-// due to several threads waiting for a thread's signal.
-#if defined(_WIN32) && !HAVE_PTHREAD_H
- if (cm->frame_parallel_decode && sync != NULL) {
- sync(data, mi_row);
- }
-#endif
+ assert(refmv_count[ref_frame] >= 2);
- // Check the last frame's mode and mv info.
- if (cm->use_prev_frame_mvs) {
- // Synchronize here for frame parallel decode if sync function is provided.
- if (cm->frame_parallel_decode && sync != NULL) {
- sync(data, mi_row);
+ for (int idx = 0; idx < refmv_count[ref_frame]; ++idx) {
+ clamp_mv_ref(&ref_mv_stack[ref_frame][idx].this_mv.as_mv,
+ xd->n8_w << MI_SIZE_LOG2, xd->n8_h << MI_SIZE_LOG2, xd);
+ clamp_mv_ref(&ref_mv_stack[ref_frame][idx].comp_mv.as_mv,
+ xd->n8_w << MI_SIZE_LOG2, xd->n8_h << MI_SIZE_LOG2, xd);
}
+ } else {
+ // Handle single reference frame extension
+ int mi_width = AOMMIN(mi_size_wide[BLOCK_64X64], xd->n8_w);
+ mi_width = AOMMIN(mi_width, cm->mi_cols - mi_col);
+ int mi_height = AOMMIN(mi_size_high[BLOCK_64X64], xd->n8_h);
+ mi_height = AOMMIN(mi_height, cm->mi_rows - mi_row);
+ int mi_size = AOMMIN(mi_width, mi_height);
+
+ for (int idx = 0; abs(max_row_offset) >= 1 && idx < mi_size &&
+ refmv_count[ref_frame] < MAX_MV_REF_CANDIDATES;) {
+ const MB_MODE_INFO *const candidate = xd->mi[-xd->mi_stride + idx];
+ const int candidate_bsize = candidate->sb_type;
+
+ // TODO(jingning): Refactor the following code.
+ for (int rf_idx = 0; rf_idx < 2; ++rf_idx) {
+ if (candidate->ref_frame[rf_idx] > INTRA_FRAME) {
+ int_mv this_mv = candidate->mv[rf_idx];
+ if (cm->ref_frame_sign_bias[candidate->ref_frame[rf_idx]] !=
+ cm->ref_frame_sign_bias[ref_frame]) {
+ this_mv.as_mv.row = -this_mv.as_mv.row;
+ this_mv.as_mv.col = -this_mv.as_mv.col;
+ }
+ int stack_idx;
+ for (stack_idx = 0; stack_idx < refmv_count[ref_frame]; ++stack_idx) {
+ int_mv stack_mv = ref_mv_stack[ref_frame][stack_idx].this_mv;
+ if (this_mv.as_int == stack_mv.as_int) break;
+ }
- if (prev_frame_mvs->ref_frame[0] == ref_frame) {
- ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, bw, bh,
- xd, Done);
- } else if (prev_frame_mvs->ref_frame[1] == ref_frame) {
- ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, bw, bh,
- xd, Done);
- }
- }
+ if (stack_idx == refmv_count[ref_frame]) {
+ ref_mv_stack[ref_frame][stack_idx].this_mv = this_mv;
- // Since we couldn't find 2 mvs from the same reference frame
- // go back through the neighbors and find motion vectors from
- // different reference frames.
- if (different_ref_found) {
- for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
- const POSITION *mv_ref = &mv_ref_search[i];
- if (is_inside(tile, mi_col, mi_row, cm->mi_rows, cm, mv_ref)) {
- const MB_MODE_INFO *const candidate =
- !xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]
- ? NULL
- : &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
- if (candidate == NULL) continue;
- if ((mi_row & (sb_mi_size - 1)) + mv_ref->row >= sb_mi_size ||
- (mi_col & (sb_mi_size - 1)) + mv_ref->col >= sb_mi_size)
- continue;
-
- // If the candidate is INTRA we don't want to consider its mv.
- IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
- refmv_count, mv_ref_list, bw, bh, xd, Done);
+ // TODO(jingning): Set an arbitrary small number here. The weight
+ // doesn't matter as long as it is properly initialized.
+ ref_mv_stack[ref_frame][stack_idx].weight = 2;
+ ++refmv_count[ref_frame];
+ }
+ }
}
+ idx += mi_size_wide[candidate_bsize];
}
- }
- // Since we still don't have a candidate we'll try the last frame.
- if (cm->use_prev_frame_mvs) {
- if (prev_frame_mvs->ref_frame[0] != ref_frame &&
- prev_frame_mvs->ref_frame[0] > INTRA_FRAME) {
- int_mv mv = prev_frame_mvs->mv[0];
- if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] !=
- ref_sign_bias[ref_frame]) {
- mv.as_mv.row *= -1;
- mv.as_mv.col *= -1;
+ for (int idx = 0; abs(max_col_offset) >= 1 && idx < mi_size &&
+ refmv_count[ref_frame] < MAX_MV_REF_CANDIDATES;) {
+ const MB_MODE_INFO *const candidate = xd->mi[idx * xd->mi_stride - 1];
+ const int candidate_bsize = candidate->sb_type;
+
+ // TODO(jingning): Refactor the following code.
+ for (int rf_idx = 0; rf_idx < 2; ++rf_idx) {
+ if (candidate->ref_frame[rf_idx] > INTRA_FRAME) {
+ int_mv this_mv = candidate->mv[rf_idx];
+ if (cm->ref_frame_sign_bias[candidate->ref_frame[rf_idx]] !=
+ cm->ref_frame_sign_bias[ref_frame]) {
+ this_mv.as_mv.row = -this_mv.as_mv.row;
+ this_mv.as_mv.col = -this_mv.as_mv.col;
+ }
+ int stack_idx;
+ for (stack_idx = 0; stack_idx < refmv_count[ref_frame]; ++stack_idx) {
+ int_mv stack_mv = ref_mv_stack[ref_frame][stack_idx].this_mv;
+ if (this_mv.as_int == stack_mv.as_int) break;
+ }
+
+ if (stack_idx == refmv_count[ref_frame]) {
+ ref_mv_stack[ref_frame][stack_idx].this_mv = this_mv;
+
+ // TODO(jingning): Set an arbitrary small number here. The weight
+ // doesn't matter as long as it is properly initialized.
+ ref_mv_stack[ref_frame][stack_idx].weight = 2;
+ ++refmv_count[ref_frame];
+ }
+ }
}
- ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, bw, bh, xd, Done);
+ idx += mi_size_high[candidate_bsize];
}
- if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME &&
- prev_frame_mvs->ref_frame[1] != ref_frame) {
- int_mv mv = prev_frame_mvs->mv[1];
- if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] !=
- ref_sign_bias[ref_frame]) {
- mv.as_mv.row *= -1;
- mv.as_mv.col *= -1;
- }
- ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, bw, bh, xd, Done);
+ for (int idx = 0; idx < refmv_count[ref_frame]; ++idx) {
+ clamp_mv_ref(&ref_mv_stack[ref_frame][idx].this_mv.as_mv,
+ xd->n8_w << MI_SIZE_LOG2, xd->n8_h << MI_SIZE_LOG2, xd);
}
- }
-Done:
- if (mode_context)
- mode_context[ref_frame] = counter_to_context[context_counter];
- for (i = refmv_count; i < MAX_MV_REF_CANDIDATES; ++i)
- mv_ref_list[i].as_int = zeromv.as_int;
-}
+ if (mv_ref_list != NULL) {
+ for (int idx = refmv_count[ref_frame]; idx < MAX_MV_REF_CANDIDATES; ++idx)
+ mv_ref_list[rf[0]][idx].as_int = gm_mv_candidates[0].as_int;
-// This function keeps a mode count for a given MB/SB
-void av1_update_mv_context(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
- int_mv *mv_ref_list, int block, int mi_row,
- int mi_col, int16_t *mode_context) {
- int i, refmv_count = 0;
- int context_counter = 0;
- const int bw = block_size_wide[mi->mbmi.sb_type];
- const int bh = block_size_high[mi->mbmi.sb_type];
- const TileInfo *const tile = &xd->tile;
- POSITION mv_ref_search[2];
- const int num_8x8_blocks_wide = mi_size_wide[mi->mbmi.sb_type];
- const int num_8x8_blocks_high = mi_size_high[mi->mbmi.sb_type];
-
- mv_ref_search[0].row = num_8x8_blocks_high - 1;
- mv_ref_search[0].col = -1;
- mv_ref_search[1].row = -1;
- mv_ref_search[1].col = num_8x8_blocks_wide - 1;
-
- // Blank the reference vector list
- memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
-
- // The nearest 2 blocks are examined only.
- // If the size < 8x8, we get the mv from the bmi substructure;
- for (i = 0; i < 2; ++i) {
- const POSITION *const mv_ref = &mv_ref_search[i];
- if (is_inside(tile, mi_col, mi_row, cm->mi_rows, cm, mv_ref)) {
- const MODE_INFO *const candidate_mi =
- xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
- const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
-
- // Keep counts for entropy encoding.
- context_counter += mode_2_counter[candidate->mode];
-
- if (candidate->ref_frame[0] == ref_frame) {
- ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block),
- refmv_count, mv_ref_list, bw, bh, xd, Done);
- } else if (candidate->ref_frame[1] == ref_frame) {
- ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block),
- refmv_count, mv_ref_list, bw, bh, xd, Done);
+ for (int idx = 0;
+ idx < AOMMIN(MAX_MV_REF_CANDIDATES, refmv_count[ref_frame]); ++idx) {
+ mv_ref_list[rf[0]][idx].as_int =
+ ref_mv_stack[ref_frame][idx].this_mv.as_int;
}
}
}
-
-Done:
-
- if (mode_context)
- mode_context[ref_frame] = counter_to_context[context_counter];
}
void av1_find_mv_refs(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
- uint8_t *ref_mv_count, CANDIDATE_MV *ref_mv_stack,
- int16_t *compound_mode_context, int_mv *mv_ref_list,
- int mi_row, int mi_col, find_mv_refs_sync sync,
- void *const data, int16_t *mode_context) {
+ MB_MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+ uint8_t ref_mv_count[MODE_CTX_REF_FRAMES],
+ CANDIDATE_MV ref_mv_stack[][MAX_REF_MV_STACK_SIZE],
+ int_mv mv_ref_list[][MAX_MV_REF_CANDIDATES],
+ int_mv *global_mvs, int mi_row, int mi_col,
+ int16_t *mode_context) {
int_mv zeromv[2];
-#if CONFIG_GLOBAL_MOTION
- BLOCK_SIZE bsize = mi->mbmi.sb_type;
-#endif // CONFIG_GLOBAL_MOTION
- int idx, all_zero = 1;
-#if CONFIG_GLOBAL_MOTION
+ BLOCK_SIZE bsize = mi->sb_type;
MV_REFERENCE_FRAME rf[2];
-#endif // CONFIG_GLOBAL_MOTION
-
- av1_update_mv_context(cm, xd, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col,
- compound_mode_context);
-
-#if CONFIG_GLOBAL_MOTION
- if (!CONFIG_INTRABC || ref_frame != INTRA_FRAME) {
- av1_set_ref_frame(rf, ref_frame);
- zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[rf[0]],
- cm->allow_high_precision_mv, bsize,
- mi_col, mi_row, 0
-#if CONFIG_AMVR
- ,
- cm->cur_frame_mv_precision_level
-#endif
- )
- .as_int;
+ av1_set_ref_frame(rf, ref_frame);
+
+ if (ref_frame < REF_FRAMES) {
+ if (ref_frame != INTRA_FRAME) {
+ global_mvs[ref_frame] = gm_get_motion_vector(
+ &cm->global_motion[ref_frame], cm->allow_high_precision_mv, bsize,
+ mi_col, mi_row, cm->cur_frame_force_integer_mv);
+ } else {
+ global_mvs[ref_frame].as_int = INVALID_MV;
+ }
+ }
+
+ if (ref_frame != INTRA_FRAME) {
+ zeromv[0].as_int =
+ gm_get_motion_vector(&cm->global_motion[rf[0]],
+ cm->allow_high_precision_mv, bsize, mi_col, mi_row,
+ cm->cur_frame_force_integer_mv)
+ .as_int;
zeromv[1].as_int =
(rf[1] != NONE_FRAME)
? gm_get_motion_vector(&cm->global_motion[rf[1]],
cm->allow_high_precision_mv, bsize, mi_col,
- mi_row, 0
-#if CONFIG_AMVR
- ,
- cm->cur_frame_mv_precision_level
-#endif
- )
+ mi_row, cm->cur_frame_force_integer_mv)
.as_int
: 0;
} else {
zeromv[0].as_int = zeromv[1].as_int = 0;
}
-#else
- zeromv[0].as_int = zeromv[1].as_int = 0;
-#endif // CONFIG_GLOBAL_MOTION
-
- if (ref_frame <= ALTREF_FRAME)
- find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col,
- sync, data, mode_context, zeromv[0]);
setup_ref_mv_list(cm, xd, ref_frame, ref_mv_count, ref_mv_stack, mv_ref_list,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- zeromv,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- -1, mi_row, mi_col, mode_context);
- /* Note: If global motion is enabled, then we want to set the ALL_ZERO flag
- iff all of the MVs we could generate with NEARMV/NEARESTMV are equivalent
- to the global motion vector.
- Note: For the following to work properly, the encoder can't throw away
- any global motion models after calling this function, even if they are
- unused. Instead we rely on the recode loop: If any non-IDENTITY model
- is unused, the whole frame will be re-encoded without it.
- The problem is that, otherwise, we can end up in the following situation:
- * Encoder has a global motion model with nonzero translational part,
- and all candidate MVs are zero. So the ALL_ZERO flag is unset.
- * Encoder throws away global motion because it is never used.
- * Decoder sees that there is no global motion and all candidate MVs are
- zero, so sets the ALL_ZERO flag.
- * This leads to an encode/decode mismatch.
- */
- for (idx = 0; idx < AOMMIN(3, *ref_mv_count); ++idx) {
- if (ref_mv_stack[idx].this_mv.as_int != zeromv[0].as_int) all_zero = 0;
- if (ref_frame > ALTREF_FRAME)
- if (ref_mv_stack[idx].comp_mv.as_int != zeromv[1].as_int) all_zero = 0;
- }
- if (*ref_mv_count < 2 && ref_frame <= ALTREF_FRAME) {
- for (idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx)
- if (mv_ref_list[idx].as_int != zeromv[0].as_int) all_zero = 0;
- }
-
-#if !CONFIG_OPT_REF_MV
- if (all_zero) mode_context[ref_frame] |= (1 << ALL_ZERO_FLAG_OFFSET);
-#else
- (void)all_zero;
-#endif
+ zeromv, mi_row, mi_col, mode_context);
}
void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
- int_mv *near_mv
-#if CONFIG_AMVR
- ,
- int is_integer
-#endif
- ) {
+ int_mv *near_mv, int is_integer) {
int i;
// Make sure all the candidates are properly clamped etc
for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
-#if CONFIG_AMVR
lower_mv_precision(&mvlist[i].as_mv, allow_hp, is_integer);
-#else
- lower_mv_precision(&mvlist[i].as_mv, allow_hp);
-#endif
}
*nearest_mv = mvlist[0];
*near_mv = mvlist[1];
}
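For orientation, a hedged sketch of how the two routines above are typically combined for a single (non-compound) inter reference frame follows. It is illustrative only and not part of this change; the helper name is made up, and the surrounding state (cm, xd, the current MB_MODE_INFO, mi_row, mi_col) is assumed to have been set up by the caller.

/* Illustrative sketch (not part of the patch): build the candidate list for
 * one single reference frame, then derive the NEARESTMV/NEARMV pair. */
static void example_build_mv_candidates(const AV1_COMMON *cm,
                                        const MACROBLOCKD *xd,
                                        MB_MODE_INFO *mi,
                                        MV_REFERENCE_FRAME ref_frame,
                                        int mi_row, int mi_col) {
  uint8_t ref_mv_count[MODE_CTX_REF_FRAMES] = { 0 };
  CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
  int_mv mv_ref_list[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
  int_mv global_mvs[REF_FRAMES];
  int16_t mode_context[MODE_CTX_REF_FRAMES];

  av1_find_mv_refs(cm, xd, mi, ref_frame, ref_mv_count, ref_mv_stack,
                   mv_ref_list, global_mvs, mi_row, mi_col, mode_context);

  /* For a single reference, rf[0] == ref_frame, so that frame's list can be
   * handed straight to the NEAREST/NEAR selection. */
  int_mv nearest_mv, near_mv;
  av1_find_best_ref_mvs(cm->allow_high_precision_mv, mv_ref_list[ref_frame],
                        &nearest_mv, &near_mv, cm->cur_frame_force_integer_mv);
  (void)nearest_mv;
  (void)near_mv;
}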
-void av1_append_sub8x8_mvs_for_idx(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int block, int ref, int mi_row, int mi_col,
- CANDIDATE_MV *ref_mv_stack,
- uint8_t *ref_mv_count, int_mv *mv_list,
- int_mv *nearest_mv, int_mv *near_mv) {
- MODE_INFO *const mi = xd->mi[0];
- b_mode_info *bmi = mi->bmi;
- int n;
- int_mv zeromv;
- CANDIDATE_MV tmp_mv;
- uint8_t idx;
- uint8_t above_count = 0, left_count = 0;
- MV_REFERENCE_FRAME rf[2] = { mi->mbmi.ref_frame[ref], NONE_FRAME };
- *ref_mv_count = 0;
-
- assert(MAX_MV_REF_CANDIDATES == 2);
-
-#if CONFIG_GLOBAL_MOTION
- zeromv.as_int = gm_get_motion_vector(&cm->global_motion[rf[0]],
- cm->allow_high_precision_mv,
- mi->mbmi.sb_type, mi_col, mi_row, block
-#if CONFIG_AMVR
- ,
- cm->cur_frame_mv_precision_level
-#endif
- )
- .as_int;
-#else
- zeromv.as_int = 0;
-#endif
- find_mv_refs_idx(cm, xd, mi, mi->mbmi.ref_frame[ref], mv_list, block, mi_row,
- mi_col, NULL, NULL, NULL, zeromv);
-
- scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, -1, 0, ref_mv_stack,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- &zeromv,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- ref_mv_count);
- above_count = *ref_mv_count;
-
- scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, 0, -1, ref_mv_stack,
-#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- &zeromv,
-#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
- ref_mv_count);
- left_count = *ref_mv_count - above_count;
-
- if (above_count > 1 && left_count > 0) {
- tmp_mv = ref_mv_stack[1];
- ref_mv_stack[1] = ref_mv_stack[above_count];
- ref_mv_stack[above_count] = tmp_mv;
- }
-
- for (idx = 0; idx < *ref_mv_count; ++idx)
- clamp_mv_ref(&ref_mv_stack[idx].this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
- xd->n8_h << MI_SIZE_LOG2, xd);
-
- for (idx = 0; idx < AOMMIN(MAX_MV_REF_CANDIDATES, *ref_mv_count); ++idx)
- mv_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int;
-
- near_mv->as_int = 0;
- switch (block) {
- case 0:
- nearest_mv->as_int = mv_list[0].as_int;
- near_mv->as_int = mv_list[1].as_int;
- break;
- case 1:
- case 2:
- nearest_mv->as_int = bmi[0].as_mv[ref].as_int;
- for (n = 0; n < MAX_MV_REF_CANDIDATES; ++n)
- if (nearest_mv->as_int != mv_list[n].as_int) {
- near_mv->as_int = mv_list[n].as_int;
- break;
- }
- break;
- case 3: {
- int_mv candidates[2 + MAX_MV_REF_CANDIDATES];
- candidates[0] = bmi[1].as_mv[ref];
- candidates[1] = bmi[0].as_mv[ref];
- candidates[2] = mv_list[0];
- candidates[3] = mv_list[1];
-
- nearest_mv->as_int = bmi[2].as_mv[ref].as_int;
- for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n)
- if (nearest_mv->as_int != candidates[n].as_int) {
- near_mv->as_int = candidates[n].as_int;
- break;
- }
- break;
- }
- default: assert(0 && "Invalid block index.");
- }
-}
-
-#if CONFIG_FRAME_MARKER
void av1_setup_frame_buf_refs(AV1_COMMON *cm) {
cm->cur_frame->cur_frame_offset = cm->frame_offset;
- int alt_buf_idx = cm->frame_refs[ALTREF_FRAME - LAST_FRAME].idx;
- int lst_buf_idx = cm->frame_refs[LAST_FRAME - LAST_FRAME].idx;
- int gld_buf_idx = cm->frame_refs[GOLDEN_FRAME - LAST_FRAME].idx;
-
-#if CONFIG_EXT_REFS
- int lst2_buf_idx = cm->frame_refs[LAST2_FRAME - LAST_FRAME].idx;
- int lst3_buf_idx = cm->frame_refs[LAST3_FRAME - LAST_FRAME].idx;
- int bwd_buf_idx = cm->frame_refs[BWDREF_FRAME - LAST_FRAME].idx;
- int alt2_buf_idx = cm->frame_refs[ALTREF2_FRAME - LAST_FRAME].idx;
-#endif
-
- if (alt_buf_idx >= 0)
- cm->cur_frame->alt_frame_offset =
- cm->buffer_pool->frame_bufs[alt_buf_idx].cur_frame_offset;
-
- if (lst_buf_idx >= 0)
- cm->cur_frame->lst_frame_offset =
- cm->buffer_pool->frame_bufs[lst_buf_idx].cur_frame_offset;
-
- if (gld_buf_idx >= 0)
- cm->cur_frame->gld_frame_offset =
- cm->buffer_pool->frame_bufs[gld_buf_idx].cur_frame_offset;
-
-#if CONFIG_EXT_REFS
- if (lst2_buf_idx >= 0)
- cm->cur_frame->lst2_frame_offset =
- cm->buffer_pool->frame_bufs[lst2_buf_idx].cur_frame_offset;
-
- if (lst3_buf_idx >= 0)
- cm->cur_frame->lst3_frame_offset =
- cm->buffer_pool->frame_bufs[lst3_buf_idx].cur_frame_offset;
-
- if (bwd_buf_idx >= 0)
- cm->cur_frame->bwd_frame_offset =
- cm->buffer_pool->frame_bufs[bwd_buf_idx].cur_frame_offset;
-
- if (alt2_buf_idx >= 0)
- cm->cur_frame->alt2_frame_offset =
- cm->buffer_pool->frame_bufs[alt2_buf_idx].cur_frame_offset;
-#endif
+
+ MV_REFERENCE_FRAME ref_frame;
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ const int buf_idx = cm->frame_refs[ref_frame - LAST_FRAME].idx;
+ if (buf_idx >= 0)
+ cm->cur_frame->ref_frame_offset[ref_frame - LAST_FRAME] =
+ cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset;
+ }
}
-#if CONFIG_FRAME_SIGN_BIAS
void av1_setup_frame_sign_bias(AV1_COMMON *cm) {
MV_REFERENCE_FRAME ref_frame;
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
const int buf_idx = cm->frame_refs[ref_frame - LAST_FRAME].idx;
- if (buf_idx != INVALID_IDX) {
+ if (cm->seq_params.enable_order_hint && buf_idx != INVALID_IDX) {
const int ref_frame_offset =
cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset;
cm->ref_frame_sign_bias[ref_frame] =
- (ref_frame_offset <= (int)cm->frame_offset) ? 0 : 1;
+ (get_relative_dist(cm, ref_frame_offset, (int)cm->frame_offset) <= 0)
+ ? 0
+ : 1;
} else {
cm->ref_frame_sign_bias[ref_frame] = 0;
}
}
}
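The sign-bias decision above depends on get_relative_dist(), which compares two order hints modulo the configured order-hint range so that wrap-around is handled. A standalone sketch of that comparison is given below; the function name and the explicit order_hint_bits parameter are assumptions standing in for the real helper and the value carried in cm->seq_params, and the real helper additionally returns 0 when order hints are disabled.

#include <assert.h>

/* Sketch of an order-hint relative distance: a positive result means `a` is
 * later than `b`, with differences folded into [-2^(bits-1), 2^(bits-1)). */
static int relative_dist_sketch(int a, int b, int order_hint_bits) {
  assert(order_hint_bits >= 1);
  int diff = a - b;
  const int m = 1 << (order_hint_bits - 1);
  diff = (diff & (m - 1)) - (diff & m); /* sign-extend within the hint range */
  return diff;
}

/* Example: with 7-bit order hints, hint 2 follows hint 126 across the wrap,
 * so relative_dist_sketch(2, 126, 7) == 4 rather than -124. */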
-#endif // CONFIG_FRAME_SIGN_BIAS
-#endif // CONFIG_FRAME_MARKER
-
-#if CONFIG_MFMV
-// Although we assign 32 bit integers, all the values are strictly under 14
-// bits.
-static int div_mult[32] = {
- 0, 16384, 8192, 5461, 4096, 3276, 2730, 2340, 2048, 1820, 1638,
- 1489, 1365, 1260, 1170, 1092, 1024, 963, 910, 862, 819, 780,
- 744, 712, 682, 655, 630, 606, 585, 564, 546, 528,
-};
-
-// TODO(jingning): Consider the use of lookup table for (num / den)
-// altogether.
-static void get_mv_projection(MV *output, MV ref, int num, int den) {
- output->row =
- (int16_t)(ROUND_POWER_OF_TWO(ref.row * num * div_mult[den], 14));
- output->col =
- (int16_t)(ROUND_POWER_OF_TWO(ref.col * num * div_mult[den], 14));
-}
#define MAX_OFFSET_WIDTH 64
-#define MAX_OFFSET_HEIGHT 32
+#define MAX_OFFSET_HEIGHT 0
static int get_block_position(AV1_COMMON *cm, int *mi_r, int *mi_c, int blk_row,
int blk_col, MV mv, int sign_bias) {
- if ((abs(mv.row) >> 3) > MAX_OFFSET_HEIGHT ||
- (abs(mv.col) >> 3) > MAX_OFFSET_WIDTH)
- return 0;
+ const int base_blk_row = (blk_row >> 3) << 3;
+ const int base_blk_col = (blk_col >> 3) << 3;
+
+ const int row_offset = (mv.row >= 0) ? (mv.row >> (4 + MI_SIZE_LOG2))
+ : -((-mv.row) >> (4 + MI_SIZE_LOG2));
+
+ const int col_offset = (mv.col >= 0) ? (mv.col >> (4 + MI_SIZE_LOG2))
+ : -((-mv.col) >> (4 + MI_SIZE_LOG2));
- int row = (sign_bias == 1) ? blk_row - (mv.row >> (3 + MI_SIZE_LOG2))
- : blk_row + (mv.row >> (3 + MI_SIZE_LOG2));
- int col = (sign_bias == 1) ? blk_col - (mv.col >> (3 + MI_SIZE_LOG2))
- : blk_col + (mv.col >> (3 + MI_SIZE_LOG2));
+ int row = (sign_bias == 1) ? blk_row - row_offset : blk_row + row_offset;
+ int col = (sign_bias == 1) ? blk_col - col_offset : blk_col + col_offset;
- if (row < 0 || row >= cm->mi_rows || col < 0 || col >= cm->mi_cols) return 0;
+ if (row < 0 || row >= (cm->mi_rows >> 1) || col < 0 ||
+ col >= (cm->mi_cols >> 1))
+ return 0;
+
+ if (row < base_blk_row - (MAX_OFFSET_HEIGHT >> 3) ||
+ row >= base_blk_row + 8 + (MAX_OFFSET_HEIGHT >> 3) ||
+ col < base_blk_col - (MAX_OFFSET_WIDTH >> 3) ||
+ col >= base_blk_col + 8 + (MAX_OFFSET_WIDTH >> 3))
+ return 0;
*mi_r = row;
*mi_c = col;
@@ -1576,504 +955,209 @@ static int get_block_position(AV1_COMMON *cm, int *mi_r, int *mi_c, int blk_row,
return 1;
}
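The (4 + MI_SIZE_LOG2) shift above converts a motion vector stored in 1/8-pel units into an offset on the 8x8-luma (two-MI) grid used by the temporal motion field: dividing by 8 gives pixels and dividing by a further 8 gives 8x8 blocks. A small standalone check of that arithmetic, assuming MI_SIZE_LOG2 == 2, is sketched below with a made-up helper name.

#include <stdio.h>

/* Arithmetic check only: with MI_SIZE_LOG2 == 2 the shift is 6, i.e.
 * 1/8-pel -> pixels (>>3) -> 8x8-luma units (>>3), truncating toward zero
 * exactly as the signed branches above do. */
#define EXAMPLE_MI_SIZE_LOG2 2

static int mv_to_blk_offset(int mv_component /* 1/8-pel units */) {
  const int shift = 4 + EXAMPLE_MI_SIZE_LOG2;
  return (mv_component >= 0) ? (mv_component >> shift)
                             : -((-mv_component) >> shift);
}

int main(void) {
  printf("%d\n", mv_to_blk_offset(256));  /* 32 pixels -> 4 blocks */
  printf("%d\n", mv_to_blk_offset(-100)); /* -12.5 pixels -> -1 block */
  return 0;
}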
-static uint32_t mv_sign_reverse(int_mv ref) {
- int_mv this_mv;
- this_mv.as_mv.row = -ref.as_mv.row;
- this_mv.as_mv.col = -ref.as_mv.col;
+static int motion_field_projection(AV1_COMMON *cm, MV_REFERENCE_FRAME ref_frame,
+ int dir) {
+ TPL_MV_REF *tpl_mvs_base = cm->tpl_mvs;
+ int ref_offset[REF_FRAMES] = { 0 };
- return this_mv.as_int;
-}
+ (void)dir;
-void av1_setup_motion_field(AV1_COMMON *cm) {
+ int ref_frame_idx = cm->frame_refs[FWD_RF_OFFSET(ref_frame)].idx;
+ if (ref_frame_idx < 0) return 0;
+
+ if (cm->buffer_pool->frame_bufs[ref_frame_idx].intra_only) return 0;
+
+ if (cm->buffer_pool->frame_bufs[ref_frame_idx].mi_rows != cm->mi_rows ||
+ cm->buffer_pool->frame_bufs[ref_frame_idx].mi_cols != cm->mi_cols)
+ return 0;
+
+ int ref_frame_index =
+ cm->buffer_pool->frame_bufs[ref_frame_idx].cur_frame_offset;
+ unsigned int *ref_rf_idx =
+ &cm->buffer_pool->frame_bufs[ref_frame_idx].ref_frame_offset[0];
int cur_frame_index = cm->cur_frame->cur_frame_offset;
- int lst_frame_index = 0, alt_frame_index = 0, gld_frame_index = 0;
-#if CONFIG_EXT_REFS
- int lst2_frame_index = 0, lst3_frame_index = 0;
- int bwd_frame_index = 0, alt2_frame_index = 0;
-#endif
- TPL_MV_REF *tpl_mvs_base = cm->cur_frame->tpl_mvs;
-
- for (int ref_frame = 0; ref_frame < INTER_REFS_PER_FRAME; ++ref_frame) {
- int size = (cm->mi_rows + 16) * cm->mi_stride;
- for (int idx = 0; idx < size; ++idx) {
- for (int i = 0; i < MFMV_STACK_SIZE; ++i)
- tpl_mvs_base[idx].mfmv[ref_frame][i].as_int = INVALID_MV;
- }
+ int ref_to_cur = get_relative_dist(cm, ref_frame_index, cur_frame_index);
+
+ for (MV_REFERENCE_FRAME rf = LAST_FRAME; rf <= INTER_REFS_PER_FRAME; ++rf) {
+ ref_offset[rf] =
+ get_relative_dist(cm, ref_frame_index, ref_rf_idx[rf - LAST_FRAME]);
}
- int alt_buf_idx = cm->frame_refs[ALTREF_FRAME - LAST_FRAME].idx;
- int lst_buf_idx = cm->frame_refs[LAST_FRAME - LAST_FRAME].idx;
- int gld_buf_idx = cm->frame_refs[GOLDEN_FRAME - LAST_FRAME].idx;
-#if CONFIG_EXT_REFS
- int lst2_buf_idx = cm->frame_refs[LAST2_FRAME - LAST_FRAME].idx;
- int lst3_buf_idx = cm->frame_refs[LAST3_FRAME - LAST_FRAME].idx;
- int bwd_buf_idx = cm->frame_refs[BWDREF_FRAME - LAST_FRAME].idx;
- int alt2_buf_idx = cm->frame_refs[ALTREF2_FRAME - LAST_FRAME].idx;
-#endif
-
- if (alt_buf_idx >= 0)
- alt_frame_index = cm->buffer_pool->frame_bufs[alt_buf_idx].cur_frame_offset;
-
- if (lst_buf_idx >= 0)
- lst_frame_index = cm->buffer_pool->frame_bufs[lst_buf_idx].cur_frame_offset;
-
- if (gld_buf_idx >= 0)
- gld_frame_index = cm->buffer_pool->frame_bufs[gld_buf_idx].cur_frame_offset;
-
-#if CONFIG_EXT_REFS
- if (lst2_buf_idx >= 0)
- lst2_frame_index =
- cm->buffer_pool->frame_bufs[lst2_buf_idx].cur_frame_offset;
-
- if (lst3_buf_idx >= 0)
- lst3_frame_index =
- cm->buffer_pool->frame_bufs[lst3_buf_idx].cur_frame_offset;
-
- if (bwd_buf_idx >= 0)
- bwd_frame_index = cm->buffer_pool->frame_bufs[bwd_buf_idx].cur_frame_offset;
-
- if (alt2_buf_idx >= 0)
- alt2_frame_index =
- cm->buffer_pool->frame_bufs[alt2_buf_idx].cur_frame_offset;
-#endif
-
- if (alt_frame_index < cur_frame_index) return;
-
- // ======================
- // Process last frame
- // ======================
- if (lst_buf_idx >= 0) {
- MV_REF *mv_ref_base = cm->buffer_pool->frame_bufs[lst_buf_idx].mvs;
- const int lst_frame_idx =
- cm->buffer_pool->frame_bufs[lst_buf_idx].lst_frame_offset;
- const int alt_frame_idx =
- cm->buffer_pool->frame_bufs[lst_buf_idx].alt_frame_offset;
- const int gld_frame_idx =
- cm->buffer_pool->frame_bufs[lst_buf_idx].gld_frame_offset;
-#if CONFIG_EXT_REFS
- const int lst2_frame_idx =
- cm->buffer_pool->frame_bufs[lst_buf_idx].lst2_frame_offset;
- const int lst3_frame_idx =
- cm->buffer_pool->frame_bufs[lst_buf_idx].lst3_frame_offset;
- const int bwd_frame_idx =
- cm->buffer_pool->frame_bufs[lst_buf_idx].bwd_frame_offset;
- const int alt2_frame_idx =
- cm->buffer_pool->frame_bufs[lst_buf_idx].alt2_frame_offset;
-#endif
-
- int alt_offset = AOMMAX(1, alt_frame_idx - lst_frame_index);
- int lst_offset = AOMMAX(1, lst_frame_index - lst_frame_idx);
- int gld_offset = AOMMAX(1, lst_frame_index - gld_frame_idx);
- int cur_to_lst = cur_frame_index - lst_frame_index;
- int cur_to_alt = alt_frame_index - cur_frame_index;
- int cur_to_gld = cur_frame_index - gld_frame_index;
-
-#if CONFIG_EXT_REFS
- int bwd_offset = AOMMAX(1, bwd_frame_idx - lst_frame_index);
- int alt2_offset = AOMMAX(1, alt2_frame_idx - lst_frame_index);
- int lst2_offset = AOMMAX(1, lst_frame_index - lst2_frame_idx);
- int lst3_offset = AOMMAX(1, lst_frame_index - lst3_frame_idx);
- int cur_to_lst2 = cur_frame_index - lst2_frame_index;
- int cur_to_lst3 = cur_frame_index - lst3_frame_index;
- int cur_to_bwd = bwd_frame_index - cur_frame_index;
- int cur_to_alt2 = alt2_frame_index - cur_frame_index;
-#endif
-
- const int is_lst_overlay = (alt_frame_idx == gld_frame_index);
- // clang-format off
- const int ref_frame_offset_buffer[TOTAL_REFS_PER_FRAME] = {
-#if CONFIG_EXT_REFS
- 0, lst_offset, lst2_offset, lst3_offset, gld_offset,
- bwd_offset, alt2_offset, alt_offset
-#else
- 0, lst_offset, gld_offset, alt_offset
-#endif
- };
- // clang-format on
+ if (dir == 2) ref_to_cur = -ref_to_cur;
+
+ MV_REF *mv_ref_base = cm->buffer_pool->frame_bufs[ref_frame_idx].mvs;
+ const int mvs_rows = (cm->mi_rows + 1) >> 1;
+ const int mvs_cols = (cm->mi_cols + 1) >> 1;
- for (int blk_row = 0; blk_row < cm->mi_rows && !is_lst_overlay; ++blk_row) {
- for (int blk_col = 0; blk_col < cm->mi_cols; ++blk_col) {
- MV_REF *mv_ref = &mv_ref_base[blk_row * cm->mi_cols + blk_col];
- MV fwd_mv = mv_ref->mv[0].as_mv;
- MV_REFERENCE_FRAME ref_frame[2] = { mv_ref->ref_frame[0],
- mv_ref->ref_frame[1] };
+ for (int blk_row = 0; blk_row < mvs_rows; ++blk_row) {
+ for (int blk_col = 0; blk_col < mvs_cols; ++blk_col) {
+ MV_REF *mv_ref = &mv_ref_base[blk_row * mvs_cols + blk_col];
+ MV fwd_mv = mv_ref->mv.as_mv;
- // Derive motion vectors toward last reference frame.
- if (ref_frame[0] <= GOLDEN_FRAME && ref_frame[0] > INTRA_FRAME) {
- int_mv this_mv;
- int mi_r, mi_c;
+ if (mv_ref->ref_frame > INTRA_FRAME) {
+ int_mv this_mv;
+ int mi_r, mi_c;
+ const int ref_frame_offset = ref_offset[mv_ref->ref_frame];
- const int ref_frame_offset = ref_frame_offset_buffer[ref_frame[0]];
+ int pos_valid = abs(ref_frame_offset) <= MAX_FRAME_DISTANCE &&
+ ref_frame_offset > 0 &&
+ abs(ref_to_cur) <= MAX_FRAME_DISTANCE;
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst,
+ if (pos_valid) {
+ get_mv_projection(&this_mv.as_mv, fwd_mv, ref_to_cur,
ref_frame_offset);
- int pos_valid = get_block_position(cm, &mi_r, &mi_c, blk_row, blk_col,
- this_mv.as_mv, 1);
-
- if (pos_valid) {
- int mi_offset = mi_r * cm->mi_stride + mi_c;
- tpl_mvs_base[mi_offset].mfmv[FWD_RF_OFFSET(LAST_FRAME)][0].as_int =
- this_mv.as_int;
-
-#if CONFIG_EXT_REFS
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst2,
- ref_frame_offset);
- tpl_mvs_base[mi_offset].mfmv[FWD_RF_OFFSET(LAST2_FRAME)][0].as_int =
- this_mv.as_int;
-
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst3,
- ref_frame_offset);
- tpl_mvs_base[mi_offset].mfmv[FWD_RF_OFFSET(LAST3_FRAME)][0].as_int =
- this_mv.as_int;
-#endif
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_gld,
- ref_frame_offset);
- tpl_mvs_base[mi_offset].mfmv[FWD_RF_OFFSET(GOLDEN_FRAME)]
- [0].as_int = this_mv.as_int;
- }
+ pos_valid = get_block_position(cm, &mi_r, &mi_c, blk_row, blk_col,
+ this_mv.as_mv, dir >> 1);
}
- for (int idx = 0; idx < 2; ++idx) {
- if (ref_frame[idx] <= GOLDEN_FRAME) continue;
-
- int_mv this_mv;
- int mi_r, mi_c;
- fwd_mv = mv_ref->mv[idx].as_mv;
+ if (pos_valid) {
+ int mi_offset = mi_r * (cm->mi_stride >> 1) + mi_c;
- const int ref_frame_offset = ref_frame_offset_buffer[ref_frame[idx]];
-
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst,
- ref_frame_offset);
- int pos_valid = get_block_position(cm, &mi_r, &mi_c, blk_row, blk_col,
- this_mv.as_mv, 0);
-
- if (pos_valid) {
- int mi_offset = mi_r * cm->mi_stride + mi_c;
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_alt,
- ref_frame_offset);
- tpl_mvs_base[mi_offset].mfmv[FWD_RF_OFFSET(ALTREF_FRAME)]
- [0].as_int = this_mv.as_int;
-
-#if CONFIG_EXT_REFS
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_bwd,
- ref_frame_offset);
- tpl_mvs_base[mi_offset].mfmv[FWD_RF_OFFSET(BWDREF_FRAME)]
- [0].as_int = this_mv.as_int;
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_alt2,
- ref_frame_offset);
- tpl_mvs_base[mi_offset].mfmv[FWD_RF_OFFSET(ALTREF2_FRAME)]
- [0].as_int = this_mv.as_int;
-#endif
- }
+ tpl_mvs_base[mi_offset].mfmv0.as_mv.row = fwd_mv.row;
+ tpl_mvs_base[mi_offset].mfmv0.as_mv.col = fwd_mv.col;
+ tpl_mvs_base[mi_offset].ref_frame_offset = ref_frame_offset;
}
}
}
}
- // =======================
- // Process ARF frame
- // =======================
- if (alt_buf_idx >= 0) {
- MV_REF *mv_ref_base = cm->buffer_pool->frame_bufs[alt_buf_idx].mvs;
- const int lst_frame_idx =
- cm->buffer_pool->frame_bufs[alt_buf_idx].lst_frame_offset;
- const int gld_frame_idx =
- cm->buffer_pool->frame_bufs[alt_buf_idx].gld_frame_offset;
-#if CONFIG_EXT_REFS
- const int lst2_frame_idx =
- cm->buffer_pool->frame_bufs[alt_buf_idx].lst2_frame_offset;
- const int lst3_frame_idx =
- cm->buffer_pool->frame_bufs[alt_buf_idx].lst3_frame_offset;
- const int bwd_frame_idx =
- cm->buffer_pool->frame_bufs[alt_buf_idx].bwd_frame_offset;
- const int alt2_frame_idx =
- cm->buffer_pool->frame_bufs[alt_buf_idx].alt2_frame_offset;
-#endif
-
- int lst_offset = AOMMAX(1, alt_frame_index - lst_frame_idx);
- int gld_offset = AOMMAX(1, alt_frame_index - gld_frame_idx);
- int cur_to_alt = alt_frame_index - cur_frame_index;
- int cur_to_lst = cur_frame_index - lst_frame_index;
- int cur_to_gld = cur_frame_index - gld_frame_index;
-#if CONFIG_EXT_REFS
- int bwd_offset = AOMMAX(1, alt_frame_index - bwd_frame_idx);
- int alt2_offset = AOMMAX(1, alt_frame_index - alt2_frame_idx);
- int lst2_offset = AOMMAX(1, alt_frame_index - lst2_frame_idx);
- int lst3_offset = AOMMAX(1, alt_frame_index - lst3_frame_idx);
- int cur_to_lst2 = cur_frame_index - lst2_frame_index;
- int cur_to_lst3 = cur_frame_index - lst3_frame_index;
- int cur_to_bwd = bwd_frame_index - cur_frame_index;
- int cur_to_alt2 = alt2_frame_index - cur_frame_index;
-#endif
- const int ref_stamp = FWD_RF_OFFSET(ALTREF_FRAME);
- // clang-format off
- const int ref_frame_offset_buffer[TOTAL_REFS_PER_FRAME] = {
-#if CONFIG_EXT_REFS
- 0, lst_offset, lst2_offset, lst3_offset, gld_offset,
- bwd_offset, alt2_offset, 0,
-#else
- 0, lst_offset, gld_offset, 0,
-#endif
- };
- // clang-format on
+ return 1;
+}
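The projection pass above records the raw reference MV together with its frame distance; scaling onto the current frame's references happens later through get_mv_projection(), whose pre-existing form (removed above) computes roughly mv * num / den with a 14-bit reciprocal table. The worked example below assumes the helper keeps that div_mult scheme; the updated version may additionally clamp the distances, and the helper name here is illustrative.

#include <assert.h>

/* Sketch of the mv * num / den scaling, using the div_mult values visible in
 * the removed code above (div_mult[den] ~= 16384 / den). Toy ranges only. */
static int project_component_sketch(int mv, int num, int den) {
  static const int div_mult_sketch[9] = { 0,    16384, 8192, 5461, 4096,
                                          3276, 2730,  2340, 2048 };
  assert(den >= 1 && den <= 8); /* the real table extends further */
  const int prod = mv * num * div_mult_sketch[den];
  return (prod + (1 << 13)) >> 14; /* ROUND_POWER_OF_TWO(prod, 14) */
}

/* Example: an MV row of 40 (1/8-pel) observed over a frame distance of 4,
 * projected onto a distance of 2:
 * project_component_sketch(40, 2, 4) == 20 == 40 * 2 / 4. */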
- for (int blk_row = 0; blk_row < cm->mi_rows; ++blk_row) {
- for (int blk_col = 0; blk_col < cm->mi_cols; ++blk_col) {
- MV_REF *mv_ref = &mv_ref_base[blk_row * cm->mi_cols + blk_col];
- MV fwd_mv = mv_ref->mv[0].as_mv;
- MV_REFERENCE_FRAME ref_frame[2] = { mv_ref->ref_frame[0],
- mv_ref->ref_frame[1] };
+void av1_setup_motion_field(AV1_COMMON *cm) {
+ memset(cm->ref_frame_side, 0, sizeof(cm->ref_frame_side));
+ if (!cm->seq_params.enable_order_hint) return;
+
+ TPL_MV_REF *tpl_mvs_base = cm->tpl_mvs;
+ int size = ((cm->mi_rows + MAX_MIB_SIZE) >> 1) * (cm->mi_stride >> 1);
+ for (int idx = 0; idx < size; ++idx) {
+ tpl_mvs_base[idx].mfmv0.as_int = INVALID_MV;
+ tpl_mvs_base[idx].ref_frame_offset = 0;
+ }
- const int ref_frame_offset = ref_frame_offset_buffer[ref_frame[0]];
+ const int cur_order_hint = cm->cur_frame->cur_frame_offset;
+ RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
- if (ref_frame[0] <= GOLDEN_FRAME && ref_frame[0] > INTRA_FRAME) {
- int_mv this_mv;
- int mi_r, mi_c;
+ int ref_buf_idx[INTER_REFS_PER_FRAME];
+ int ref_order_hint[INTER_REFS_PER_FRAME];
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_alt,
- ref_frame_offset);
- int pos_valid = get_block_position(cm, &mi_r, &mi_c, blk_row, blk_col,
- this_mv.as_mv, 0);
-
- if (pos_valid) {
- int mi_offset = mi_r * cm->mi_stride + mi_c;
- tpl_mvs_base[mi_offset]
- .mfmv[FWD_RF_OFFSET(ALTREF_FRAME)][ref_stamp]
- .as_int = mv_sign_reverse(this_mv);
-
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst,
- ref_frame_offset);
- tpl_mvs_base[mi_offset]
- .mfmv[FWD_RF_OFFSET(LAST_FRAME)][ref_stamp]
- .as_int = this_mv.as_int;
-
-#if CONFIG_EXT_REFS
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_bwd,
- ref_frame_offset);
- tpl_mvs_base[mi_offset]
- .mfmv[FWD_RF_OFFSET(BWDREF_FRAME)][ref_stamp]
- .as_int = mv_sign_reverse(this_mv);
-
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_alt2,
- ref_frame_offset);
- tpl_mvs_base[mi_offset]
- .mfmv[FWD_RF_OFFSET(ALTREF2_FRAME)][ref_stamp]
- .as_int = mv_sign_reverse(this_mv);
-
- if (ref_frame[0] >= LAST2_FRAME) {
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst2,
- ref_frame_offset);
- tpl_mvs_base[mi_offset]
- .mfmv[FWD_RF_OFFSET(LAST2_FRAME)][ref_stamp]
- .as_int = this_mv.as_int;
- }
+ for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
+ const int ref_idx = ref_frame - LAST_FRAME;
+ const int buf_idx = cm->frame_refs[ref_idx].idx;
+ int order_hint = 0;
- if (ref_frame[0] >= LAST3_FRAME) {
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst3,
- ref_frame_offset);
- tpl_mvs_base[mi_offset]
- .mfmv[FWD_RF_OFFSET(LAST3_FRAME)][ref_stamp]
- .as_int = this_mv.as_int;
- }
-#endif
- if (ref_frame[0] >= GOLDEN_FRAME) {
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_gld,
- ref_frame_offset);
- tpl_mvs_base[mi_offset]
- .mfmv[FWD_RF_OFFSET(GOLDEN_FRAME)][ref_stamp]
- .as_int = this_mv.as_int;
- }
- }
- }
- }
- }
- }
+ if (buf_idx >= 0) order_hint = frame_bufs[buf_idx].cur_frame_offset;
-// ==========================================
-// Process BWD reference frame
-// ==========================================
-#if CONFIG_EXT_REFS
- if (bwd_buf_idx >= 0) {
- MV_REF *mv_ref_base = cm->buffer_pool->frame_bufs[bwd_buf_idx].mvs;
- const int lst_frame_idx =
- cm->buffer_pool->frame_bufs[bwd_buf_idx].lst_frame_offset;
- const int gld_frame_idx =
- cm->buffer_pool->frame_bufs[bwd_buf_idx].gld_frame_offset;
- const int lst2_frame_idx =
- cm->buffer_pool->frame_bufs[bwd_buf_idx].lst2_frame_offset;
- const int lst3_frame_idx =
- cm->buffer_pool->frame_bufs[bwd_buf_idx].lst3_frame_offset;
- const int bwd_frame_idx =
- cm->buffer_pool->frame_bufs[bwd_buf_idx].bwd_frame_offset;
- const int alt2_frame_idx =
- cm->buffer_pool->frame_bufs[bwd_buf_idx].alt2_frame_offset;
- const int alt_frame_idx =
- cm->buffer_pool->frame_bufs[bwd_buf_idx].alt_frame_offset;
-
- int lst_offset = AOMMAX(1, bwd_frame_index - lst_frame_idx);
- int gld_offset = AOMMAX(1, bwd_frame_index - gld_frame_idx);
- int cur_to_lst = cur_frame_index - lst_frame_index;
-
- int lst2_offset = AOMMAX(1, bwd_frame_index - lst2_frame_idx);
- int lst3_offset = AOMMAX(1, bwd_frame_index - lst3_frame_idx);
- int bwd_offset = AOMMAX(1, bwd_frame_idx - bwd_frame_index);
- int alt2_offset = AOMMAX(1, alt2_frame_idx - bwd_frame_index);
- int alt_offset = AOMMAX(1, alt_frame_idx - bwd_frame_index);
- int cur_to_lst2 = cur_frame_index - lst2_frame_index;
- int cur_to_lst3 = cur_frame_index - lst3_frame_index;
- int cur_to_gld = cur_frame_index - gld_frame_index;
- int cur_to_bwd = bwd_frame_index - cur_frame_index;
-
- const int ref_stamp = FWD_RF_OFFSET(BWDREF_FRAME);
- const int ref_frame_offset_buffer[TOTAL_REFS_PER_FRAME] = {
- 0, lst_offset, lst2_offset, lst3_offset,
- gld_offset, bwd_offset, alt2_offset, alt_offset,
- };
+ ref_buf_idx[ref_idx] = buf_idx;
+ ref_order_hint[ref_idx] = order_hint;
- for (int blk_row = 0; blk_row < cm->mi_rows; ++blk_row) {
- for (int blk_col = 0; blk_col < cm->mi_cols; ++blk_col) {
- MV_REF *mv_ref = &mv_ref_base[blk_row * cm->mi_cols + blk_col];
- MV fwd_mv = mv_ref->mv[0].as_mv;
- MV_REFERENCE_FRAME ref_frame[2] = { mv_ref->ref_frame[0],
- mv_ref->ref_frame[1] };
+ if (get_relative_dist(cm, order_hint, cur_order_hint) > 0)
+ cm->ref_frame_side[ref_frame] = 1;
+ else if (order_hint == cur_order_hint)
+ cm->ref_frame_side[ref_frame] = -1;
+ }
- if (ref_frame[0] <= GOLDEN_FRAME && ref_frame[0] > INTRA_FRAME) {
- const int ref_frame_offset = ref_frame_offset_buffer[ref_frame[0]];
- int_mv this_mv;
- int mi_r, mi_c;
+ int ref_stamp = MFMV_STACK_SIZE - 1;
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_bwd,
- ref_frame_offset);
- int pos_valid = get_block_position(cm, &mi_r, &mi_c, blk_row, blk_col,
- this_mv.as_mv, 0);
-
- if (pos_valid) {
- int mi_offset = mi_r * cm->mi_stride + mi_c;
-
- tpl_mvs_base[mi_offset]
- .mfmv[FWD_RF_OFFSET(BWDREF_FRAME)][ref_stamp]
- .as_int = mv_sign_reverse(this_mv);
-
- // Project the motion vector onto last reference frame
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst,
- ref_frame_offset);
- tpl_mvs_base[mi_offset]
- .mfmv[FWD_RF_OFFSET(LAST_FRAME)][ref_stamp]
- .as_int = this_mv.as_int;
-
- if (ref_frame[0] >= LAST2_FRAME) {
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst2,
- ref_frame_offset);
- tpl_mvs_base[mi_offset]
- .mfmv[FWD_RF_OFFSET(LAST2_FRAME)][ref_stamp]
- .as_int = this_mv.as_int;
- }
+ if (ref_buf_idx[LAST_FRAME - LAST_FRAME] >= 0) {
+ const int alt_of_lst_order_hint =
+ frame_bufs[ref_buf_idx[LAST_FRAME - LAST_FRAME]]
+ .ref_frame_offset[ALTREF_FRAME - LAST_FRAME];
- if (ref_frame[0] >= LAST3_FRAME) {
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst3,
- ref_frame_offset);
- tpl_mvs_base[mi_offset]
- .mfmv[FWD_RF_OFFSET(LAST3_FRAME)][ref_stamp]
- .as_int = this_mv.as_int;
- }
+ const int is_lst_overlay =
+ (alt_of_lst_order_hint == ref_order_hint[GOLDEN_FRAME - LAST_FRAME]);
+ if (!is_lst_overlay) motion_field_projection(cm, LAST_FRAME, 2);
+ --ref_stamp;
+ }
- if (ref_frame[0] >= GOLDEN_FRAME) {
- get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_gld,
- ref_frame_offset);
- tpl_mvs_base[mi_offset]
- .mfmv[FWD_RF_OFFSET(GOLDEN_FRAME)][ref_stamp]
- .as_int = this_mv.as_int;
- }
- }
- }
- }
- }
+ if (get_relative_dist(cm, ref_order_hint[BWDREF_FRAME - LAST_FRAME],
+ cur_order_hint) > 0) {
+ if (motion_field_projection(cm, BWDREF_FRAME, 0)) --ref_stamp;
+ }
+
+ if (get_relative_dist(cm, ref_order_hint[ALTREF2_FRAME - LAST_FRAME],
+ cur_order_hint) > 0) {
+ if (motion_field_projection(cm, ALTREF2_FRAME, 0)) --ref_stamp;
}
-#endif
+
+ if (get_relative_dist(cm, ref_order_hint[ALTREF_FRAME - LAST_FRAME],
+ cur_order_hint) > 0 &&
+ ref_stamp >= 0)
+ if (motion_field_projection(cm, ALTREF_FRAME, 0)) --ref_stamp;
+
+ if (ref_stamp >= 0 && ref_buf_idx[LAST2_FRAME - LAST_FRAME] >= 0)
+ if (motion_field_projection(cm, LAST2_FRAME, 2)) --ref_stamp;
}
-#endif // CONFIG_MFMV
-#if CONFIG_WARPED_MOTION
-#if WARPED_MOTION_SORT_SAMPLES
static INLINE void record_samples(MB_MODE_INFO *mbmi, int *pts, int *pts_inref,
- int *pts_mv, int global_offset_r,
- int global_offset_c, int row_offset,
- int sign_r, int col_offset, int sign_c) {
+ int row_offset, int sign_r, int col_offset,
+ int sign_c) {
int bw = block_size_wide[mbmi->sb_type];
int bh = block_size_high[mbmi->sb_type];
- int cr_offset = row_offset * MI_SIZE + sign_r * AOMMAX(bh, MI_SIZE) / 2 - 1;
- int cc_offset = col_offset * MI_SIZE + sign_c * AOMMAX(bw, MI_SIZE) / 2 - 1;
- int x = cc_offset + global_offset_c;
- int y = cr_offset + global_offset_r;
+ int x = col_offset * MI_SIZE + sign_c * AOMMAX(bw, MI_SIZE) / 2 - 1;
+ int y = row_offset * MI_SIZE + sign_r * AOMMAX(bh, MI_SIZE) / 2 - 1;
pts[0] = (x * 8);
pts[1] = (y * 8);
pts_inref[0] = (x * 8) + mbmi->mv[0].as_mv.col;
pts_inref[1] = (y * 8) + mbmi->mv[0].as_mv.row;
- pts_mv[0] = mbmi->mv[0].as_mv.col;
- pts_mv[1] = mbmi->mv[0].as_mv.row;
}
-// Only sort pts and pts_inref, and pts_mv is not sorted.
-#define TRIM_THR 16
-int sortSamples(int *pts_mv, MV *mv, int *pts, int *pts_inref, int len) {
+// Select samples according to the motion vector difference.
+int selectSamples(MV *mv, int *pts, int *pts_inref, int len, BLOCK_SIZE bsize) {
+ const int bw = block_size_wide[bsize];
+ const int bh = block_size_high[bsize];
+ const int thresh = clamp(AOMMAX(bw, bh), 16, 112);
int pts_mvd[SAMPLES_ARRAY_SIZE] = { 0 };
- int i, j, k;
- int ret = len;
-
- for (i = 0; i < len; ++i)
- pts_mvd[i] =
- abs(pts_mv[2 * i] - mv->col) + abs(pts_mv[2 * i + 1] - mv->row);
-
- for (i = 1; i <= len - 1; ++i) {
- for (j = 0; j < i; ++j) {
- if (pts_mvd[j] > pts_mvd[i]) {
- int temp, tempi, tempj, ptempi, ptempj;
-
- temp = pts_mvd[i];
- tempi = pts[2 * i];
- tempj = pts[2 * i + 1];
- ptempi = pts_inref[2 * i];
- ptempj = pts_inref[2 * i + 1];
-
- for (k = i; k > j; k--) {
- pts_mvd[k] = pts_mvd[k - 1];
- pts[2 * k] = pts[2 * (k - 1)];
- pts[2 * k + 1] = pts[2 * (k - 1) + 1];
- pts_inref[2 * k] = pts_inref[2 * (k - 1)];
- pts_inref[2 * k + 1] = pts_inref[2 * (k - 1) + 1];
- }
-
- pts_mvd[j] = temp;
- pts[2 * j] = tempi;
- pts[2 * j + 1] = tempj;
- pts_inref[2 * j] = ptempi;
- pts_inref[2 * j + 1] = ptempj;
- break;
- }
- }
+ int i, j, k, l = len;
+ int ret = 0;
+ assert(len <= LEAST_SQUARES_SAMPLES_MAX);
+
+ // Obtain the motion vector difference.
+ for (i = 0; i < len; ++i) {
+ pts_mvd[i] = abs(pts_inref[2 * i] - pts[2 * i] - mv->col) +
+ abs(pts_inref[2 * i + 1] - pts[2 * i + 1] - mv->row);
+
+ if (pts_mvd[i] > thresh)
+ pts_mvd[i] = -1;
+ else
+ ret++;
}
- for (i = len - 1; i >= 1; i--) {
- int low = (i == 1) ? 1 : AOMMAX((pts_mvd[i - 1] - pts_mvd[0]) / (i - 1), 1);
-
- if ((pts_mvd[i] - pts_mvd[i - 1]) >= TRIM_THR * low) ret = i;
+ // Keep at least 1 sample.
+ if (!ret) return 1;
+
+ i = 0;
+ j = l - 1;
+ for (k = 0; k < l - ret; k++) {
+ while (pts_mvd[i] != -1) i++;
+ while (pts_mvd[j] == -1) j--;
+ assert(i != j);
+ if (i > j) break;
+
+    // Replace the discarded samples.
+ pts_mvd[i] = pts_mvd[j];
+ pts[2 * i] = pts[2 * j];
+ pts[2 * i + 1] = pts[2 * j + 1];
+ pts_inref[2 * i] = pts_inref[2 * j];
+ pts_inref[2 * i + 1] = pts_inref[2 * j + 1];
+ i++;
+ j--;
}
- if (ret > LEAST_SQUARES_SAMPLES_MAX) ret = LEAST_SQUARES_SAMPLES_MAX;
return ret;
}
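selectSamples() works in 1/8-pel units (pts and pts_inref store positions scaled by 8), so for a 32x16 block the threshold is clamp(max(32, 16), 16, 112) = 32: samples whose motion vector differs from the block MV by more than 4 pixels are discarded, and at least one sample is always retained. In encoder use it is typically paired with findSamples() (defined just below): the gathered samples are pruned around the block's own MV before a warp model is fitted. The call sequence below is a hedged sketch of that pairing, not code from this change; the helper name and the downstream least-squares fit are assumed.

/* Illustrative sketch: gather and prune warp samples for the current block.
 * pts/pts_inref hold (x, y) pairs, hence 2 ints per sample. */
static int example_collect_warp_samples(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                        int mi_row, int mi_col) {
  MB_MODE_INFO *mbmi = xd->mi[0];
  int pts[SAMPLES_ARRAY_SIZE];
  int pts_inref[SAMPLES_ARRAY_SIZE];

  int np = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
  if (np > 1)
    np = selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref, np, mbmi->sb_type);
  /* np pruned samples now sit at the front of pts/pts_inref, ready for the
   * least-squares warp-model fit. */
  return np;
}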
// Note: Samples returned are at 1/8-pel precision
+// Samples are the neighboring blocks' center-point coordinates relative to
+// the top-left pixel of the current block.
int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
- int *pts, int *pts_inref, int *pts_mv) {
- MB_MODE_INFO *const mbmi0 = &(xd->mi[0]->mbmi);
+ int *pts, int *pts_inref) {
+ MB_MODE_INFO *const mbmi0 = xd->mi[0];
int ref_frame = mbmi0->ref_frame[0];
int up_available = xd->up_available;
int left_available = xd->left_available;
int i, mi_step = 1, np = 0;
- int global_offset_c = mi_col * MI_SIZE;
- int global_offset_r = mi_row * MI_SIZE;
const TileInfo *const tile = &xd->tile;
int do_tl = 1;
@@ -2082,8 +1166,7 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
// scan the nearest above rows
if (up_available) {
int mi_row_offset = -1;
- MODE_INFO *mi = xd->mi[mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *mbmi = &mi->mbmi;
+ MB_MODE_INFO *mbmi = xd->mi[mi_row_offset * xd->mi_stride];
uint8_t n8_w = mi_size_wide[mbmi->sb_type];
if (xd->n8_w <= n8_w) {
@@ -2094,42 +1177,38 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
if (col_offset + n8_w > xd->n8_w) do_tr = 0;
if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
- record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
- global_offset_c, 0, -1, col_offset, 1);
+ record_samples(mbmi, pts, pts_inref, 0, -1, col_offset, 1);
pts += 2;
pts_inref += 2;
- pts_mv += 2;
np++;
+ if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
}
} else {
// Handle "current block width > above block width" case.
for (i = 0; i < AOMMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
int mi_col_offset = i;
- mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- mbmi = &mi->mbmi;
+ mbmi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
n8_w = mi_size_wide[mbmi->sb_type];
mi_step = AOMMIN(xd->n8_w, n8_w);
if (mbmi->ref_frame[0] == ref_frame &&
mbmi->ref_frame[1] == NONE_FRAME) {
- record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
- global_offset_c, 0, -1, i, 1);
+ record_samples(mbmi, pts, pts_inref, 0, -1, i, 1);
pts += 2;
pts_inref += 2;
- pts_mv += 2;
np++;
+ if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
}
}
}
}
- assert(2 * np <= SAMPLES_ARRAY_SIZE);
+ assert(np <= LEAST_SQUARES_SAMPLES_MAX);
// scan the nearest left columns
if (left_available) {
int mi_col_offset = -1;
- MODE_INFO *mi = xd->mi[mi_col_offset];
- MB_MODE_INFO *mbmi = &mi->mbmi;
+ MB_MODE_INFO *mbmi = xd->mi[mi_col_offset];
uint8_t n8_h = mi_size_high[mbmi->sb_type];
if (xd->n8_h <= n8_h) {
@@ -2139,182 +1218,329 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
if (row_offset < 0) do_tl = 0;
if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
- record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
- global_offset_c, row_offset, 1, 0, -1);
+ record_samples(mbmi, pts, pts_inref, row_offset, 1, 0, -1);
pts += 2;
pts_inref += 2;
- pts_mv += 2;
np++;
+ if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
}
} else {
      // Handle "current block height > left block height" case.
for (i = 0; i < AOMMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
int mi_row_offset = i;
- mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- mbmi = &mi->mbmi;
+ mbmi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
n8_h = mi_size_high[mbmi->sb_type];
mi_step = AOMMIN(xd->n8_h, n8_h);
if (mbmi->ref_frame[0] == ref_frame &&
mbmi->ref_frame[1] == NONE_FRAME) {
- record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
- global_offset_c, i, 1, 0, -1);
+ record_samples(mbmi, pts, pts_inref, i, 1, 0, -1);
pts += 2;
pts_inref += 2;
- pts_mv += 2;
np++;
+ if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
}
}
}
}
- assert(2 * np <= SAMPLES_ARRAY_SIZE);
+ assert(np <= LEAST_SQUARES_SAMPLES_MAX);
// Top-left block
if (do_tl && left_available && up_available) {
int mi_row_offset = -1;
int mi_col_offset = -1;
- MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *mbmi = &mi->mbmi;
+ MB_MODE_INFO *mbmi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
- record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
- global_offset_c, 0, -1, 0, -1);
+ record_samples(mbmi, pts, pts_inref, 0, -1, 0, -1);
pts += 2;
pts_inref += 2;
- pts_mv += 2;
np++;
+ if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
}
}
- assert(2 * np <= SAMPLES_ARRAY_SIZE);
+ assert(np <= LEAST_SQUARES_SAMPLES_MAX);
// Top-right block
if (do_tr &&
has_top_right(cm, xd, mi_row, mi_col, AOMMAX(xd->n8_w, xd->n8_h))) {
POSITION trb_pos = { -1, xd->n8_w };
- if (is_inside(tile, mi_col, mi_row, cm->mi_rows, cm, &trb_pos)) {
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, &trb_pos)) {
int mi_row_offset = -1;
int mi_col_offset = xd->n8_w;
- MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *mbmi = &mi->mbmi;
+ MB_MODE_INFO *mbmi =
+ xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
- record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
- global_offset_c, 0, -1, xd->n8_w, 1);
+ record_samples(mbmi, pts, pts_inref, 0, -1, xd->n8_w, 1);
np++;
+ if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
}
}
}
- assert(2 * np <= SAMPLES_ARRAY_SIZE);
+ assert(np <= LEAST_SQUARES_SAMPLES_MAX);
return np;
}
-#else
-void calc_projection_samples(MB_MODE_INFO *const mbmi, int x, int y,
- int *pts_inref) {
- pts_inref[0] = (x * 8) + mbmi->mv[0].as_mv.col;
- pts_inref[1] = (y * 8) + mbmi->mv[0].as_mv.row;
+
+void av1_setup_skip_mode_allowed(AV1_COMMON *cm) {
+ cm->is_skip_mode_allowed = 0;
+ cm->ref_frame_idx_0 = cm->ref_frame_idx_1 = INVALID_IDX;
+
+ if (!cm->seq_params.enable_order_hint || frame_is_intra_only(cm) ||
+ cm->reference_mode == SINGLE_REFERENCE)
+ return;
+
+ RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
+ const int cur_frame_offset = cm->frame_offset;
+ int ref_frame_offset[2] = { -1, INT_MAX };
+ int ref_idx[2] = { INVALID_IDX, INVALID_IDX };
+
+ // Identify the nearest forward and backward references.
+ for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
+ const int buf_idx = cm->frame_refs[i].idx;
+ if (buf_idx == INVALID_IDX) continue;
+
+ const int ref_offset = frame_bufs[buf_idx].cur_frame_offset;
+ if (get_relative_dist(cm, ref_offset, cur_frame_offset) < 0) {
+ // Forward reference
+ if (ref_frame_offset[0] == -1 ||
+ get_relative_dist(cm, ref_offset, ref_frame_offset[0]) > 0) {
+ ref_frame_offset[0] = ref_offset;
+ ref_idx[0] = i;
+ }
+ } else if (get_relative_dist(cm, ref_offset, cur_frame_offset) > 0) {
+ // Backward reference
+ if (ref_frame_offset[1] == INT_MAX ||
+ get_relative_dist(cm, ref_offset, ref_frame_offset[1]) < 0) {
+ ref_frame_offset[1] = ref_offset;
+ ref_idx[1] = i;
+ }
+ }
+ }
+
+ if (ref_idx[0] != INVALID_IDX && ref_idx[1] != INVALID_IDX) {
+ // == Bi-directional prediction ==
+ cm->is_skip_mode_allowed = 1;
+ cm->ref_frame_idx_0 = AOMMIN(ref_idx[0], ref_idx[1]);
+ cm->ref_frame_idx_1 = AOMMAX(ref_idx[0], ref_idx[1]);
+ } else if (ref_idx[0] != INVALID_IDX && ref_idx[1] == INVALID_IDX) {
+ // == Forward prediction only ==
+ // Identify the second nearest forward reference.
+ ref_frame_offset[1] = -1;
+ for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
+ const int buf_idx = cm->frame_refs[i].idx;
+ if (buf_idx == INVALID_IDX) continue;
+
+ const int ref_offset = frame_bufs[buf_idx].cur_frame_offset;
+ if ((ref_frame_offset[0] != -1 &&
+ get_relative_dist(cm, ref_offset, ref_frame_offset[0]) < 0) &&
+ (ref_frame_offset[1] == -1 ||
+ get_relative_dist(cm, ref_offset, ref_frame_offset[1]) > 0)) {
+ // Second closest forward reference
+ ref_frame_offset[1] = ref_offset;
+ ref_idx[1] = i;
+ }
+ }
+ if (ref_frame_offset[1] != -1) {
+ cm->is_skip_mode_allowed = 1;
+ cm->ref_frame_idx_0 = AOMMIN(ref_idx[0], ref_idx[1]);
+ cm->ref_frame_idx_1 = AOMMAX(ref_idx[0], ref_idx[1]);
+ }
+ }
}
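As a worked example of the selection above (with hypothetical order hints and ignoring wrap-around, so plain subtraction stands in for get_relative_dist): a current frame at offset 8 with references at offsets LAST=7, LAST2=6, GOLDEN=4, BWDREF=9, ALTREF=12 picks LAST as the nearest forward and BWDREF as the nearest backward reference, enabling skip mode with that pair. The toy program below merely restates that nearest-forward / nearest-backward scan; the offsets and slot order are illustrative only.

#include <stdio.h>

/* Toy nearest-forward / nearest-backward scan; does not model order-hint
 * wrap-around and is not the library code. */
int main(void) {
  const int cur = 8;
  const int ref_offsets[5] = { 7, 6, 4, 9, 12 }; /* LAST, LAST2, GOLDEN, BWDREF, ALTREF */
  int fwd_idx = -1, bwd_idx = -1;
  for (int i = 0; i < 5; ++i) {
    const int d = ref_offsets[i] - cur;
    if (d < 0 && (fwd_idx < 0 || ref_offsets[i] > ref_offsets[fwd_idx]))
      fwd_idx = i;
    if (d > 0 && (bwd_idx < 0 || ref_offsets[i] < ref_offsets[bwd_idx]))
      bwd_idx = i;
  }
  printf("fwd slot %d, bwd slot %d\n", fwd_idx, bwd_idx); /* 0 and 3 */
  return 0;
}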
-// Note: Samples returned are at 1/8-pel precision
-int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
- int *pts, int *pts_inref) {
- MB_MODE_INFO *const mbmi0 = &(xd->mi[0]->mbmi);
- int ref_frame = mbmi0->ref_frame[0];
- int up_available = xd->up_available;
- int left_available = xd->left_available;
- int i, mi_step, np = 0;
- int global_offset_c = mi_col * MI_SIZE;
- int global_offset_r = mi_row * MI_SIZE;
+typedef struct {
+ int map_idx; // frame map index
+ int buf_idx; // frame buffer index
+ int sort_idx; // index based on the offset to be used for sorting
+} REF_FRAME_INFO;
+
+static int compare_ref_frame_info(const void *arg_a, const void *arg_b) {
+ const REF_FRAME_INFO *info_a = (REF_FRAME_INFO *)arg_a;
+ const REF_FRAME_INFO *info_b = (REF_FRAME_INFO *)arg_b;
+
+ if (info_a->sort_idx < info_b->sort_idx) return -1;
+ if (info_a->sort_idx > info_b->sort_idx) return 1;
+ return (info_a->map_idx < info_b->map_idx)
+ ? -1
+ : ((info_a->map_idx > info_b->map_idx) ? 1 : 0);
+}
- // scan the above row
- if (up_available) {
- for (i = 0; i < AOMMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
- int mi_row_offset = -1;
- int mi_col_offset = i;
+static void set_ref_frame_info(AV1_COMMON *const cm, int frame_idx,
+ REF_FRAME_INFO *ref_info) {
+ assert(frame_idx >= 0 && frame_idx <= INTER_REFS_PER_FRAME);
- MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *mbmi = &mi->mbmi;
+ const int buf_idx = ref_info->buf_idx;
- mi_step = AOMMIN(xd->n8_w, mi_size_wide[mbmi->sb_type]);
+ cm->frame_refs[frame_idx].idx = buf_idx;
+ cm->frame_refs[frame_idx].buf = &cm->buffer_pool->frame_bufs[buf_idx].buf;
+ cm->frame_refs[frame_idx].map_idx = ref_info->map_idx;
+}
- if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
- int bw = block_size_wide[mbmi->sb_type];
- int bh = block_size_high[mbmi->sb_type];
- int cr_offset = -AOMMAX(bh, MI_SIZE) / 2 - 1;
- int cc_offset = i * MI_SIZE + AOMMAX(bw, MI_SIZE) / 2 - 1;
- int x = cc_offset + global_offset_c;
- int y = cr_offset + global_offset_r;
-
- pts[0] = (x * 8);
- pts[1] = (y * 8);
- calc_projection_samples(mbmi, x, y, pts_inref);
- pts += 2;
- pts_inref += 2;
- np++;
- if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
- }
+void av1_set_frame_refs(AV1_COMMON *const cm, int lst_map_idx,
+ int gld_map_idx) {
+ BufferPool *const pool = cm->buffer_pool;
+ RefCntBuffer *const frame_bufs = pool->frame_bufs;
+
+ int lst_frame_sort_idx = -1;
+ int gld_frame_sort_idx = -1;
+
+ assert(cm->seq_params.enable_order_hint);
+ assert(cm->seq_params.order_hint_bits_minus_1 >= 0);
+ const int cur_frame_offset = (int)cm->frame_offset;
+ const int cur_frame_sort_idx = 1 << cm->seq_params.order_hint_bits_minus_1;
+
+ REF_FRAME_INFO ref_frame_info[REF_FRAMES];
+ int ref_flag_list[INTER_REFS_PER_FRAME] = { 0, 0, 0, 0, 0, 0, 0 };
+
+ for (int i = 0; i < REF_FRAMES; ++i) {
+ const int map_idx = i;
+
+ ref_frame_info[i].map_idx = map_idx;
+ ref_frame_info[i].sort_idx = -1;
+
+ const int buf_idx = cm->ref_frame_map[map_idx];
+ ref_frame_info[i].buf_idx = buf_idx;
+
+ if (buf_idx < 0 || buf_idx >= FRAME_BUFFERS) continue;
+ // TODO(zoeliu@google.com): To verify the checking on ref_count.
+ if (frame_bufs[buf_idx].ref_count <= 0) continue;
+
+ const int offset = (int)frame_bufs[buf_idx].cur_frame_offset;
+ ref_frame_info[i].sort_idx =
+ (offset == -1) ? -1
+ : cur_frame_sort_idx +
+ get_relative_dist(cm, offset, cur_frame_offset);
+ assert(ref_frame_info[i].sort_idx >= -1);
+
+ if (map_idx == lst_map_idx) lst_frame_sort_idx = ref_frame_info[i].sort_idx;
+ if (map_idx == gld_map_idx) gld_frame_sort_idx = ref_frame_info[i].sort_idx;
+ }
+
+ // Confirm both LAST_FRAME and GOLDEN_FRAME are valid forward reference
+ // frames.
+ if (lst_frame_sort_idx == -1 || lst_frame_sort_idx >= cur_frame_sort_idx) {
+ aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ "Inter frame requests a look-ahead frame as LAST");
+ }
+ if (gld_frame_sort_idx == -1 || gld_frame_sort_idx >= cur_frame_sort_idx) {
+ aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ "Inter frame requests a look-ahead frame as GOLDEN");
+ }
+
+ // Sort ref frames based on their frame_offset values.
+ qsort(ref_frame_info, REF_FRAMES, sizeof(REF_FRAME_INFO),
+ compare_ref_frame_info);
+
+ // Identify forward and backward reference frames.
+ // Forward reference: offset < cur_frame_offset
+ // Backward reference: offset >= cur_frame_offset
+ int fwd_start_idx = 0, fwd_end_idx = REF_FRAMES - 1;
+
+ for (int i = 0; i < REF_FRAMES; i++) {
+ if (ref_frame_info[i].sort_idx == -1) {
+ fwd_start_idx++;
+ continue;
+ }
+
+ if (ref_frame_info[i].sort_idx >= cur_frame_sort_idx) {
+ fwd_end_idx = i - 1;
+ break;
}
}
- assert(2 * np <= SAMPLES_ARRAY_SIZE);
- // scan the left column
- if (left_available) {
- for (i = 0; i < AOMMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
- int mi_row_offset = i;
- int mi_col_offset = -1;
+ int bwd_start_idx = fwd_end_idx + 1;
+ int bwd_end_idx = REF_FRAMES - 1;
- MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *mbmi = &mi->mbmi;
+ // === Backward Reference Frames ===
- mi_step = AOMMIN(xd->n8_h, mi_size_high[mbmi->sb_type]);
+ // == ALTREF_FRAME ==
+ if (bwd_start_idx <= bwd_end_idx) {
+ set_ref_frame_info(cm, ALTREF_FRAME - LAST_FRAME,
+ &ref_frame_info[bwd_end_idx]);
+ ref_flag_list[ALTREF_FRAME - LAST_FRAME] = 1;
+ bwd_end_idx--;
+ }
- if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
- int bw = block_size_wide[mbmi->sb_type];
- int bh = block_size_high[mbmi->sb_type];
- int cr_offset = i * MI_SIZE + AOMMAX(bh, MI_SIZE) / 2 - 1;
- int cc_offset = -AOMMAX(bw, MI_SIZE) / 2 - 1;
- int x = cc_offset + global_offset_c;
- int y = cr_offset + global_offset_r;
-
- pts[0] = (x * 8);
- pts[1] = (y * 8);
- calc_projection_samples(mbmi, x, y, pts_inref);
- pts += 2;
- pts_inref += 2;
- np++;
- if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
- }
+ // == BWDREF_FRAME ==
+ if (bwd_start_idx <= bwd_end_idx) {
+ set_ref_frame_info(cm, BWDREF_FRAME - LAST_FRAME,
+ &ref_frame_info[bwd_start_idx]);
+ ref_flag_list[BWDREF_FRAME - LAST_FRAME] = 1;
+ bwd_start_idx++;
+ }
+
+ // == ALTREF2_FRAME ==
+ if (bwd_start_idx <= bwd_end_idx) {
+ set_ref_frame_info(cm, ALTREF2_FRAME - LAST_FRAME,
+ &ref_frame_info[bwd_start_idx]);
+ ref_flag_list[ALTREF2_FRAME - LAST_FRAME] = 1;
+ }
+
+ // === Forward Reference Frames ===
+
+ for (int i = fwd_start_idx; i <= fwd_end_idx; ++i) {
+ // == LAST_FRAME ==
+ if (ref_frame_info[i].map_idx == lst_map_idx) {
+ set_ref_frame_info(cm, LAST_FRAME - LAST_FRAME, &ref_frame_info[i]);
+ ref_flag_list[LAST_FRAME - LAST_FRAME] = 1;
+ }
+
+ // == GOLDEN_FRAME ==
+ if (ref_frame_info[i].map_idx == gld_map_idx) {
+ set_ref_frame_info(cm, GOLDEN_FRAME - LAST_FRAME, &ref_frame_info[i]);
+ ref_flag_list[GOLDEN_FRAME - LAST_FRAME] = 1;
}
}
- assert(2 * np <= SAMPLES_ARRAY_SIZE);
- if (left_available && up_available) {
- int mi_row_offset = -1;
- int mi_col_offset = -1;
+ assert(ref_flag_list[LAST_FRAME - LAST_FRAME] == 1 &&
+ ref_flag_list[GOLDEN_FRAME - LAST_FRAME] == 1);
- MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *mbmi = &mi->mbmi;
+ // == LAST2_FRAME ==
+ // == LAST3_FRAME ==
+ // == BWDREF_FRAME ==
+ // == ALTREF2_FRAME ==
+ // == ALTREF_FRAME ==
- if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
- int bw = block_size_wide[mbmi->sb_type];
- int bh = block_size_high[mbmi->sb_type];
- int cr_offset = -AOMMAX(bh, MI_SIZE) / 2 - 1;
- int cc_offset = -AOMMAX(bw, MI_SIZE) / 2 - 1;
- int x = cc_offset + global_offset_c;
- int y = cr_offset + global_offset_r;
-
- pts[0] = (x * 8);
- pts[1] = (y * 8);
- calc_projection_samples(mbmi, x, y, pts_inref);
- np++;
+ // Set up the reference frames in the anti-chronological order.
+ static const MV_REFERENCE_FRAME ref_frame_list[INTER_REFS_PER_FRAME - 2] = {
+ LAST2_FRAME, LAST3_FRAME, BWDREF_FRAME, ALTREF2_FRAME, ALTREF_FRAME
+ };
+
+ int ref_idx;
+ for (ref_idx = 0; ref_idx < (INTER_REFS_PER_FRAME - 2); ref_idx++) {
+ const MV_REFERENCE_FRAME ref_frame = ref_frame_list[ref_idx];
+
+ if (ref_flag_list[ref_frame - LAST_FRAME] == 1) continue;
+
+ while (fwd_start_idx <= fwd_end_idx &&
+ (ref_frame_info[fwd_end_idx].map_idx == lst_map_idx ||
+ ref_frame_info[fwd_end_idx].map_idx == gld_map_idx)) {
+ fwd_end_idx--;
}
+ if (fwd_start_idx > fwd_end_idx) break;
+
+ set_ref_frame_info(cm, ref_frame - LAST_FRAME,
+ &ref_frame_info[fwd_end_idx]);
+ ref_flag_list[ref_frame - LAST_FRAME] = 1;
+
+ fwd_end_idx--;
}
- assert(2 * np <= SAMPLES_ARRAY_SIZE);
- return np;
+ // Assign all the remaining frame(s), if any, to the earliest reference frame.
+ for (; ref_idx < (INTER_REFS_PER_FRAME - 2); ref_idx++) {
+ const MV_REFERENCE_FRAME ref_frame = ref_frame_list[ref_idx];
+ if (ref_flag_list[ref_frame - LAST_FRAME] == 1) continue;
+ set_ref_frame_info(cm, ref_frame - LAST_FRAME,
+ &ref_frame_info[fwd_start_idx]);
+ ref_flag_list[ref_frame - LAST_FRAME] = 1;
+ }
+
+ for (int i = 0; i < INTER_REFS_PER_FRAME; i++) {
+ assert(ref_flag_list[i] == 1);
+ }
}
-#endif // WARPED_MOTION_SORT_SAMPLES
-#endif // CONFIG_WARPED_MOTION
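To make the assignment order in av1_set_frame_refs() concrete, consider hypothetical order hints (small enough that wrap-around never triggers): the current frame at 16, the LAST buffer (lst_map_idx) at 14, the GOLDEN buffer (gld_map_idx) at 8, and the remaining valid buffers at 10, 12, 18, 20 and 24. After sorting, the backward side { 18, 20, 24 } is consumed as ALTREF_FRAME = 24 (farthest ahead), BWDREF_FRAME = 18 (nearest ahead) and ALTREF2_FRAME = 20; LAST_FRAME = 14 and GOLDEN_FRAME = 8 come directly from the signalled map indices; the leftover forward references then fill LAST2_FRAME = 12 and LAST3_FRAME = 10 in anti-chronological order.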
diff --git a/third_party/aom/av1/common/mvref_common.h b/third_party/aom/av1/common/mvref_common.h
index 348887e43..716b4a247 100644
--- a/third_party/aom/av1/common/mvref_common.h
+++ b/third_party/aom/av1/common/mvref_common.h
@@ -18,103 +18,36 @@
extern "C" {
#endif
-#define MVREF_NEIGHBOURS 9
-#define MVREF_ROWS 3
-#define MVREF_COLS 4
+#define MVREF_ROW_COLS 3
+
+// Set the upper limit of the motion vector component magnitude.
+// This makes a motion vector fit in 26 bits. Adding 3 bits for the
+// reference frame index, a motion vector tuple can hence be stored within
+// a 32-bit word for efficient load/store operations.
+#define REFMVS_LIMIT ((1 << 12) - 1)
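Spelled out, the budget behind REFMVS_LIMIT is: 12 magnitude bits plus one sign bit per component, so a (row, col) pair needs 2 * 13 = 26 bits, and the 3-bit reference frame index mentioned above brings the total to 29 bits, which fits in a 32-bit word.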
typedef struct position {
int row;
int col;
} POSITION;
-typedef enum {
- BOTH_ZERO = 0,
- ZERO_PLUS_PREDICTED = 1,
- BOTH_PREDICTED = 2,
- NEW_PLUS_NON_INTRA = 3,
- BOTH_NEW = 4,
- INTRA_PLUS_NON_INTRA = 5,
- BOTH_INTRA = 6,
- INVALID_CASE = 9
-} motion_vector_context;
-
-// This is used to figure out a context for the ref blocks. The code flattens
-// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by
-// adding 9 for each intra block, 3 for each zero mv and 1 for each new
-// motion vector. This single number is then converted into a context
-// with a single lookup ( counter_to_context ).
-static const int mode_2_counter[] = {
- 9, // DC_PRED
- 9, // V_PRED
- 9, // H_PRED
- 9, // D45_PRED
- 9, // D135_PRED
- 9, // D117_PRED
- 9, // D153_PRED
- 9, // D207_PRED
- 9, // D63_PRED
- 9, // SMOOTH_PRED
-#if CONFIG_SMOOTH_HV
- 9, // SMOOTH_V_PRED
- 9, // SMOOTH_H_PRED
-#endif // CONFIG_SMOOTH_HV
- 9, // TM_PRED
- 0, // NEARESTMV
- 0, // NEARMV
- 3, // ZEROMV
- 1, // NEWMV
-#if CONFIG_COMPOUND_SINGLEREF
- 0, // SR_NEAREST_NEARMV
- // 1, // SR_NEAREST_NEWMV
- 1, // SR_NEAR_NEWMV
- 3, // SR_ZERO_NEWMV
- 1, // SR_NEW_NEWMV
-#endif // CONFIG_COMPOUND_SINGLEREF
- 0, // NEAREST_NEARESTMV
- 0, // NEAR_NEARMV
- 1, // NEAREST_NEWMV
- 1, // NEW_NEARESTMV
- 1, // NEAR_NEWMV
- 1, // NEW_NEARMV
- 3, // ZERO_ZEROMV
- 1, // NEW_NEWMV
-};
+// clamp_mv_ref
+#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
-// There are 3^3 different combinations of 3 counts that can be either 0,1 or
-// 2. However the actual count can never be greater than 2 so the highest
-// counter we need is 18. 9 is an invalid counter that's never used.
-static const int counter_to_context[19] = {
- BOTH_PREDICTED, // 0
- NEW_PLUS_NON_INTRA, // 1
- BOTH_NEW, // 2
- ZERO_PLUS_PREDICTED, // 3
- NEW_PLUS_NON_INTRA, // 4
- INVALID_CASE, // 5
- BOTH_ZERO, // 6
- INVALID_CASE, // 7
- INVALID_CASE, // 8
- INTRA_PLUS_NON_INTRA, // 9
- INTRA_PLUS_NON_INTRA, // 10
- INVALID_CASE, // 11
- INTRA_PLUS_NON_INTRA, // 12
- INVALID_CASE, // 13
- INVALID_CASE, // 14
- INVALID_CASE, // 15
- INVALID_CASE, // 16
- INVALID_CASE, // 17
- BOTH_INTRA // 18
-};
+static INLINE int get_relative_dist(const AV1_COMMON *cm, int a, int b) {
+ if (!cm->seq_params.enable_order_hint) return 0;
-static const int idx_n_column_to_subblock[4][2] = {
- { 1, 2 }, { 1, 3 }, { 3, 2 }, { 3, 3 }
-};
+ const int bits = cm->seq_params.order_hint_bits_minus_1 + 1;
-// clamp_mv_ref
-#if CONFIG_EXT_PARTITION
-#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
-#else
-#define MV_BORDER (8 << 3) // Allow 8 pels in 1/8th pel units
-#endif // CONFIG_EXT_PARTITION
+ assert(bits >= 1);
+ assert(a >= 0 && a < (1 << bits));
+ assert(b >= 0 && b < (1 << bits));
+
+ int diff = a - b;
+ int m = 1 << (bits - 1);
+ diff = (diff & (m - 1)) - (diff & m);
+ return diff;
+}
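The masking above is a sign extension modulo 2^bits, so distances wrap around the order-hint range. Two hand-worked values (hypothetical, assuming 7-bit order hints, i.e. order_hint_bits_minus_1 == 6 and m == 64):

    /* get_relative_dist(cm, 3, 124): diff = -121                           */
    /*   (-121 & 63) - (-121 & 64) = 7 - 0 = 7   -> 3 is 7 frames after 124 */
    /* get_relative_dist(cm, 2, 10):  diff = -8                             */
    /*   (-8 & 63) - (-8 & 64) = 56 - 64 = -8    -> 2 is 8 frames before 10 */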
static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) {
clamp_mv(mv, xd->mb_to_left_edge - bw * 8 - MV_BORDER,
@@ -125,19 +58,16 @@ static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) {
// This function returns either the appropriate sub block or block's mv
// on whether the block_size < 8x8 and we have check_sub_blocks set.
-static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
- int search_col, int block_idx) {
+static INLINE int_mv get_sub_block_mv(const MB_MODE_INFO *candidate,
+ int which_mv, int search_col) {
(void)search_col;
- (void)block_idx;
- return candidate->mbmi.mv[which_mv];
+ return candidate->mv[which_mv];
}
-static INLINE int_mv get_sub_block_pred_mv(const MODE_INFO *candidate,
- int which_mv, int search_col,
- int block_idx) {
+static INLINE int_mv get_sub_block_pred_mv(const MB_MODE_INFO *candidate,
+ int which_mv, int search_col) {
(void)search_col;
- (void)block_idx;
- return candidate->mbmi.mv[which_mv];
+ return candidate->mv[which_mv];
}
// Performs mv sign inversion if indicated by the reference frame combination.
@@ -152,48 +82,11 @@ static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
return mv;
}
-#define CLIP_IN_ADD(mv, bw, bh, xd) clamp_mv_ref(mv, bw, bh, xd)
-
-// This macro is used to add a motion vector mv_ref list if it isn't
-// already in the list. If it's the second motion vector it will also
-// skip all additional processing and jump to done!
-#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, bw, bh, xd, Done) \
- do { \
- (mv_ref_list)[(refmv_count)] = (mv); \
- CLIP_IN_ADD(&(mv_ref_list)[(refmv_count)].as_mv, (bw), (bh), (xd)); \
- if (refmv_count && (mv_ref_list)[1].as_int != (mv_ref_list)[0].as_int) { \
- (refmv_count) = 2; \
- goto Done; \
- } \
- (refmv_count) = 1; \
- } while (0)
-
-// If either reference frame is different, not INTRA, and they
-// are different from each other scale and add the mv to our list.
-#define IF_DIFF_REF_FRAME_ADD_MV(mbmi, ref_frame, ref_sign_bias, refmv_count, \
- mv_ref_list, bw, bh, xd, Done) \
- do { \
- if (is_inter_block(mbmi)) { \
- if ((mbmi)->ref_frame[0] != ref_frame) \
- ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \
- refmv_count, mv_ref_list, bw, bh, xd, Done); \
- if (has_second_ref(mbmi) && (mbmi)->ref_frame[1] != ref_frame) \
- ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \
- refmv_count, mv_ref_list, bw, bh, xd, Done); \
- } \
- } while (0)
-
// Checks that the given mi_row, mi_col and search point
// are inside the borders of the tile.
static INLINE int is_inside(const TileInfo *const tile, int mi_col, int mi_row,
- int mi_rows, const AV1_COMMON *cm,
- const POSITION *mi_pos) {
-#if CONFIG_DEPENDENT_HORZTILES
- const int dependent_horz_tile_flag = cm->dependent_horz_tiles;
-#else
+ int mi_rows, const POSITION *mi_pos) {
const int dependent_horz_tile_flag = 0;
- (void)cm;
-#endif
if (dependent_horz_tile_flag && !tile->tg_horz_boundary) {
return !(mi_row + mi_pos->row < 0 ||
mi_col + mi_pos->col < tile->mi_col_start ||
@@ -208,14 +101,8 @@ static INLINE int is_inside(const TileInfo *const tile, int mi_col, int mi_row,
}
static INLINE int find_valid_row_offset(const TileInfo *const tile, int mi_row,
- int mi_rows, const AV1_COMMON *cm,
- int row_offset) {
-#if CONFIG_DEPENDENT_HORZTILES
- const int dependent_horz_tile_flag = cm->dependent_horz_tiles;
-#else
+ int mi_rows, int row_offset) {
const int dependent_horz_tile_flag = 0;
- (void)cm;
-#endif
if (dependent_horz_tile_flag && !tile->tg_horz_boundary)
return clamp(row_offset, -mi_row, mi_rows - mi_row - 1);
else
@@ -229,87 +116,49 @@ static INLINE int find_valid_col_offset(const TileInfo *const tile, int mi_col,
tile->mi_col_end - mi_col - 1);
}
-static INLINE void lower_mv_precision(MV *mv, int allow_hp
-#if CONFIG_AMVR
- ,
- int is_integer
-#endif
- ) {
-#if CONFIG_AMVR
+static INLINE void lower_mv_precision(MV *mv, int allow_hp, int is_integer) {
if (is_integer) {
integer_mv_precision(mv);
} else {
-#endif
if (!allow_hp) {
if (mv->row & 1) mv->row += (mv->row > 0 ? -1 : 1);
if (mv->col & 1) mv->col += (mv->col > 0 ? -1 : 1);
}
-#if CONFIG_AMVR
}
-#endif
-}
-
-static INLINE uint8_t av1_get_pred_diff_ctx(const int_mv pred_mv,
- const int_mv this_mv) {
- if (abs(this_mv.as_mv.row - pred_mv.as_mv.row) <= 4 &&
- abs(this_mv.as_mv.col - pred_mv.as_mv.col) <= 4)
- return 2;
- else
- return 1;
-}
-
-static INLINE int av1_nmv_ctx(const uint8_t ref_mv_count,
- const CANDIDATE_MV *ref_mv_stack, int ref,
- int ref_mv_idx) {
- if (ref_mv_stack[ref_mv_idx].weight >= REF_CAT_LEVEL && ref_mv_count > 0)
- return ref_mv_stack[ref_mv_idx].pred_diff[ref];
-
- return 0;
}
-#if CONFIG_EXT_COMP_REFS
-static INLINE int8_t av1_uni_comp_ref_idx(const MV_REFERENCE_FRAME *const rf) {
+static INLINE int8_t get_uni_comp_ref_idx(const MV_REFERENCE_FRAME *const rf) {
// Single ref pred
if (rf[1] <= INTRA_FRAME) return -1;
// Bi-directional comp ref pred
if ((rf[0] < BWDREF_FRAME) && (rf[1] >= BWDREF_FRAME)) return -1;
- for (int8_t ref_idx = 0; ref_idx < UNIDIR_COMP_REFS; ++ref_idx) {
+ for (int8_t ref_idx = 0; ref_idx < TOTAL_UNIDIR_COMP_REFS; ++ref_idx) {
if (rf[0] == comp_ref0(ref_idx) && rf[1] == comp_ref1(ref_idx))
return ref_idx;
}
return -1;
}
-#endif // CONFIG_EXT_COMP_REFS
static INLINE int8_t av1_ref_frame_type(const MV_REFERENCE_FRAME *const rf) {
if (rf[1] > INTRA_FRAME) {
-#if CONFIG_EXT_COMP_REFS
- int8_t uni_comp_ref_idx = av1_uni_comp_ref_idx(rf);
-#if !USE_UNI_COMP_REFS
- // NOTE: uni-directional comp refs disabled
- assert(uni_comp_ref_idx < 0);
-#endif // !USE_UNI_COMP_REFS
+ const int8_t uni_comp_ref_idx = get_uni_comp_ref_idx(rf);
if (uni_comp_ref_idx >= 0) {
- assert((TOTAL_REFS_PER_FRAME + FWD_REFS * BWD_REFS + uni_comp_ref_idx) <
+ assert((REF_FRAMES + FWD_REFS * BWD_REFS + uni_comp_ref_idx) <
MODE_CTX_REF_FRAMES);
- return TOTAL_REFS_PER_FRAME + FWD_REFS * BWD_REFS + uni_comp_ref_idx;
+ return REF_FRAMES + FWD_REFS * BWD_REFS + uni_comp_ref_idx;
} else {
-#endif // CONFIG_EXT_COMP_REFS
- return TOTAL_REFS_PER_FRAME + FWD_RF_OFFSET(rf[0]) +
+ return REF_FRAMES + FWD_RF_OFFSET(rf[0]) +
BWD_RF_OFFSET(rf[1]) * FWD_REFS;
-#if CONFIG_EXT_COMP_REFS
}
-#endif // CONFIG_EXT_COMP_REFS
}
return rf[0];
}
// clang-format off
-static MV_REFERENCE_FRAME ref_frame_map[COMP_REFS][2] = {
-#if CONFIG_EXT_REFS
+static MV_REFERENCE_FRAME ref_frame_map[TOTAL_COMP_REFS][2] = {
{ LAST_FRAME, BWDREF_FRAME }, { LAST2_FRAME, BWDREF_FRAME },
{ LAST3_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, BWDREF_FRAME },
@@ -317,58 +166,51 @@ static MV_REFERENCE_FRAME ref_frame_map[COMP_REFS][2] = {
{ LAST3_FRAME, ALTREF2_FRAME }, { GOLDEN_FRAME, ALTREF2_FRAME },
{ LAST_FRAME, ALTREF_FRAME }, { LAST2_FRAME, ALTREF_FRAME },
- { LAST3_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME }
-
- // TODO(zoeliu): Temporarily disable uni-directional comp refs
-#if CONFIG_EXT_COMP_REFS
- , { LAST_FRAME, LAST2_FRAME }, { LAST_FRAME, LAST3_FRAME },
- { LAST_FRAME, GOLDEN_FRAME }, { BWDREF_FRAME, ALTREF_FRAME }
- // TODO(zoeliu): When ALTREF2 is enabled, we may add:
- // {BWDREF_FRAME, ALTREF2_FRAME}
-#endif // CONFIG_EXT_COMP_REFS
-#else // !CONFIG_EXT_REFS
- { LAST_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME }
-#endif // CONFIG_EXT_REFS
+ { LAST3_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME },
+
+ { LAST_FRAME, LAST2_FRAME }, { LAST_FRAME, LAST3_FRAME },
+ { LAST_FRAME, GOLDEN_FRAME }, { BWDREF_FRAME, ALTREF_FRAME },
+
+  // NOTE: The following reference frame pairs cannot be signalled explicitly,
+  //       but they may still be chosen through skip_mode, which may use the
+  //       most recent one-sided reference frame pair.
+ { LAST2_FRAME, LAST3_FRAME }, { LAST2_FRAME, GOLDEN_FRAME },
+  { LAST3_FRAME, GOLDEN_FRAME }, { BWDREF_FRAME, ALTREF2_FRAME },
+ { ALTREF2_FRAME, ALTREF_FRAME }
};
// clang-format on
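As a worked example of how av1_ref_frame_type() above and this table pair up (assuming the usual AV1 constants REF_FRAMES == 8, FWD_REFS == 4, FWD_RF_OFFSET(LAST_FRAME) == 0 and BWD_RF_OFFSET(ALTREF_FRAME) == 2): the compound pair { LAST_FRAME, ALTREF_FRAME } maps to 8 + 0 + 2 * 4 = 16, and av1_set_ref_frame() below recovers the same pair by reading ref_frame_map[16 - REF_FRAMES].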
static INLINE void av1_set_ref_frame(MV_REFERENCE_FRAME *rf,
int8_t ref_frame_type) {
- if (ref_frame_type >= TOTAL_REFS_PER_FRAME) {
- rf[0] = ref_frame_map[ref_frame_type - TOTAL_REFS_PER_FRAME][0];
- rf[1] = ref_frame_map[ref_frame_type - TOTAL_REFS_PER_FRAME][1];
+ if (ref_frame_type >= REF_FRAMES) {
+ rf[0] = ref_frame_map[ref_frame_type - REF_FRAMES][0];
+ rf[1] = ref_frame_map[ref_frame_type - REF_FRAMES][1];
} else {
rf[0] = ref_frame_type;
rf[1] = NONE_FRAME;
-#if CONFIG_INTRABC
assert(ref_frame_type > NONE_FRAME);
-#else
- assert(ref_frame_type > INTRA_FRAME);
-#endif
- assert(ref_frame_type < TOTAL_REFS_PER_FRAME);
}
}
+static uint16_t compound_mode_ctx_map[3][COMP_NEWMV_CTXS] = {
+ { 0, 1, 1, 1, 1 },
+ { 1, 2, 3, 4, 4 },
+ { 4, 4, 5, 6, 7 },
+};
+
static INLINE int16_t av1_mode_context_analyzer(
- const int16_t *const mode_context, const MV_REFERENCE_FRAME *const rf,
- BLOCK_SIZE bsize, int block) {
- int16_t mode_ctx = 0;
- int8_t ref_frame_type = av1_ref_frame_type(rf);
-
- if (block >= 0) {
- mode_ctx = mode_context[rf[0]] & 0x00ff;
-#if !CONFIG_CB4X4
- if (block > 0 && bsize < BLOCK_8X8 && bsize > BLOCK_4X4)
- mode_ctx |= (1 << SKIP_NEARESTMV_SUB8X8_OFFSET);
-#else
- (void)block;
- (void)bsize;
-#endif
+ const int16_t *const mode_context, const MV_REFERENCE_FRAME *const rf) {
+ const int8_t ref_frame = av1_ref_frame_type(rf);
- return mode_ctx;
- }
+ if (rf[1] <= INTRA_FRAME) return mode_context[ref_frame];
+
+ const int16_t newmv_ctx = mode_context[ref_frame] & NEWMV_CTX_MASK;
+ const int16_t refmv_ctx =
+ (mode_context[ref_frame] >> REFMV_OFFSET) & REFMV_CTX_MASK;
- return mode_context[ref_frame_type];
+ const int16_t comp_ctx = compound_mode_ctx_map[refmv_ctx >> 1][AOMMIN(
+ newmv_ctx, COMP_NEWMV_CTXS - 1)];
+ return comp_ctx;
}
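For instance (hypothetical packed value): if mode_context[ref_frame] decodes to newmv_ctx == 3 and refmv_ctx == 4 for a compound reference, the function returns compound_mode_ctx_map[4 >> 1][AOMMIN(3, COMP_NEWMV_CTXS - 1)] == compound_mode_ctx_map[2][3] == 6.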
static INLINE uint8_t av1_drl_ctx(const CANDIDATE_MV *ref_mv_stack,
@@ -379,92 +221,99 @@ static INLINE uint8_t av1_drl_ctx(const CANDIDATE_MV *ref_mv_stack,
if (ref_mv_stack[ref_idx].weight >= REF_CAT_LEVEL &&
ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL)
- return 2;
+ return 1;
if (ref_mv_stack[ref_idx].weight < REF_CAT_LEVEL &&
ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL)
- return 3;
+ return 2;
return 0;
}
-#if CONFIG_FRAME_MARKER
void av1_setup_frame_buf_refs(AV1_COMMON *cm);
-#if CONFIG_FRAME_SIGN_BIAS
void av1_setup_frame_sign_bias(AV1_COMMON *cm);
-#endif // CONFIG_FRAME_SIGN_BIAS
-#if CONFIG_MFMV
+void av1_setup_skip_mode_allowed(AV1_COMMON *cm);
void av1_setup_motion_field(AV1_COMMON *cm);
-#endif // CONFIG_MFMV
-#endif // CONFIG_FRAME_MARKER
+void av1_set_frame_refs(AV1_COMMON *const cm, int lst_map_idx, int gld_map_idx);
+
+static INLINE void av1_collect_neighbors_ref_counts(MACROBLOCKD *const xd) {
+ av1_zero(xd->neighbors_ref_counts);
+
+ uint8_t *const ref_counts = xd->neighbors_ref_counts;
+
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int above_in_image = xd->up_available;
+ const int left_in_image = xd->left_available;
+
+ // Above neighbor
+ if (above_in_image && is_inter_block(above_mbmi)) {
+ ref_counts[above_mbmi->ref_frame[0]]++;
+ if (has_second_ref(above_mbmi)) {
+ ref_counts[above_mbmi->ref_frame[1]]++;
+ }
+ }
+
+ // Left neighbor
+ if (left_in_image && is_inter_block(left_mbmi)) {
+ ref_counts[left_mbmi->ref_frame[0]]++;
+ if (has_second_ref(left_mbmi)) {
+ ref_counts[left_mbmi->ref_frame[1]]++;
+ }
+ }
+}
-void av1_copy_frame_mvs(const AV1_COMMON *const cm, MODE_INFO *mi, int mi_row,
- int mi_col, int x_mis, int y_mis);
+void av1_copy_frame_mvs(const AV1_COMMON *const cm, MB_MODE_INFO *mi,
+ int mi_row, int mi_col, int x_mis, int y_mis);
-typedef void (*find_mv_refs_sync)(void *const data, int mi_row);
void av1_find_mv_refs(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
- uint8_t *ref_mv_count, CANDIDATE_MV *ref_mv_stack,
- int16_t *compound_mode_context, int_mv *mv_ref_list,
- int mi_row, int mi_col, find_mv_refs_sync sync,
- void *const data, int16_t *mode_context);
+ MB_MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+ uint8_t ref_mv_count[MODE_CTX_REF_FRAMES],
+ CANDIDATE_MV ref_mv_stack[][MAX_REF_MV_STACK_SIZE],
+ int_mv mv_ref_list[][MAX_MV_REF_CANDIDATES],
+ int_mv *global_mvs, int mi_row, int mi_col,
+ int16_t *mode_context);
// check a list of motion vectors by sad score using a number rows of pixels
// above and a number cols of pixels in the left to select the one with best
// score to use as ref motion vector
-#if CONFIG_AMVR
void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
int_mv *near_mv, int is_integer);
-#else
-void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
- int_mv *near_mv);
-#endif
-void av1_append_sub8x8_mvs_for_idx(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int block, int ref, int mi_row, int mi_col,
- CANDIDATE_MV *ref_mv_stack,
- uint8_t *ref_mv_count, int_mv *mv_list,
- int_mv *nearest_mv, int_mv *near_mv);
-
-// This function keeps a mode count for a given MB/SB
-void av1_update_mv_context(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
- int_mv *mv_ref_list, int block, int mi_row,
- int mi_col, int16_t *mode_context);
-
-#if CONFIG_WARPED_MOTION
-#if WARPED_MOTION_SORT_SAMPLES
-int sortSamples(int *pts_mv, MV *mv, int *pts, int *pts_inref, int len);
-int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
- int *pts, int *pts_inref, int *pts_mv);
-#else
+int selectSamples(MV *mv, int *pts, int *pts_inref, int len, BLOCK_SIZE bsize);
int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
int *pts, int *pts_inref);
-#endif // WARPED_MOTION_SORT_SAMPLES
-#endif // CONFIG_WARPED_MOTION
-#if CONFIG_INTRABC
-static INLINE void av1_find_ref_dv(int_mv *ref_dv, int mi_row, int mi_col) {
- // TODO(aconverse@google.com): Handle tiles and such
+#define INTRABC_DELAY_PIXELS 256 // Delay of 256 pixels
+#define INTRABC_DELAY_SB64 (INTRABC_DELAY_PIXELS / 64)
+#define USE_WAVE_FRONT 1 // Use only top left area of frame for reference.
+
+static INLINE void av1_find_ref_dv(int_mv *ref_dv, const TileInfo *const tile,
+ int mib_size, int mi_row, int mi_col) {
(void)mi_col;
- if (mi_row < MAX_MIB_SIZE) {
+ if (mi_row - mib_size < tile->mi_row_start) {
ref_dv->as_mv.row = 0;
- ref_dv->as_mv.col = -MI_SIZE * MAX_MIB_SIZE;
+ ref_dv->as_mv.col = -MI_SIZE * mib_size - INTRABC_DELAY_PIXELS;
} else {
- ref_dv->as_mv.row = -MI_SIZE * MAX_MIB_SIZE;
+ ref_dv->as_mv.row = -MI_SIZE * mib_size;
ref_dv->as_mv.col = 0;
}
+ ref_dv->as_mv.row *= 8;
+ ref_dv->as_mv.col *= 8;
}
-static INLINE int is_dv_valid(const MV dv, const TileInfo *const tile,
- int mi_row, int mi_col, BLOCK_SIZE bsize) {
+static INLINE int av1_is_dv_valid(const MV dv, const AV1_COMMON *cm,
+ const MACROBLOCKD *xd, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int mib_size_log2) {
const int bw = block_size_wide[bsize];
const int bh = block_size_high[bsize];
const int SCALE_PX_TO_MV = 8;
// Disallow subpixel for now
// SUBPEL_MASK is not the correct scale
- if ((dv.row & (SCALE_PX_TO_MV - 1) || dv.col & (SCALE_PX_TO_MV - 1)))
+ if (((dv.row & (SCALE_PX_TO_MV - 1)) || (dv.col & (SCALE_PX_TO_MV - 1))))
return 0;
+
+ const TileInfo *const tile = &xd->tile;
// Is the source top-left inside the current tile?
const int src_top_edge = mi_row * MI_SIZE * SCALE_PX_TO_MV + dv.row;
const int tile_top_edge = tile->mi_row_start * MI_SIZE * SCALE_PX_TO_MV;
@@ -479,20 +328,44 @@ static INLINE int is_dv_valid(const MV dv, const TileInfo *const tile,
const int src_right_edge = (mi_col * MI_SIZE + bw) * SCALE_PX_TO_MV + dv.col;
const int tile_right_edge = tile->mi_col_end * MI_SIZE * SCALE_PX_TO_MV;
if (src_right_edge > tile_right_edge) return 0;
- // Is the bottom right within an already coded SB?
- const int active_sb_top_edge =
- (mi_row & ~MAX_MIB_MASK) * MI_SIZE * SCALE_PX_TO_MV;
- const int active_sb_bottom_edge =
- ((mi_row & ~MAX_MIB_MASK) + MAX_MIB_SIZE) * MI_SIZE * SCALE_PX_TO_MV;
- const int active_sb_left_edge =
- (mi_col & ~MAX_MIB_MASK) * MI_SIZE * SCALE_PX_TO_MV;
- if (src_bottom_edge > active_sb_bottom_edge) return 0;
- if (src_bottom_edge > active_sb_top_edge &&
- src_right_edge > active_sb_left_edge)
+
+ // Special case for sub 8x8 chroma cases, to prevent referring to chroma
+ // pixels outside current tile.
+ for (int plane = 1; plane < av1_num_planes(cm); ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ if (is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
+ pd->subsampling_y)) {
+ if (bw < 8 && pd->subsampling_x)
+ if (src_left_edge < tile_left_edge + 4 * SCALE_PX_TO_MV) return 0;
+ if (bh < 8 && pd->subsampling_y)
+ if (src_top_edge < tile_top_edge + 4 * SCALE_PX_TO_MV) return 0;
+ }
+ }
+
+ // Is the bottom right within an already coded SB? Also consider additional
+ // constraints to facilitate HW decoder.
+ const int max_mib_size = 1 << mib_size_log2;
+ const int active_sb_row = mi_row >> mib_size_log2;
+ const int active_sb64_col = (mi_col * MI_SIZE) >> 6;
+ const int sb_size = max_mib_size * MI_SIZE;
+ const int src_sb_row = ((src_bottom_edge >> 3) - 1) / sb_size;
+ const int src_sb64_col = ((src_right_edge >> 3) - 1) >> 6;
+ const int total_sb64_per_row =
+ ((tile->mi_col_end - tile->mi_col_start - 1) >> 4) + 1;
+ const int active_sb64 = active_sb_row * total_sb64_per_row + active_sb64_col;
+ const int src_sb64 = src_sb_row * total_sb64_per_row + src_sb64_col;
+ if (src_sb64 >= active_sb64 - INTRABC_DELAY_SB64) return 0;
+
+#if USE_WAVE_FRONT
+ const int gradient = 1 + INTRABC_DELAY_SB64 + (sb_size > 64);
+ const int wf_offset = gradient * (active_sb_row - src_sb_row);
+ if (src_sb_row > active_sb_row ||
+ src_sb64_col >= active_sb64_col - INTRABC_DELAY_SB64 + wf_offset)
return 0;
+#endif
+
return 1;
}
-#endif // CONFIG_INTRABC
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/aom/av1/common/ncobmc_kernels.c b/third_party/aom/av1/common/ncobmc_kernels.c
deleted file mode 100644
index af951398b..000000000
--- a/third_party/aom/av1/common/ncobmc_kernels.c
+++ /dev/null
@@ -1,1181 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/common/ncobmc_kernels.h"
-
-// The kernels are only used in the experiment "ncobmc-adapt-weight", which
-// blends four predictions to form a final prediction for an inter-block
-// The indices of the default kernels correspond to
-// 1. the index of the size of the kernels (ADAPT_OVERLAP_BLOCKS )
-// 2. the interpolation modes (NCOBMC_MODE)
-// 3. the prediction the kernels applies to
-
-static int16_t default_ncobmc_krnl_0_0_0[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 5684, 3601, 1367, 364, 1509, 2313, 4007, 5080 },
- { 3728, 2486, 827, 196, 1434, 2034, 2868, 3000 },
- { 1643, 1465, 726, 208, 890, 1386, 1242, 1293 },
- { 794, 723, 277, -237, 206, 487, 749, 896 },
- { 1176, 730, 286, 136, 281, 262, 724, 953 },
- { 2086, 1958, 783, 539, 751, 984, 1143, 1491 },
- { 2665, 2520, 1402, 1037, 939, 1223, 1593, 1937 },
- { 3451, 3172, 2350, 1291, 1069, 1916, 2672, 3223 }
-};
-static int16_t default_ncobmc_krnl_0_0_1[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 5541, 8123, 10470, 11908, 11291, 10382, 8800, 6446 },
- { 3338, 5536, 7249, 8080, 7671, 6428, 5280, 3900 },
- { 1732, 3087, 3842, 4325, 4034, 2929, 2318, 1800 },
- { 744, 1217, 1559, 2215, 1957, 1352, 707, 322 },
- { 685, 1082, 1792, 2300, 1975, 1350, 738, 671 },
- { 1168, 2336, 3303, 3965, 3790, 3098, 2909, 2141 },
- { 3005, 4370, 5806, 6716, 6282, 5553, 4782, 3453 },
- { 4748, 6650, 7779, 9010, 9208, 8184, 6987, 5197 }
-};
-static int16_t default_ncobmc_krnl_0_0_2[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 6026, 4784, 2400, 1250, 1002, 2371, 3320, 5285 },
- { 8638, 6094, 3257, 1498, 1297, 3145, 5252, 7625 },
- { 10859, 7249, 3868, 1871, 1813, 3569, 6577, 8858 },
- { 11432, 8123, 4216, 1786, 2477, 4370, 6669, 9366 },
- { 11894, 8466, 4870, 1917, 2479, 4656, 7057, 9383 },
- { 11109, 7432, 3924, 1288, 2018, 3946, 6660, 9877 },
- { 10138, 6548, 2830, 461, 2087, 3810, 6170, 9255 },
- { 8613, 5163, 1658, 279, 1694, 3082, 4807, 7897 }
-};
-static int16_t default_ncobmc_krnl_0_0_3[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { -833, -80, 2193, 2907, 2623, 1359, 298, -383 },
- { 705, 2300, 5090, 6649, 6024, 4820, 3020, 1892 },
- { 2189, 4625, 7990, 10015, 9679, 8539, 6284, 4464 },
- { 3445, 6356, 10371, 12660, 11773, 10205, 8287, 5828 },
- { 2664, 6149, 9483, 12064, 11681, 10156, 7908, 5409 },
- { 2040, 4690, 8405, 10631, 9862, 8396, 5711, 2909 },
- { 626, 2993, 6387, 8212, 7123, 5840, 3877, 1788 },
- { -402, 1431, 4636, 5850, 4461, 3246, 1964, 122 }
-};
-static int16_t default_ncobmc_krnl_0_1_0[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 1465, 553, -76, 10, 635, 756, 1843, 3144 },
- { 687, 117, -404, -300, 238, 280, 696, 1415 },
- { 49, -38, -224, -241, -135, -209, -237, 382 },
- { 48, 37, -266, -273, -235, -137, -208, -94 },
- { 555, -3, -132, -172, -98, 93, 347, 313 },
- { 887, 256, -266, -307, 304, 222, -98, 82 },
- { 1701, 816, 454, 501, 119, 230, 450, 551 },
- { 2732, 1502, 1174, 540, 323, 709, 1002, 936 }
-};
-static int16_t default_ncobmc_krnl_0_1_1[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 7707, 10467, 11036, 10942, 10165, 9420, 8728, 5835 },
- { 3167, 5146, 5854, 5771, 4914, 4684, 4357, 3154 },
- { 900, 1646, 2412, 2014, 1974, 1986, 1776, 1005 },
- { -198, -179, 488, 737, 866, 784, 828, 236 },
- { -469, 32, 402, 574, 738, 495, 242, -187 },
- { 186, 1078, 1378, 1480, 1226, 1506, 1656, 745 },
- { 1531, 2525, 3139, 3367, 3535, 3519, 3095, 2171 },
- { 3152, 5453, 6176, 7089, 7310, 6879, 6483, 4916 }
-};
-static int16_t default_ncobmc_krnl_0_1_2[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 7378, 3775, 1232, 453, 133, 936, 1688, 4950 },
- { 10336, 5944, 2400, 1175, 168, 954, 2894, 6436 },
- { 11176, 6145, 2051, 829, 543, 1193, 3403, 6517 },
- { 10387, 6062, 2036, 646, 507, 1077, 2998, 6029 },
- { 10768, 6277, 2226, 677, 321, 982, 2845, 6378 },
- { 10072, 5808, 1937, 873, 372, 1396, 3498, 7298 },
- { 8951, 4724, 1216, 104, 656, 1830, 3722, 7558 },
- { 7447, 3372, 468, -135, 99, 1177, 2980, 7260 }
-};
-static int16_t default_ncobmc_krnl_0_1_3[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { -229, 1545, 4135, 4921, 5405, 5226, 4081, 2418 },
- { 2120, 5121, 8485, 9692, 11018, 10406, 8380, 5338 },
- { 4205, 8593, 12099, 13717, 13936, 13366, 11402, 8436 },
- { 6068, 10382, 14047, 15190, 15155, 14577, 12684, 10145 },
- { 5458, 10012, 13836, 15243, 15361, 14752, 12876, 9818 },
- { 5153, 9162, 13256, 14256, 14385, 13170, 11245, 8186 },
- { 4140, 8257, 11521, 12362, 12028, 10762, 9062, 6053 },
- { 2966, 5975, 8490, 8807, 8561, 7529, 5836, 3204 }
-};
-static int16_t default_ncobmc_krnl_1_0_0[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 4414, 2642, 2518, 1763, 1089, 644, 355, 254, -234, 454, 399, 228, 525, 785,
- 558, 919 },
- { 2989, 3035, 2685, 1476, 1872, 768, 820, -309, -107, 273, 87, 286, 499, 638,
- 929, 1025 },
- { 1779, 1672, 1713, 645, 953, 151, 617, 79, -91, 185, 18, 307, 794, 681, 484,
- 521 },
- { 1429, 1571, 1893, 1493, 949, 288, -232, -248, -152, 179, -50, 74, 107, 329,
- 539, 822 },
- { 1444, 852, 1022, 688, 850, 205, 135, -629, 334, 96, 106, 337, 259, 300, 150,
- 680 },
- { 962, 367, 329, 921, 591, -79, 146, 201, 296, 179, -190, 143, 46, -107, 215,
- 853 },
- { 915, 865, 463, 169, 498, -390, 12, 202, 225, 490, 410, 483, 52, 99, 293,
- 569 },
- { 728, -135, 241, 383, 288, -69, 33, 421, 523, 506, 376, 58, 143, -4, 151,
- 218 },
- { 337, 65, 255, 282, 173, 267, 237, 15, 38, 114, 253, 110, 32, 227, 92, -48 },
- { 317, 115, 295, 231, 380, 435, 331, -97, 392, 393, 51, 59, 276, 41, -33,
- 46 },
- { 31, -14, 86, 250, -36, -214, 210, -79, -117, 401, 193, 440, 171, 200, 8,
- 112 },
- { 46, 19, 165, -6, 75, 180, 266, -98, 76, 276, 6, 29, 230, -49, 177, 168 },
- { 104, -243, -121, 295, -8, 180, 16, -44, 232, 315, 176, 10, 0, -95, -154,
- 141 },
- { 248, 201, 361, 430, -20, -45, 209, -44, 222, 540, 527, 297, 240, 625, 531,
- 409 },
- { 91, 37, 193, 177, 233, 210, -299, 120, 327, 214, 293, 189, 86, 123, 206,
- 356 },
- { 501, 779, 295, 199, 148, 81, -31, 70, 211, 309, 300, 110, 227, 30, 242,
- 261 }
-};
-static int16_t default_ncobmc_krnl_1_0_1[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 6603, 7905, 7762, 8159, 8426, 10334, 10792, 10984, 12097, 10534, 11216,
- 10624, 9514, 8715, 8672, 8846 },
- { 5897, 6238, 6272, 7323, 7162, 8091, 9465, 9845, 9929, 9747, 10562, 10737,
- 9059, 7651, 7330, 7314 },
- { 5572, 6017, 5568, 7112, 6617, 6501, 7334, 8494, 8900, 8826, 9852, 8034,
- 6956, 7691, 7513, 6106 },
- { 4564, 3877, 4682, 4586, 5135, 5795, 7968, 7859, 7720, 6548, 6306, 5639,
- 6357, 6514, 6493, 5609 },
- { 4142, 4154, 3332, 4193, 3873, 4977, 4685, 5787, 5707, 5300, 5854, 4720,
- 5452, 5642, 4810, 4250 },
- { 2993, 3176, 3012, 2637, 2664, 4336, 4207, 3687, 4627, 4487, 4847, 4120,
- 4079, 3931, 3730, 3205 },
- { 2479, 2268, 1858, 1737, 2266, 2806, 2919, 3017, 3231, 2964, 3181, 3423,
- 3096, 3025, 2684, 2353 },
- { 1969, 2001, 1997, 1959, 1323, 1565, 1963, 1351, 1957, 1711, 2093, 2057,
- 1762, 1926, 1118, 1367 },
- { 1473, 816, 655, 1628, 1252, 1764, 1723, 1675, 2559, 3029, 1951, 2160, 2305,
- 2299, 1688, 1048 },
- { 3073, 1667, 1324, 1360, 1562, 1774, 2154, 2740, 3281, 3434, 3258, 4095,
- 2823, 2443, 2894, 2449 },
- { 3813, 2830, 3352, 2125, 2627, 2974, 3847, 3720, 4592, 4846, 4787, 5066,
- 4598, 4229, 4032, 3478 },
- { 3415, 2733, 3827, 3637, 3381, 3743, 3768, 4732, 5055, 5445, 5870, 5937,
- 5734, 5980, 5010, 4954 },
- { 4878, 3604, 5532, 4558, 4210, 4880, 4847, 5771, 5136, 6486, 7096, 6426,
- 5765, 6824, 6094, 5753 },
- { 6076, 5817, 5318, 5268, 5784, 5482, 6453, 6582, 6803, 7077, 8113, 8173,
- 8329, 7653, 6448, 6476 },
- { 7549, 5758, 5554, 6383, 7113, 7664, 7123, 6712, 8539, 8997, 9047, 8794,
- 8700, 8760, 7600, 7995 },
- { 7698, 7133, 7048, 7498, 7821, 8401, 9152, 8647, 8934, 8874, 8595, 8789,
- 8828, 8766, 9019, 8783 }
-};
-static int16_t default_ncobmc_krnl_1_0_2[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 5573, 5972, 5705, 5033, 5738, 3189, 2009, 1476, 2057, 2178, 1869, 2927,
- 3305, 4036, 4017, 5328 },
- { 7539, 7568, 7302, 5564, 4410, 3954, 2153, 2693, 622, 1831, 1753, 1636, 3552,
- 4374, 4319, 6015 },
- { 8753, 7544, 6620, 5710, 6142, 5819, 2731, 2898, 1702, 1487, 2249, 1688,
- 4110, 4483, 5108, 5621 },
- { 9273, 7922, 6245, 6310, 6442, 5272, 3068, 2649, 1599, 2693, 3219, 4501,
- 4588, 4310, 5647, 6894 },
- { 9697, 8245, 7267, 6551, 5199, 4626, 3466, 3256, 2099, 3125, 3608, 4297,
- 3944, 5468, 6056, 7545 },
- { 8831, 8583, 7466, 6937, 6056, 5482, 3407, 3324, 1802, 3128, 3078, 4560,
- 4560, 5901, 6131, 7612 },
- { 9556, 7457, 6602, 7342, 5370, 4431, 3573, 3339, 1668, 3172, 3779, 4564,
- 5744, 7244, 8522, 8407 },
- { 10238, 8809, 7064, 6643, 4885, 4246, 2737, 2684, 2501, 3443, 3761, 6174,
- 5891, 6882, 7585, 8821 },
- { 10151, 10001, 8289, 6859, 6054, 4903, 3809, 3540, 2644, 3424, 3542, 4649,
- 5389, 5384, 6733, 8360 },
- { 9635, 9516, 7609, 7438, 6181, 4529, 4140, 3439, 2568, 3338, 3789, 5195,
- 5510, 6181, 7566, 8512 },
- { 9988, 8848, 6807, 6731, 6139, 5355, 3797, 4097, 3364, 3319, 4230, 5136,
- 5581, 6125, 7748, 8229 },
- { 10252, 9244, 7204, 7260, 6478, 6040, 4659, 3920, 2869, 3263, 4068, 5475,
- 5714, 7183, 7153, 8318 },
- { 9682, 9366, 7096, 6059, 6036, 4463, 3898, 3477, 2065, 2704, 4434, 5167,
- 5502, 6743, 8002, 7443 },
- { 9252, 8211, 6399, 6747, 6498, 5626, 4016, 3880, 2172, 2557, 3576, 4270,
- 4968, 5115, 6840, 7550 },
- { 8753, 8157, 7097, 6500, 5779, 5174, 4190, 2645, 2380, 3239, 4155, 5263,
- 5437, 5337, 5663, 6667 },
- { 9680, 7710, 6921, 5657, 4863, 3990, 3485, 2172, 2620, 3003, 3328, 4112,
- 4806, 6020, 6833, 7212 }
-};
-static int16_t default_ncobmc_krnl_1_0_3[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { -219, -121, 416, 1445, 1150, 2238, 3251, 3695, 2496, 3247, 2925, 2639, 3064,
- 2869, 3167, 1320 },
- { -68, -450, 130, 2039, 2952, 3584, 3966, 4171, 5961, 4555, 3995, 3732, 3281,
- 3731, 3827, 2052 },
- { 262, 1161, 2497, 2934, 2690, 3939, 5735, 4938, 5906, 5924, 4296, 6388, 4553,
- 3551, 3297, 4164 },
- { 1091, 3025, 3566, 4005, 3874, 5040, 5600, 6151, 7241, 6990, 6924, 6186,
- 5356, 5256, 3726, 3083 },
- { 1079, 3140, 4769, 4958, 6480, 6589, 8111, 7988, 8255, 7879, 6838, 7052,
- 6751, 5005, 5393, 3931 },
- { 3566, 4255, 5572, 5909, 7098, 6653, 8641, 9199, 9689, 8617, 8673, 7591,
- 7733, 6676, 6324, 4737 },
- { 3411, 5802, 7481, 7149, 8259, 9553, 9900, 9854, 11285, 9779, 9040, 7939,
- 7515, 6037, 4902, 5075 },
- { 3417, 5718, 7095, 7425, 9913, 10666, 11679, 11951, 11429, 10749, 10173,
- 8116, 8610, 7605, 7548, 5992 },
- { 4408, 5515, 7201, 7627, 8922, 9470, 10636, 11166, 11159, 9844, 10673, 9502,
- 8693, 8503, 7905, 7046 },
- { 3340, 5097, 7171, 7366, 8273, 9660, 9784, 10332, 10155, 9232, 9301, 7056,
- 7798, 7746, 5981, 5402 },
- { 2531, 4732, 6148, 7284, 7672, 8287, 8551, 8672, 8567, 7846, 7199, 5757,
- 6057, 5863, 4613, 4578 },
- { 2646, 4394, 5195, 5511, 6471, 6443, 7713, 7854, 8408, 7427, 6461, 4968,
- 4731, 3294, 4066, 2960 },
- { 1692, 3664, 3881, 5480, 6162, 6871, 7635, 7198, 8963, 6891, 4694, 4801,
- 5141, 2932, 2459, 3060 },
- { 769, 2144, 4310, 3945, 4125, 5329, 5712, 5975, 7200, 6220, 4179, 3662, 2868,
- 3007, 2579, 1958 },
- { -45, 2434, 3549, 3335, 3273, 3357, 5394, 6931, 5159, 3956, 2912, 2164, 2187,
- 2187, 2935, 1388 },
- { -1514, 786, 2135, 3045, 3561, 3922, 3800, 5515, 4650, 4225, 4169, 3387,
- 2539, 1590, 317, 161 }
-};
-static int16_t default_ncobmc_krnl_1_1_0[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 2375, 1912, 1469, 213, 933, -507, -173, -369, -333, 187, -128, 427, 999,
- 1166, 1515, 2728 },
- { 1857, 1085, 817, 454, 598, 479, 53, -218, -611, 240, 76, 31, 284, 1347,
- 1738, 1317 },
- { 1911, 531, 453, 89, 639, -361, -331, -605, -162, 63, -154, 259, 446, 390,
- 708, 1113 },
- { 818, 1304, 871, 665, 1006, -114, -405, -407, 331, 203, 304, 506, 476, 1053,
- 1155, 879 },
- { 1054, 874, 714, -162, 624, -144, -306, -541, 30, -281, 296, 812, 418, 858,
- 755, 252 },
- { 967, 388, 354, 878, 31, -691, -244, -307, 425, 281, 0, -50, 110, -107, 279,
- 255 },
- { 152, -53, 156, 266, 192, -864, -236, -110, 397, 484, -129, 14, 22, 44, -90,
- 278 },
- { 203, -54, 103, -142, -598, -741, -546, -26, 545, 253, -43, -234, -391, -504,
- -158, -143 },
- { 387, 275, 136, 69, -289, -9, -210, -364, 39, 3, 4, 61, -66, -102, -94,
- -215 },
- { 195, 213, 433, 158, 128, -131, -203, -266, -132, -285, -301, -367, -315,
- -249, -144, -9 },
- { 600, 145, 418, 277, 156, -118, 85, -20, 119, 260, 41, 72, -85, 316, -97,
- -41 },
- { 682, 610, 356, 880, 527, 272, 90, 92, -124, 314, -204, -339, -590, -384,
- -248, -192 },
- { 999, 423, 208, 752, 623, 409, 91, -57, -3, -124, 148, 255, -7, 112, -128,
- -144 },
- { 1007, 710, 609, 766, 264, -163, 324, 291, 219, -61, 24, 507, 74, 109, 127,
- 629 },
- { 2211, 878, 853, 462, 86, 203, -71, 122, -36, 131, 308, 267, 210, 369, 50,
- -96 },
- { 1810, 1630, 1123, 645, 610, 217, -93, -37, -220, -341, -250, -110, 135, 0,
- 112, 93 }
-};
-static int16_t default_ncobmc_krnl_1_1_1[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 5824, 7106, 8063, 8929, 8632, 9731, 10164, 11047, 11088, 10239, 10606, 8987,
- 8411, 7117, 6115, 5322 },
- { 4980, 6239, 7135, 7245, 7418, 8526, 9440, 9438, 8119, 8336, 7993, 8057,
- 6686, 5210, 4193, 4841 },
- { 2436, 4500, 5019, 5908, 5578, 7270, 7391, 7974, 7281, 6871, 6705, 6327,
- 4867, 4521, 4286, 3605 },
- { 2298, 3501, 4714, 4692, 4835, 5278, 5830, 4968, 4435, 4824, 4373, 4085,
- 3825, 2657, 2539, 2557 },
- { 1643, 2741, 2604, 2664, 1877, 3334, 2995, 3162, 3367, 3104, 3356, 2827,
- 3577, 2359, 1755, 2140 },
- { 742, 1397, 1315, 1332, 1864, 3032, 2472, 2253, 1692, 2071, 2260, 2426, 1951,
- 1610, 1189, 1275 },
- { 482, 869, 598, 288, 769, 1490, 1284, 1692, 883, 1061, 1259, 1239, 1118, 585,
- 219, 571 },
- { 178, 278, 580, 915, 717, 873, 1012, 721, 52, 348, 624, 540, 691, 102, -108,
- 383 },
- { -718, -648, -223, -520, -1000, -754, -688, -639, -528, -414, -440, -365,
- -268, -546, -672, -332 },
- { -256, -226, -395, -158, -311, -325, -66, 87, 533, 705, 261, 344, 484, 692,
- 155, 507 },
- { 204, 448, 131, -571, 889, 712, 626, 349, 261, 578, 240, 1012, 849, 900, 889,
- 977 },
- { 132, 1395, 1847, 1181, 845, 1203, 1920, 2068, 2141, 2071, 1834, 2191, 2130,
- 2522, 1537, 1326 },
- { 140, 1278, 2440, 2063, 1581, 2204, 2781, 2532, 1677, 2426, 2538, 2210, 1568,
- 2564, 2394, 1945 },
- { 2943, 3776, 3833, 3310, 3900, 4118, 4161, 3571, 4059, 4143, 4145, 4273,
- 4034, 3940, 3720, 2418 },
- { 3437, 3906, 4106, 4294, 5303, 5257, 4956, 4027, 5935, 5373, 4102, 4853,
- 5331, 5251, 3964, 4748 },
- { 5493, 5799, 5966, 6535, 7015, 7397, 8011, 6526, 5832, 6257, 6247, 7097,
- 6499, 6272, 5963, 5593 }
-};
-static int16_t default_ncobmc_krnl_1_1_2[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 6049, 4906, 3617, 2960, 2187, 1950, 556, 497, 688, 355, 503, 1054, 1170,
- 1641, 2343, 4226 },
- { 7459, 6408, 4326, 3635, 2042, 1565, 492, 572, 746, 338, 719, 797, 2540,
- 2283, 2301, 4089 },
- { 8025, 6914, 5072, 4249, 2793, 1910, 430, 1137, -150, 451, 1061, 872, 1515,
- 2805, 3823, 4550 },
- { 9615, 6936, 5226, 3388, 2611, 2061, 801, 1003, -537, 72, 736, 1347, 2215,
- 3509, 4262, 5097 },
- { 9677, 6521, 5633, 5223, 2996, 2449, 1300, 1136, 160, 918, 488, 801, 2306,
- 3781, 4818, 6441 },
- { 9988, 7509, 6019, 4950, 3376, 2777, 1427, 1395, -118, 310, 393, 1626, 3387,
- 3649, 4737, 7431 },
- { 10542, 7745, 5192, 4494, 1637, 1960, 1212, 1056, -309, 383, 1166, 2107,
- 4048, 4030, 7206, 7851 },
- { 9350, 7480, 4343, 3589, 1748, 1687, 1057, 898, 592, 776, 680, 1960, 3804,
- 4598, 5688, 7834 },
- { 8769, 7236, 5518, 4182, 2776, 2412, 915, 1370, 789, 561, 520, 1146, 3139,
- 4730, 5542, 7514 },
- { 9580, 7116, 5910, 4623, 3085, 2450, 1703, 745, 419, 600, 1077, 1208, 3256,
- 4261, 5611, 6709 },
- { 9725, 7053, 5594, 4217, 2573, 1834, 562, 512, 496, 356, 883, 1360, 3323,
- 4866, 5632, 7594 },
- { 10110, 7367, 5622, 3858, 3720, 2398, 1075, 1687, 616, 461, 1082, 1786, 2570,
- 4242, 5731, 8319 },
- { 9416, 7582, 6054, 3915, 3283, 2035, 1335, 1138, 317, 92, 253, 483, 1715,
- 3597, 5613, 8103 },
- { 8693, 6881, 4626, 3505, 2663, 1949, 751, 792, -343, 55, 303, 460, 2027,
- 3584, 6230, 8704 },
- { 7368, 6609, 5087, 3861, 2790, 1746, 1487, 518, 497, -165, 439, 904, 2514,
- 3735, 6082, 6914 },
- { 7004, 5321, 3472, 2621, 1221, 999, 1172, 377, 850, 864, 866, 647, 2574,
- 3977, 6416, 7777 }
-};
-static int16_t default_ncobmc_krnl_1_1_3[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 2085, 2421, 3201, 4245, 4593, 5179, 5800, 5172, 4904, 5558, 5357, 5889,
- 5769, 6415, 6377, 4080 },
- { 2031, 2607, 4062, 5018, 6279, 5766, 6373, 6562, 8085, 7434, 7557, 7449,
- 6834, 7509, 8119, 6106 },
- { 3960, 4394, 5800, 6108, 7339, 7531, 8876, 7849, 9371, 8973, 8753, 8896,
- 9525, 8636, 7540, 7092 },
- { 3599, 4610, 5527, 7597, 7898, 9121, 10115, 10783, 12123, 11248, 10928,
- 10406, 9827, 9129, 8401, 7814 },
- { 3953, 6203, 7382, 8619, 10852, 10722, 12369, 12580, 12777, 12605, 12198,
- 11899, 10047, 9350, 9018, 7521 },
- { 4615, 7038, 8644, 9190, 11073, 11216, 12685, 13003, 14345, 13679, 13689,
- 12344, 10902, 11188, 10148, 7399 },
- { 5141, 7775, 10402, 11309, 13751, 13759, 14094, 13720, 15371, 14418, 14061,
- 12988, 11166, 11692, 9019, 7665 },
- { 6591, 8644, 11320, 11985, 14476, 14526, 14816, 14745, 15159, 14966, 15071,
- 14071, 12238, 12154, 10931, 8266 },
- { 7897, 9483, 10910, 12615, 14865, 14701, 16336, 15966, 16036, 16200, 16266,
- 15506, 13546, 12270, 11580, 9377 },
- { 6808, 9239, 10394, 11719, 13438, 14348, 14923, 15789, 15519, 15341, 15316,
- 15166, 12927, 11656, 10736, 9138 },
- { 5796, 8696, 10198, 12417, 12722, 13926, 15077, 15506, 15468, 15155, 15184,
- 13906, 12262, 10270, 9924, 7815 },
- { 5386, 6960, 8500, 10429, 11262, 12474, 13263, 12505, 13713, 13502, 13632,
- 12702, 12233, 9964, 9329, 6889 },
- { 5768, 7049, 7630, 9626, 10868, 11697, 12128, 12718, 14351, 13953, 13402,
- 13389, 13063, 10072, 8470, 6445 },
- { 3665, 4962, 7272, 8760, 9507, 10431, 11095, 11676, 12400, 12216, 11874,
- 11099, 10214, 8725, 6279, 4598 },
- { 3293, 4948, 6288, 7711, 8156, 9140, 9976, 11683, 9946, 11003, 11496, 10325,
- 8287, 6988, 6251, 4796 },
- { 2010, 3599, 5789, 6548, 7490, 7725, 7264, 9488, 9893, 9573, 9487, 8725,
- 7145, 6110, 3858, 2891 }
-};
-static int16_t default_ncobmc_krnl_2_0_0[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 3437, 3490, 4578, 2066, 1672, 1354, 1502, 2345, 2273, -600, 52,
- 272, 484, 2214, -1553, -197, 165, 278, 306, 384, 73, 415,
- -213, 357, 497, 288, 714, 6, -82, -118, 170, 181 },
- { 2505, 3488, 306, 3011, 2631, 181, 636, 2608, 1663, -964, 594,
- -1455, 1057, -1198, -1647, 187, 404, 412, 177, -32, 269, -24,
- 148, 233, -290, -359, -178, -164, -362, -19, -408, 106 },
- { 2588, 3528, 3391, 3134, 1812, 2387, -34, -298, -13, -955, 40,
- -475, 1243, 283, -247, -484, 200, -46, 36, -642, -386, -438,
- 34, 295, 93, -528, -13, 412, -8, 41, -457, 28 },
- { 796, 3353, 435, 3473, 458, 1851, 519, 1061, 259, 942, 416,
- 195, 390, -151, -1141, -710, 716, -401, 33, -771, -759, -336,
- 88, -124, -139, -372, -223, -505, -164, -100, -512, -465 },
- { 3233, 3990, 2698, -107, -448, 297, 331, -13, -530, -383, -464,
- -1530, 715, -899, -1978, -879, 43, 93, -77, -138, -425, -97,
- -167, -348, -460, -95, 280, -45, 235, 172, -357, -200 },
- { 868, 4162, 1417, 487, -1446, -355, 392, -159, 202, 704, -814,
- -3095, -1052, -1482, -745, -1403, -199, -27, -38, -387, -208, 20,
- -64, -130, -265, 81, -20, 238, 49, 121, -137, 495 },
- { 2774, 3478, 2072, 1229, 819, 1359, 106, -222, -1445, -1559, 924,
- -98, 44, -347, 455, -862, -318, -288, -31, 281, -144, -107,
- 148, 103, -171, -239, -134, 25, 125, 108, -142, -129 },
- { 610, 990, -703, 1003, 437, -275, -179, -233, -2041, -445, -1145,
- -488, 335, -2684, -1339, -294, -176, -195, -36, -65, -276, 10,
- -111, -277, -134, -222, -51, 31, -369, -279, -105, 69 },
- { 420, 2773, 375, -372, 489, 989, -900, 1075, 182, 119, -529,
- -470, -504, -2225, 225, 101, -264, -417, -253, -459, -317, -205,
- -528, -7, -43, -268, -116, -857, -608, -208, -216, 220 },
- { 2969, 1927, -314, -476, 402, -637, -838, 835, 1229, 1200, 135,
- -299, -324, -2136, 340, -1563, -309, -98, -408, -137, -154, 668,
- 101, -90, 245, 112, -51, -37, -525, -254, -244, -126 },
- { 1404, -258, 2333, 2019, 309, -29, -2468, 18, -494, 70, -260,
- 245, 515, -1984, -1759, -1003, -504, 104, 472, 197, -38, 265,
- 378, 6, 50, -183, -204, -17, -383, -318, -396, 142 },
- { 807, 637, 712, 1237, -971, -176, -1160, -210, -2072, -782, -959,
- -372, -590, -1159, -1017, -889, -750, -399, -98, -15, 2, -172,
- -48, -488, -628, -12, -25, 136, 229, -200, -212, -472 },
- { -1464, 333, -1978, -1394, -281, -1820, -124, 385, 97, -297, -1497,
- -3, -916, -660, -949, -504, 117, 11, 86, 88, 2, 219,
- 333, -120, -224, 71, 237, -507, 13, -381, -207, -113 },
- { 1100, -717, -1827, -1908, -1030, -1562, 404, 794, 4, -682, -1306,
- -612, -1197, 8, -131, 525, 159, -345, -91, 9, -222, -482,
- -69, 482, 593, -32, -239, -408, -522, -692, -126, 712 },
- { -798, -735, -174, -1695, 819, -737, -15, -426, -750, 876, 34,
- -622, 448, -71, -950, -2094, 74, 170, 18, 57, 156, 443,
- -85, -374, -416, -537, -348, -126, 62, -381, 399, -53 },
- { -552, -1352, 536, -1, -322, -1094, -428, 309, -142, -752, 354,
- 900, 473, -137, -1263, -370, -731, -864, -30, -101, 354, -321,
- -523, 377, 9, -415, -87, -145, -154, -286, 100, 23 },
- { 44, 607, 316, -268, -246, -497, 267, 154, 160, 717, 324,
- 240, -130, -218, -107, -252, -64, 4, 113, -57, -162, 123,
- -5, 143, -312, -66, -230, -33, -57, 60, 153, 85 },
- { 158, 14, -307, -240, -85, -416, 304, -402, -461, -221, 193,
- -123, 384, -142, 48, -77, -378, 36, -56, 20, 2, -240,
- -88, -1, -185, 87, 6, 94, -22, 82, 191, 194 },
- { 417, 259, -85, -170, -45, -151, -402, 136, 28, -40, 101,
- 224, -337, 97, 98, 51, -401, 95, -77, -153, 357, -99,
- -473, -142, -289, -80, -349, -76, -87, 97, 40, 198 },
- { -236, 62, -104, -8, 98, 68, 128, 116, 47, 54, -121,
- -150, -20, -120, 196, -80, 37, 290, 231, 247, 131, -113,
- -126, -87, 65, 250, 260, 102, -68, 234, 76, -87 },
- { 245, 486, 38, -10, -135, 106, 217, -187, -200, 96, 20,
- 117, -40, -97, 68, -139, 276, 8, -55, -53, -187, -20,
- -41, 1, -145, -246, -106, -45, -145, -353, 185, -35 },
- { 448, -172, -496, -63, -84, -106, 151, 9, -143, -180, -38,
- -276, -223, 269, 100, 38, -236, -66, 124, -59, 475, -78,
- -407, -20, -119, -19, 162, -4, -226, 101, 247, 78 },
- { -348, -156, -324, -260, -173, 0, -41, 63, 235, -114, 109,
- -362, -96, 279, -277, 36, 394, 394, 240, 30, -88, 209,
- 29, 176, 59, -20, -244, 50, -104, 192, -157, 48 },
- { -376, -176, 269, -426, -159, -108, -18, -163, 93, 130, -222,
- -40, 539, 176, 164, -62, -709, -354, 502, 664, 243, -414,
- -51, 192, 33, 54, -10, -57, -141, -3, 144, 71 },
- { -137, -636, 627, 6, -129, -159, -45, -150, -15, 402, 207,
- 20, 202, 1, -203, 88, 183, 62, -76, 120, 418, -196,
- -104, -154, -433, -338, -73, 1, -79, -14, -200, 84 },
- { 184, -334, 175, 114, -274, -60, -429, 176, 36, 373, 468,
- 134, 110, -11, -201, -94, 352, 109, 115, 91, 187, -83,
- 21, 0, -154, -180, 288, 0, -61, -197, -246, 42 },
- { -143, 26, 190, -110, -335, -385, -357, 27, 103, -66, -96,
- -189, -337, -150, 129, -104, -176, -418, -216, -118, 28, 126,
- -112, -130, 110, 17, 141, 111, -82, 238, 22, -50 },
- { 104, -95, 48, -239, -40, -148, -327, 244, 323, -102, 244,
- 151, 113, -150, -74, 223, -81, -328, -178, 140, -233, -165,
- 182, 514, 216, -129, -8, 141, -81, 451, -110, -71 },
- { -116, 84, -228, 177, 318, 62, 134, -3, 239, 14, 338,
- 278, -439, -254, 3, -82, -210, -62, -236, -124, 5, -60,
- 112, -18, -115, -31, 5, -65, 278, 4, -19, -130 },
- { 236, -64, -147, -519, 147, -27, 71, -567, -133, 24, -199,
- 229, -107, 126, -141, -148, -35, -34, 68, 230, 8, 72,
- 40, -148, 203, 97, 84, 107, 32, 17, -58, -18 },
- { -43, -408, -101, 120, 118, 168, -170, -233, -323, -120, -339,
- 80, -294, -151, 85, 52, -420, 79, -162, -233, -237, -47,
- -131, -53, -199, 14, 85, -80, 93, -150, -15, 318 },
- { -106, 107, -6, 189, 53, -109, 22, -474, -335, -102, -279,
- -321, -66, 186, -65, -13, 61, 167, 43, -159, -57, -13,
- 37, -125, -137, 132, 161, -156, -27, -276, -89, 15 }
-};
-static int16_t default_ncobmc_krnl_2_0_1[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 5401, 5987, 4279, 6550, 4858, 4986, 5733, 7172, 8194, 7631, 7549,
- 6971, 9288, 7485, 8583, 9244, 12058, 11530, 10461, 8453, 8304, 11724,
- 8999, 9457, 5018, 6922, 8375, 7860, 7915, 6921, 7703, 8963 },
- { 2308, 2670, 5018, 5298, 3883, 6449, 4267, 4119, 9252, 10082, 7844,
- 7414, 9050, 9261, 8739, 7808, 10974, 10279, 8627, 8840, 9203, 9406,
- 9360, 10574, 10156, 7673, 6238, 8876, 6800, 6423, 6931, 8589 },
- { 6608, 4325, 3372, 5227, 6182, 3670, 5595, 5758, 8575, 8025, 8251,
- 10711, 5449, 6965, 5443, 7178, 9099, 8842, 7132, 7830, 5795, 9882,
- 8939, 8323, 7507, 7248, 8750, 6786, 6940, 4942, 7125, 6399 },
- { 3977, 3060, 4962, 7094, 7211, 6388, 6256, 3960, 7672, 7814, 7711,
- 7237, 7088, 7232, 5716, 6040, 9565, 6643, 8113, 7841, 9849, 10144,
- 8297, 7676, 6792, 8447, 7805, 5475, 5499, 4728, 5379, 7645 },
- { 4598, 4391, 3660, 6284, 6694, 8302, 5610, 5341, 7466, 6298, 6406,
- 7734, 5743, 5155, 5257, 6958, 9035, 11566, 9636, 7825, 8147, 9427,
- 6612, 5526, 7635, 7259, 7696, 7853, 5505, 6744, 9265, 5394 },
- { 5980, 2356, 2746, 5955, 4045, 4283, 5117, 3799, 5386, 5594, 7671,
- 6984, 6232, 6028, 3101, 3391, 5757, 9530, 7408, 6206, 5512, 7867,
- 5144, 8011, 6690, 6994, 4877, 5063, 6175, 5205, 1965, 859 },
- { 2619, 4096, 4225, 4712, 5637, 6418, 6649, 3904, 5463, 5102, 4785,
- 4100, 5127, 3858, 3419, 5301, 6002, 7649, 8260, 6241, 4168, 4551,
- 6153, 5016, 7113, 7845, 5201, 5455, 5069, 2335, 3311, 5194 },
- { 1278, 4942, 4441, 3456, 3791, 5620, 5275, 2243, 5080, 4619, 5834,
- 4859, 4320, 5092, 1481, 846, 4969, 4835, 3646, 5940, 5736, 5862,
- 3628, 5918, 5865, 4945, 4385, 4699, 4342, 5415, 8383, 4711 },
- { 3855, 1678, 2560, 4631, 2765, 1444, 1449, 1895, 4494, 5706, 4813,
- 4882, 3532, 2264, 3222, 5444, 4097, 5236, 5036, 3713, 6547, 4371,
- 5311, 2363, 5113, 6290, 3743, 5343, 5369, 2813, 2486, 1647 },
- { -651, 1098, 2116, 3495, 2289, 1836, 4507, 4057, 5225, 4553, 2631,
- 2791, 2984, 3605, 3416, 3611, 4358, 4719, 3450, 4146, 3973, 3263,
- 3826, 5881, 6402, 4584, 4396, 3689, 2020, 1960, 2100, 4304 },
- { -622, 1848, 379, 112, -1474, 1013, 6023, 260, 1035, 1984, 3811,
- 2362, 1394, 2546, 3347, 2472, 1865, 755, 2251, 1139, 1933, 2252,
- 1163, 3003, 4091, 4792, 3801, 3517, 4247, 3798, 5216, 4543 },
- { 1342, 2229, 1014, 1212, 260, 432, 1975, 99, 2798, 818, 2455,
- 3858, 2231, 3773, 136, 857, 2171, 815, 1966, 1825, 1711, 964,
- 2142, 2514, 5367, 3539, 3241, 3116, 3982, 3839, 3553, 3535 },
- { 1800, 27, 321, 111, 1003, 528, 254, 979, 2444, 2413, 3807,
- 961, 1961, 1173, 2156, 3935, 259, 263, 1815, 1979, 1218, 2393,
- 3738, 1109, 4444, 3726, 3647, 3428, 2966, 4602, 4903, 5851 },
- { 1340, 753, 317, 1318, 738, 1880, -500, -691, 1108, 38, 412,
- 890, 494, 291, -131, 759, -111, 221, -95, 2575, 3099, 3223,
- 3140, 3156, 3952, 1942, 2615, -2313, 2991, 6367, 5744, 4528 },
- { 752, 490, 1255, 2396, 14, 3819, 1319, 1239, 3491, 2464, 3243,
- 3083, 392, 1273, 1712, -226, -931, -2130, 710, 864, 385, 265,
- 1431, 1796, 3063, 3531, 3879, 3986, 3503, 4045, 2539, 3489 },
- { 1943, 170, 358, 1884, 2344, 1566, 92, 1721, 1381, 1115, 723,
- 1670, 2294, 1497, 1697, 973, 1286, 2306, 381, 2582, 2551, 3852,
- 2481, 3432, 2273, 3079, 2076, 3014, 3365, 3906, 2241, 2250 },
- { 1741, -705, 595, 956, 2038, 793, 1518, 148, -524, -881, -487,
- 711, 720, 773, 431, 2181, -435, -841, -1106, -552, 434, -2007,
- -41, -234, -960, -23, 394, -655, 792, 934, 1495, 1947 },
- { 2086, 1360, 97, 1352, -95, 1800, -729, -916, -152, 956, 196,
- 1746, -1973, -690, 472, 1788, -28, 385, 781, 589, -320, 1167,
- -484, 66, 1136, 1038, 1741, 888, 3056, 2114, 3495, 1297 },
- { 1900, 1373, 983, 3718, 1409, 2096, 932, -604, -1370, 1153, 109,
- 58, 104, 2851, 602, -2071, 252, -888, 1428, 2724, 1344, 1567,
- 563, 1902, 1370, 519, -294, 393, 1153, -1032, 2129, 335 },
- { 2652, 2620, 3178, 2344, 2466, 2241, 1145, -101, -635, 306, -1036,
- 638, -2606, -1921, -1098, -328, -324, 2598, 1092, 1832, 493, 2507,
- 1152, 1461, -796, 2126, -742, 1182, 2078, 1549, 2665, 2366 },
- { 1080, 798, 1934, 568, 1218, 3206, 155, 1844, 2313, 3509, 1090,
- 650, 1166, 2515, 1846, 1025, 259, 720, 1587, 3010, 4955, 6457,
- 2952, 2764, -396, 1937, 1563, 673, 828, 4062, 2711, 1548 },
- { 871, 657, 2761, 1756, 2349, 198, -1003, -1105, -1181, -69, 146,
- 3201, -27, 1493, 13, 291, -2260, -468, 1178, 928, 2665, 3887,
- 3140, 1334, 1969, 2687, 544, 3842, 2885, 733, 3419, 1963 },
- { 1491, 1698, 302, 2127, 1256, 907, 1607, 1833, 2061, -536, 988,
- 4380, 2723, -195, 962, 1769, 2466, 1735, 2707, -369, -713, 1599,
- 3031, 2924, 2023, 2045, 5259, 1733, 3517, 4274, 440, 412 },
- { 2163, 1, 167, 1755, 5694, 3272, 739, 4235, 6123, 3811, 4611,
- 5800, 2424, 2409, 1458, 2152, 104, 115, 466, -998, -806, 2824,
- 4473, 2511, 4878, 3258, 5014, 3559, 1003, 2074, -2091, 1403 },
- { 964, 1051, -1527, 1266, 3883, 2349, 1054, 1972, 1929, -249, 3796,
- 2861, 1542, 449, 539, 1942, -16, 58, 2080, 56, 1106, 4248,
- 580, 2540, 3095, 4536, 152, 354, 4067, -2246, 1505, 1981 },
- { 1081, 1440, 324, 736, 2839, 2597, 3712, 2282, 3717, 2483, 1247,
- 4456, 3604, 3415, 2487, 3715, 2073, 2928, 2372, 828, -2700, 2054,
- 4315, -125, 1777, 2211, 2992, 7336, 4216, 3571, 2657, 6780 },
- { 1997, 2104, 1255, 1942, 1335, 1450, 3567, 1447, 3812, 6083, 5233,
- 4484, 3536, 3564, 3290, 4062, 2589, 2816, 3971, 4406, 3481, 2664,
- 1245, 1759, 3353, 1036, 2054, 1299, 2263, 4010, 4171, 3972 },
- { 1519, 4826, -750, 988, 1338, 2999, 212, 3858, 5202, 5306, 5717,
- 3066, 2629, 6461, 6043, 6637, 8388, 7252, 4890, 4161, -1056, 4615,
- 2538, 5633, 3389, 6439, 2985, 7148, 5149, 4509, 8001, 8863 },
- { 1047, 876, 2713, 3913, 2232, 1084, 1702, 2626, 1983, 3744, 2044,
- 3690, 2087, 4497, 2656, 5592, 6247, 4584, 4218, 6097, 6884, 6277,
- 2412, 5097, 7400, 2789, 6089, 6157, 7247, 9712, 11393, 5627 },
- { 2876, 4288, 2443, 3081, 1569, 1823, 1050, 2325, 2558, 2591, 4223,
- 6300, 4237, 4354, 4411, 7502, 4175, 3350, 4208, 1100, 6473, 6664,
- 5460, 4207, 5297, 8047, 6850, 6496, 7866, 10375, 7455, 2868 },
- { 3282, 5838, 6486, 6479, 3474, 4665, 3790, 2882, 5116, 4457, 4649,
- 4208, 4520, 7271, 4363, 7125, 8799, 6540, 10158, 5716, 6794, 5762,
- 6462, 8561, 2742, 7002, 9454, 8451, 8560, 7973, 7759, 6679 },
- { 5957, 7221, 5126, 7057, 5824, 4274, 5374, 6023, 7549, 6239, 7666,
- 6368, 4014, 5338, 7150, 9793, 10608, 9838, 6748, 9691, 5465, 4631,
- 7964, 7692, 8173, 9362, 8989, 11677, 10282, 9960, 6666, 9276 }
-};
-static int16_t default_ncobmc_krnl_2_0_2[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 7499, 5941, 5384, 4566, 4006, 3634, 2288, 4112, 2127, 3001, 2639,
- 1927, 467, -1639, 1484, 1143, 66, -316, 626, 1721, 1208, 193,
- 1591, 3903, 8472, 3945, 1882, 4378, 6453, 8972, 11867, 10110 },
- { 7919, 6226, 8601, 3825, 4644, 4380, 3957, 2964, 1316, 3586, 2268,
- 2802, 2193, 1427, 1479, 1353, -55, 373, 271, 979, 526, 1827,
- 2463, 1938, 3963, 4851, 5040, 4192, 3731, 4522, 8903, 6733 },
- { 6373, 4994, 6414, 4822, 4923, 4881, 4383, 6117, 3342, 5068, 2353,
- 2370, 2231, 758, 1768, 1338, 742, 1498, 454, 1453, 1466, -213,
- 177, 1223, 512, 5366, 2462, 4667, 5671, 5039, 6065, 6874 },
- { 9299, 8698, 12939, 6170, 7063, 3147, 3256, 3492, 2696, 4498, 3705,
- 3176, 2797, 1099, 2852, 1331, 527, 1272, -388, 1619, 110, -406,
- 390, 3801, 4468, 3193, 2944, 7284, 7144, 4560, 6320, 8073 },
- { 5937, 4572, 5212, 6678, 5291, 2561, 2752, 4892, 2713, 5203, 4202,
- 1527, -470, 2424, 2850, 1217, 401, 587, 191, 1122, 1314, 1854,
- 3860, 4579, 2455, 5427, 1614, 5037, 5073, 5074, 3101, 7734 },
- { 7035, 5229, 7515, 6523, 7587, 5653, 5311, 4945, 4097, 4237, 2836,
- 2667, 1959, 4095, 1669, 1484, 57, 467, 1028, 642, 2843, 2782,
- 3604, -825, 1592, 4305, 2202, 4432, 4683, 3867, 3520, 9281 },
- { 7248, 3787, 4243, 4710, 3288, 1975, 2766, 4057, 1506, 2644, 1436,
- 818, 1150, 2159, 787, 920, 98, 137, 1065, 306, 3880, 537,
- 3871, 1060, 3821, 3395, 2484, 3532, 4072, 3339, 2638, 3982 },
- { 8810, 5802, 5538, 4090, 3659, 3742, 3818, 6827, 6474, 4756, 4093,
- 3735, 4063, 4586, -1945, 470, 328, -163, 958, 511, 2541, 3057,
- 2972, 4349, 4754, 5115, 5847, 6843, 7299, 6652, 5891, 5655 },
- { 9091, 5007, 6438, 4749, 5610, 3664, 6151, 5188, 3686, 2005, 2670,
- -245, 1788, 3724, 2626, 679, -52, -839, -145, 356, 3488, 1970,
- 1988, 2126, 1099, 2578, 5401, 6965, 4908, 5526, 6748, 5968 },
- { 6412, 7516, 8029, 8748, 6742, 7509, 6552, 4078, 4300, 5066, 4786,
- 3270, 4270, 3875, 2319, 4282, 1640, -843, -439, 427, 1587, 520,
- -28, 2251, 3358, 3049, 4407, 7286, 8994, 7802, 5924, 6824 },
- { 8467, 6838, 3934, 2952, 7200, 5407, 4593, 5882, 3353, 3865, 1870,
- 1535, 2130, 4121, 3527, 1799, -637, -937, 513, 247, 169, 607,
- 2947, 3530, 3717, 6082, 9703, 6867, 2729, 6292, 3084, 4879 },
- { 9934, 8638, 7508, 6894, 7343, 5306, 6208, 6136, 5240, 7136, 3958,
- 1811, 3171, 1064, 2246, 882, 1681, 727, 1694, 769, 1700, 1370,
- 1901, 5812, 3852, 6468, 5875, 5416, 6007, 3348, 3600, 6661 },
- { 10978, 9383, 9741, 10746, 5208, 8469, 4608, 5824, 4424, 3460, 3841,
- 4037, 3687, 1582, 3784, 988, 1974, 1292, 2272, 2128, 2210, 2888,
- -967, 5864, 5568, 4693, 3796, 6361, 4816, 2697, 4559, 6437 },
- { 8329, 9809, 8672, 9375, 7503, 5775, 3454, 4596, 5093, 5033, 4021,
- 2860, 2833, 2782, 3056, -617, 1644, 1759, 2434, 2570, 3312, 3807,
- 3518, 3521, 1126, 2830, 3378, 4432, 3261, 5211, 4073, 10050 },
- { 9992, 8148, 7951, 7194, 5624, 5032, 3296, 2981, 5388, 3910, 2274,
- 1436, 1425, 1053, 2111, 2806, 1606, 1446, 1681, -211, 1877, 1541,
- 1700, 2736, 2088, 2551, 1045, 2977, 2632, 1719, 4896, 5378 },
- { 9403, 8846, 8061, 7478, 5269, 6655, 6312, 4110, 3529, 5802, 3108,
- 3246, 1943, 909, 2436, 1678, 1513, 1243, 797, 213, 3888, 4015,
- 2775, 2082, 2395, 2792, 2136, 2475, 1657, 2156, 1878, 2587 },
- { 9499, 9075, 5426, 6962, 8206, 8057, 3968, 5184, 2759, 2277, 2744,
- 3531, 2518, 367, 1075, 2118, 900, 901, 2964, 3641, 5282, 2186,
- 2416, 2312, 2366, 2149, 1024, 1912, 1119, 220, 401, 727 },
- { 7615, 8271, 8148, 7699, 7063, 7658, 5473, 7497, 7302, 5841, 4165,
- 3092, 734, 2215, 3316, 2226, 1197, 1236, 2996, 5007, 2872, 3460,
- 2371, 1898, 1917, 1442, 853, 1412, 700, 620, 317, 1237 },
- { 8331, 8530, 8633, 7185, 6863, 9076, 5328, 5045, 5378, 4004, 4089,
- 1469, 1341, -333, 2689, 1982, 115, -1158, 383, 1548, 1118, 2864,
- 3154, 1803, 2079, 1676, 1450, 1165, 967, 795, 136, 1184 },
- { 8763, 9102, 6716, 8961, 5448, 6366, 3438, 5722, 5374, 5651, 5422,
- 1728, 1751, 2444, 1024, 1118, 424, 2288, 3655, 2719, 2254, 1313,
- 3476, 1983, 1975, 1502, 1172, 2333, 937, 594, 122, 149 },
- { 8146, 9931, 7629, 8882, 6328, 7491, 5646, 5494, 7238, 7355, 4478,
- 2019, 2646, 3486, 4193, 1121, 562, 1823, 2787, 1720, 2228, 3627,
- 4470, 3351, 2439, 2214, 1926, 2118, 1771, 767, 353, 1062 },
- { 10816, 9814, 10917, 7424, 8207, 9717, 8537, 8728, 7356, 7376, 7246,
- 3223, 1981, 277, 1282, 951, 515, 222, 1392, 789, 4372, 2112,
- 4083, 2706, 3234, 2414, 2655, 1407, 702, 1369, 121, 676 },
- { 11362, 10078, 7520, 7828, 10705, 7300, 7358, 6559, 8337, 7569, 5067,
- 3465, 2417, 1956, 2165, 759, -106, -1282, 1822, 3225, 4767, 5619,
- 4119, 3383, 3877, 2702, 2410, 2459, 1441, 1392, 945, 216 },
- { 10112, 8115, 3762, 5107, 7443, 7676, 7498, 7380, 6235, 7523, 6246,
- 3574, 2749, 3853, 303, 1558, 1896, 1107, 462, 2172, 2388, 4222,
- 2000, 1688, 3560, 2297, 1593, 3679, 3628, 1507, 1549, -188 },
- { 7794, 6437, 6605, 5381, 6404, 4410, 6677, 4233, 4949, 3000, 2812,
- 3756, 1805, 2877, 2098, 1737, 1809, 1427, 378, 2031, 2115, 5006,
- 3159, 3602, 6343, 3503, 3356, 5971, 3138, 3932, 1028, 699 },
- { 6757, 7738, 6538, 8248, 6959, 6557, 5264, 3092, 3765, 1895, 1865,
- 901, 2485, 2217, 1699, 1946, 3573, 1501, 2141, 2177, 180, 1003,
- 1816, 4793, 2112, 4560, 3820, 2881, 4376, 2091, 681, 623 },
- { 9057, 8917, 7385, 7072, 6324, 5492, 5283, 5053, 5785, 4277, 3322,
- 1267, 1946, 1894, 3701, 472, 1658, 1154, 777, 2193, 2349, 3611,
- 3129, 3719, 1781, 5389, 3418, 2463, 3734, 3644, 3365, 2247 },
- { 9444, 9439, 8598, 9152, 6982, 8451, 8279, 6129, 5172, 3730, 2416,
- 2483, 2775, 1913, 1041, -1110, -392, 1068, 556, 598, 4171, 2377,
- 1870, 1906, 5449, 5413, 2589, 3564, 6473, 6692, 3140, 2665 },
- { 10567, 10001, 8225, 8289, 6898, 6856, 3920, 4547, 4297, 1456, 2348,
- 1526, 2343, 2863, 1429, 312, 57, 930, 1619, 1189, 596, 1815,
- 2589, 3141, 1662, 3349, 1311, 4091, 4596, 7321, 5911, 6965 },
- { 9593, 9214, 9132, 8273, 8030, 8135, 5179, 5564, 4052, 4155, 4052,
- 2249, 2178, 1680, 439, 822, -378, -1210, -1149, 3709, 2830, 747,
- 2987, 5873, 795, 5124, 4233, 3887, 5573, 5312, 7258, 11014 },
- { 8373, 8033, 8934, 7880, 7434, 6144, 7528, 5163, 2591, 4301, 2489,
- 4137, 1295, 760, 703, 805, -308, -320, 2205, -1113, 362, 581,
- 2567, 689, 5949, 2652, 1996, 2138, 7469, 4835, 8058, 11132 },
- { 8586, 6026, 7656, 7201, 8141, 7249, 5995, 4896, 3152, 4255, 1711,
- 3498, 3933, 1852, 1444, 715, -104, -695, 4021, 3937, 6478, 1755,
- 935, 384, 1002, 2595, 3359, 4532, 7103, 5192, 12241, 14373 }
-};
-static int16_t default_ncobmc_krnl_2_0_3[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { -18, 921, 2116, 3151, 5822, 6391, 6844, 2748, 3794, 6358, 6115,
- 7194, 6145, 8324, 7847, 6181, 4052, 4867, 4967, 5823, 6786, 4035,
- 5989, 2636, 2376, 5222, 5409, 4121, 2105, 626, -3363, -2857 },
- { 3594, 3991, 2433, 4231, 5187, 5335, 7496, 6672, 4132, 3625, 5649,
- 7621, 4052, 6868, 7772, 7010, 5041, 5311, 7273, 6593, 6376, 5150,
- 4421, 3618, 2523, 4188, 5275, 3469, 6209, 5459, 953, 947 },
- { 786, 3510, 3161, 3162, 3435, 5439, 6415, 4784, 4467, 4232, 5708,
- 3775, 7437, 8362, 9398, 8331, 6300, 6049, 8740, 7748, 9508, 7139,
- 7232, 6528, 8257, 4296, 5180, 4497, 3755, 6329, 3620, 3050 },
- { 2273, 1239, -1997, -385, 1641, 4987, 6332, 7869, 5742, 3115, 4523,
- 5739, 6076, 8184, 8936, 9733, 5577, 8872, 8635, 7679, 7192, 6961,
- 7586, 5022, 5256, 5107, 5842, 4127, 3898, 7191, 5184, 1097 },
- { 2576, 3444, 4787, 3494, 4843, 5213, 7669, 6154, 6713, 5224, 6221,
- 8653, 10387, 9676, 10219, 9062, 6899, 4115, 6617, 7548, 7319, 5169,
- 6051, 6609, 6735, 3759, 6779, 3520, 5518, 4355, 4386, 3459 },
- { 2457, 4623, 4686, 3390, 6167, 6776, 5546, 7755, 6678, 5831, 6667,
- 9797, 9222, 7728, 12319, 12899, 10764, 6383, 7947, 9907, 8225, 5677,
- 7690, 9312, 8324, 4971, 9288, 6616, 5448, 7180, 11014, 5709 },
- { 3687, 5015, 5834, 5702, 6619, 6602, 6844, 8607, 10828, 10170, 9206,
- 11527, 10057, 10677, 11683, 11009, 10585, 8869, 7057, 9542, 8465, 11391,
- 6180, 10182, 5594, 5353, 8810, 7358, 7118, 10591, 10569, 7318 },
- { 5659, 4619, 7090, 7819, 8483, 7258, 7446, 7530, 6847, 7424, 7586,
- 8261, 7644, 9373, 18173, 15351, 11259, 11899, 11787, 9977, 8370, 7422,
- 9853, 6375, 5873, 6503, 6194, 4792, 5082, 4563, 2192, 5942 },
- { 3004, 6927, 6994, 7359, 7505, 10247, 9661, 8199, 7979, 8529, 9388,
- 12192, 11555, 12591, 10308, 10143, 12579, 12379, 11700, 12735, 6629, 10209,
- 9592, 11878, 10187, 7755, 7344, 4922, 6699, 8240, 7341, 8532 },
- { 7590, 5795, 6512, 4587, 6933, 7660, 6141, 7410, 5605, 5542, 8790,
- 10597, 9438, 10999, 10270, 10028, 10678, 12591, 13767, 11933, 10966, 11898,
- 12452, 8305, 6352, 8621, 7598, 5409, 5869, 6860, 8606, 5371 },
- { 7095, 7927, 9729, 11290, 10321, 9966, 8226, 10211, 12468, 10459, 10959,
- 12232, 12326, 11686, 11247, 13106, 15660, 16448, 13119, 14772, 14295, 13233,
- 11880, 9805, 8498, 5650, 3043, 5995, 9756, 6592, 8450, 6801 },
- { 4251, 4844, 7130, 7033, 9742, 10794, 9341, 10350, 10410, 9188, 10907,
- 11059, 11547, 12685, 14995, 15511, 13256, 15229, 12788, 13792, 12937, 14179,
- 12355, 8519, 7767, 6376, 7293, 7706, 6134, 9392, 9423, 6656 },
- { 5032, 6597, 8267, 6875, 10431, 9182, 11606, 9174, 9394, 10754, 10214,
- 11384, 11633, 14256, 11377, 11933, 13999, 14801, 12182, 12170, 12927, 10856,
- 13248, 9493, 6586, 7871, 8697, 7094, 8561, 9451, 7116, 4183 },
- { 5550, 6479, 9188, 7562, 9126, 10236, 12984, 11667, 10146, 11981, 13257,
- 13227, 14228, 13278, 13571, 15730, 14696, 14740, 14122, 11230, 10186, 9795,
- 9766, 9187, 10707, 11612, 10594, 14651, 10618, 5465, 6640, 1085 },
- { 6402, 8472, 7318, 8449, 9884, 8237, 11776, 12579, 8248, 9119, 10813,
- 12464, 14087, 14122, 13487, 15884, 15630, 16883, 13968, 15663, 13943, 14099,
- 13309, 12222, 11647, 10827, 11813, 9543, 10171, 10991, 8523, 7564 },
- { 5558, 8716, 7398, 7003, 9081, 9234, 10389, 10222, 11602, 10189, 12165,
- 10551, 11676, 14110, 13499, 14107, 14297, 13673, 15239, 13669, 9564, 8809,
- 11609, 10482, 11688, 10885, 12257, 11025, 11490, 10586, 12134, 11499 },
- { 5054, 7370, 10001, 8690, 6346, 7990, 10600, 10877, 13977, 14230, 13786,
- 11880, 13256, 15455, 14951, 12311, 15970, 16289, 14385, 13318, 10806, 16058,
- 14004, 14150, 15275, 14285, 15169, 15124, 14484, 15130, 14320, 13627 },
- { 6472, 6714, 8422, 7520, 9468, 7309, 11310, 10173, 9680, 9775, 11809,
- 11641, 17217, 14973, 12511, 12431, 15565, 14706, 12653, 10736, 13799, 11984,
- 14576, 14406, 13494, 13775, 13748, 13952, 12627, 13551, 12343, 13637 },
- { 5691, 6196, 6840, 5618, 8130, 5337, 10502, 11764, 12309, 11243, 12058,
- 14603, 15254, 13730, 12988, 16426, 16398, 18336, 14653, 12258, 13528, 12015,
- 13122, 12816, 13238, 14265, 15564, 14875, 14346, 16501, 14057, 14664 },
- { 5142, 4576, 6578, 5068, 8343, 7665, 11649, 10611, 11541, 10331, 12078,
- 14129, 17221, 15930, 16224, 15649, 16231, 11200, 11389, 11572, 13476, 12629,
- 11861, 13013, 15114, 12486, 15663, 12735, 13401, 13979, 13507, 13952 },
- { 6851, 5162, 6778, 6922, 8951, 5567, 10360, 9216, 7036, 5410, 10771,
- 13577, 12588, 10477, 10248, 14359, 15261, 13795, 12048, 11716, 9361, 6278,
- 8997, 10237, 14438, 12459, 12976, 13600, 13892, 11879, 13127, 13802 },
- { 4195, 6070, 3151, 7247, 5889, 6549, 8672, 8715, 10338, 9229, 9026,
- 10246, 14651, 14345, 15001, 15116, 18364, 16684, 13657, 14718, 8840, 10437,
- 9581, 12367, 11264, 11291, 13002, 11111, 13027, 14172, 12590, 13651 },
- { 3818, 4756, 8879, 6693, 4570, 8158, 7459, 7913, 5727, 9446, 10204,
- 8887, 11326, 14337, 13524, 13813, 13628, 15506, 11578, 13470, 12391, 8927,
- 9166, 9882, 10411, 11665, 8963, 12141, 11521, 10521, 15132, 15679 },
- { 4425, 8428, 12163, 9947, 3396, 5526, 8133, 4898, 3913, 4891, 5711,
- 7034, 10657, 9932, 14435, 12716, 15058, 15501, 14937, 14530, 14536, 9746,
- 9923, 11968, 7869, 10734, 9735, 9164, 11842, 12786, 16768, 15073 },
- { 7712, 9515, 10650, 9707, 6201, 9752, 8700, 10334, 9503, 13202, 9555,
- 9748, 12814, 13027, 13920, 12593, 14370, 14808, 13965, 14154, 12735, 7319,
- 12721, 10395, 7361, 8678, 12937, 10057, 9234, 14695, 14044, 13613 },
- { 8309, 7528, 9323, 7254, 6829, 7276, 7831, 10824, 8851, 11605, 12763,
- 10865, 10153, 10736, 12379, 10799, 10370, 11817, 11734, 13290, 18692, 13378,
- 10209, 11690, 12616, 9779, 9257, 6142, 7818, 10903, 13276, 8893 },
- { 5420, 5315, 7529, 7453, 9027, 9825, 7865, 9813, 6673, 6090, 7914,
- 10790, 11205, 11064, 9239, 11947, 12306, 12802, 11856, 9896, 10502, 9968,
- 12099, 11011, 11103, 9920, 10747, 12477, 10458, 8485, 8805, 10199 },
- { 5275, 2169, 8448, 6454, 8077, 5060, 8189, 6133, 5673, 7424, 7993,
- 10659, 10836, 8138, 9347, 10570, 8447, 8359, 11071, 11453, 13480, 9521,
- 11755, 8294, 7308, 4637, 10781, 5515, 4843, 4737, 5330, 4893 },
- { 4846, 5401, 5671, 3987, 6910, 8363, 10605, 9189, 9832, 11154, 11632,
- 10874, 12377, 9266, 12273, 10543, 10287, 10912, 10745, 9206, 8851, 8327,
- 11242, 8123, 7431, 10266, 8947, 6186, 4259, -682, -920, 3901 },
- { 3634, 2920, 4925, 5515, 6626, 6450, 10063, 9047, 9880, 9577, 8277,
- 7582, 10044, 10186, 11630, 8182, 12589, 14249, 13236, 11328, 7042, 8880,
- 7868, 6442, 10067, 3096, 5190, 5874, 2890, 668, 1718, 2480 },
- { 4732, 2901, 1056, 1878, 5356, 5406, 5212, 8538, 8974, 7742, 9588,
- 7933, 10867, 8487, 11203, 8392, 8301, 10070, 4166, 11993, 9436, 10071,
- 7464, 7158, 7848, 6669, 4825, 5838, 236, 3720, 562, -1751 },
- { 1899, 3004, 3605, 1918, 2347, 4957, 5010, 5918, 6020, 5972, 7291,
- 6820, 8455, 8985, 7833, 5877, 5796, 7048, 5548, 2886, 4467, 10008,
- 7443, 8399, 7314, 4277, 3852, 296, -983, 1487, -2474, -7290 }
-};
-static int16_t default_ncobmc_krnl_2_1_0[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 4140, 3361, 5678, 1978, 3443, 3453, 2905, 2131, 4836, 2612, 1530,
- -831, -257, 584, -1193, -391, 107, -47, 32, 125, 282, 684,
- 161, 23, -22, -95, 555, -405, 569, -268, -92, 105 },
- { 4680, 4183, 4456, 4730, 4264, 4681, 2310, 2034, 3081, 2493, 2012,
- 1397, 1521, -881, -976, -668, -606, -768, -273, 256, -4, -290,
- 64, -55, -444, -989, -316, -496, 206, -169, -158, -87 },
- { 3199, 3846, 3775, 632, 2359, 3492, 3355, 53, -1201, 145, 263,
- -93, -1435, 415, -844, 954, -241, -483, -165, -191, -561, -185,
- -300, -258, -154, -654, 308, -64, -36, -150, 95, 146 },
- { 680, 2863, 889, 1721, 3444, 2472, -27, 2458, 816, -186, 123,
- 3214, 2029, 2485, -631, 323, 1030, -275, 196, -532, -537, 153,
- 274, 61, -453, -283, -533, -1062, -145, -388, 158, 0 },
- { 1962, 4004, 1406, -535, 1315, 2669, 2522, 654, 3394, 4205, 2731,
- -40, -118, 599, -511, 618, 162, 840, 43, 253, -59, 222,
- 64, -21, -671, -179, 241, 283, 902, 226, 305, -204 },
- { 516, 1205, 3201, -5, 1479, 945, 2129, -628, 3181, 900, 1439,
- 1128, 799, -158, -431, 347, -118, 527, 389, 268, -73, 2,
- 534, 133, -287, -19, 561, 329, 394, -120, 38, -461 },
- { 2130, 2022, 1966, 210, 447, 402, 1249, 1677, 2353, 1113, 1723,
- 1300, 2060, -144, 420, 2008, -417, -74, -197, 135, 217, 310,
- 152, 339, -99, -81, 279, 44, 54, -160, -82, 4 },
- { 2134, -1849, -990, -93, 1932, 2119, 2954, -371, -1021, -831, 1662,
- 1330, 1634, 246, -777, 852, 130, -67, 191, -316, -429, -240,
- -147, -198, 92, -15, 310, 141, -10, 146, 35, 85 },
- { 2763, 4779, 994, 1054, 2625, 2031, 1784, -161, 1142, 1052, 2300,
- 2462, 1943, 516, 816, 27, 18, 171, 158, -311, -636, 20,
- -463, -235, 145, 339, 240, -354, -110, 41, 404, 353 },
- { 3625, 3557, 2333, 950, 2020, 2445, 2562, 1506, 2571, 1559, 4781,
- 2030, 1325, 2507, 2045, 1896, -526, -22, -272, -143, -189, 17,
- 10, 405, 143, 414, -95, -229, -215, 0, -347, 83 },
- { 2808, 1062, 1502, 411, 1139, 998, 1577, 1233, 1637, 998, 1846,
- 2487, 3868, 2225, 533, -51, -6, -180, -30, 186, -175, 247,
- 352, 57, 83, 290, 330, 160, 165, 354, -465, 131 },
- { 2809, 2966, 2929, 1435, 2875, 1948, 130, 1168, 252, 1276, 2838,
- 3507, 3001, 1410, 312, 1941, -336, -431, -190, -194, -130, -336,
- 238, 75, -472, -189, 123, 61, -583, 147, 305, 200 },
- { -23, 2306, 2169, 33, 1848, 1832, 2721, 49, 1435, 585, 1036,
- 2116, 1658, 1011, 815, 920, 101, 108, 262, 299, 283, 357,
- 268, 141, -71, -285, 205, 142, -71, 224, 252, 156 },
- { 1447, 2625, 4643, 2096, -847, -154, 2876, 1050, 104, -873, -327,
- 146, -596, 622, -337, 1317, -61, 9, -201, 110, 90, 644,
- 337, 204, 155, 278, 320, -306, -504, 357, -108, 132 },
- { -16, 2815, 1344, -2044, 2236, -549, 586, 409, 30, 152, 1588,
- 243, -115, 291, -30, -170, -96, -10, 433, 205, -134, 17,
- 528, -16, -22, -198, -43, -143, -224, 270, 153, 37 },
- { 1478, 829, 628, 1055, 1323, -406, -282, -12, 418, 40, -795,
- -286, -627, -41, -448, 454, -267, -258, -129, -57, -44, -406,
- -260, -67, 134, -196, -236, -125, 35, -62, -137, -5 },
- { 220, 26, -380, -257, -90, -453, -196, -56, -193, 37, 131,
- 151, -88, -695, 66, -113, -200, -144, 132, -48, -244, -207,
- -178, 268, -107, -1, 69, 337, -84, -197, 87, 119 },
- { 7, 3, -85, -185, 334, -86, -69, 152, -320, -239, 587,
- 415, 246, 290, -146, -134, -9, -69, -66, -148, -41, -206,
- -148, 283, -144, -287, -73, 93, -23, 247, 398, 174 },
- { 46, -256, -114, -61, -532, 103, 32, -223, 24, -20, 132,
- 339, 61, -381, -711, -160, -200, -334, 78, 173, -281, -139,
- -27, 134, -120, 96, 110, -251, -114, -32, -299, -183 },
- { -193, 28, -134, 200, 155, -316, -363, 285, 268, 665, 233,
- -127, 436, -20, -536, -163, 51, -40, 162, 78, -27, 192,
- -34, -40, -17, -205, 203, 106, -62, -211, -84, 60 },
- { -440, 312, -195, 221, 251, -388, -116, -252, -101, 92, -244,
- -694, -27, 198, -3, 255, -257, -17, 0, 143, -20, 48,
- -68, 110, -130, -340, 136, -45, -138, 251, -111, -2 },
- { 325, 219, -68, 215, -177, -206, 14, 108, -291, 211, 92,
- -62, -166, -218, -158, -220, -279, 199, 113, -263, 271, 153,
- -433, -16, 19, -322, -28, 258, -295, -300, -285, -123 },
- { -345, 543, 356, -541, -726, -205, -332, -397, -10, -132, 232,
- 132, 308, 324, 229, 79, -151, 161, 143, -40, -144, -464,
- 32, -364, -11, -99, -285, 61, -258, 182, -28, 107 },
- { -55, 70, -78, -269, -709, -52, 351, 94, 80, 268, 249,
- -56, 189, -191, -60, -88, 15, -205, 111, -62, 21, 85,
- 77, -107, -35, -13, -107, -472, -546, -197, 5, 115 },
- { -363, -297, 246, -84, -419, -230, 283, -128, 34, -27, 112,
- 125, 166, 163, 176, -422, 14, -238, -80, -153, 313, -366,
- -208, -54, -260, 48, -176, 21, -91, -295, -270, 40 },
- { 85, 242, 107, -41, -283, -390, -105, 360, 181, -720, -582,
- 27, -96, -350, -217, -189, -135, -12, 280, 86, 3, 25,
- -126, -213, -384, 41, -15, 101, -68, 143, -211, 86 },
- { -183, 13, 274, -46, -86, -633, 181, -232, -90, -106, -22,
- 332, -12, -16, -30, 87, 5, 46, 37, -99, 27, 292,
- -74, -94, -237, -16, -145, 76, -106, 227, -52, 168 },
- { 40, -258, -140, -6, 203, 146, -64, -88, -183, 221, 62,
- 67, 114, -216, -307, -560, -197, -46, 149, -126, -120, -316,
- -36, -227, -200, 115, -41, -51, 97, 123, -47, 103 },
- { -51, 44, -99, -230, -156, -46, -145, -412, -56, 48, -239,
- 222, 83, -339, -196, -64, 175, 149, -140, -316, 6, -62,
- -27, -56, -21, -269, 229, -7, 122, -18, -129, 86 },
- { -372, 106, 18, 172, 364, 19, -245, -73, -124, 164, -9,
- 14, 214, -67, -217, -175, -45, 119, -194, 36, 18, -83,
- 126, 196, 112, -297, -102, 104, -74, -152, 19, 199 },
- { 314, 81, -49, -188, 48, -82, -4, 107, -221, -4, 207,
- -245, 197, -37, -185, -50, -56, -214, 100, -231, -31, -2,
- 21, -53, -215, -77, 168, -23, 82, 5, 155, 169 },
- { 258, 188, -27, -27, 165, 29, -17, 100, -27, -80, -80,
- 196, 23, -391, -533, -171, 84, -137, 0, 14, 251, 99,
- 35, 88, -28, 1, 144, -96, -235, 176, 103, -85 }
-};
-static int16_t default_ncobmc_krnl_2_1_1[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 5724, 6155, 5101, 6937, 3616, 3940, 3066, 5662, 7104, 5021, 4979,
- 5907, 4968, 7085, 6582, 7719, 9143, 4128, 6447, 4879, 7061, 11362,
- 7837, 9965, 7152, 6477, 6581, 5803, 1819, 5309, 8559, 10776 },
- { 1775, 3231, 4026, 2629, 4438, 6309, 5114, 2895, 5657, 6541, 6734,
- 5994, 7468, 4555, 9911, 5200, 5402, 1698, 4298, 6112, 6417, 6691,
- 4816, 6195, 4139, 5856, 3358, 1993, 1542, 661, 1660, 4762 },
- { 1953, 726, 336, 2519, 4189, -753, 2993, 4957, 5850, 4298, 3651,
- 5353, 3255, 5491, 7815, 3406, 3928, 2987, 4148, 4276, 3530, 8058,
- 5079, 5821, 4622, 3354, 3146, 2460, 489, 1550, 1587, 1399 },
- { -801, 328, 103, 886, 1381, 2280, 4320, 2452, 1215, 6261, 2206,
- 4849, 4488, 3829, 6128, 5213, 1739, 3173, 4425, 4567, 5845, 5197,
- 5910, 6147, 4260, 3730, 4240, 5420, 307, 672, 963, 3278 },
- { -1721, -2596, -155, 3029, 3428, 2390, 2321, 3757, 1383, -1283, -1621,
- 1418, 2475, 4188, 5570, 3575, 799, 4017, 2856, 1426, 2012, 2722,
- 3669, 4104, 3800, 4116, 3275, 3739, 326, 95, 2421, 3075 },
- { -551, -927, -520, 2944, 2518, -722, -215, 1875, 137, 2182, 2761,
- 159, 762, 3693, 1681, 2600, 880, 3273, 4470, 5007, 4272, 3074,
- 2474, 4254, 6828, 4219, 3671, 2407, 1044, 129, -478, 2814 },
- { -2686, -1229, 1372, 4761, 4668, 1462, 509, 2727, 930, 2438, 3542,
- 1456, 1961, 541, 1063, 1426, 3603, 2873, 2412, 2999, 2101, 3739,
- 2385, 5494, 5444, 5655, 5034, 381, 321, 90, 2585, 4160 },
- { -4203, 479, 1122, 2688, 2124, 942, -2136, -1643, -491, 2581, -2155,
- -2375, 559, 582, 2202, 2081, 3774, 3330, 1101, 894, 3410, 3691,
- 2509, 5195, 6226, 5471, 5022, 2525, 778, 1212, 2736, 3350 },
- { -2415, -2903, 4719, 5860, 4006, 2692, 4035, 4143, 2498, 4377, 2058,
- 488, 1429, 3199, -11, 2009, 2087, 2903, 155, 522, 4521, 2221,
- 2310, 3124, 2870, 1941, 3262, 2258, 1515, 2257, 1584, 1048 },
- { -1469, -2652, -561, 2135, 389, -522, -589, 447, -847, 268, -1641,
- -1540, -1513, -1334, -599, -581, 2848, 2828, 1416, 2157, 2198, 925,
- 2421, 1437, 1963, 369, 2195, -548, 2051, 868, 824, 2683 },
- { -2620, -3631, -4548, -885, 629, 523, -528, -2178, -1743, 1644, 353,
- -2687, -3041, -1722, 283, 178, 1594, 1190, 968, -386, 2305, 1317,
- 245, 1443, 968, 800, 471, 521, 1564, 669, 903, 243 },
- { -1791, -3282, -4140, -1753, -1006, -374, 1027, -176, -1477, -891, 191,
- -912, 497, 96, 359, 1045, 1467, 172, 1303, 2510, 3516, 3671,
- 789, -807, 2670, 1483, 547, -521, -1219, -1856, 1008, 1053 },
- { -1427, -2698, -3949, -436, 801, -614, -1548, 523, -176, -683, 423,
- -871, 820, -2279, -143, 375, 768, 2306, 5249, 1302, -338, -396,
- -1590, -608, 1469, 2344, -187, -693, 599, -661, -458, 160 },
- { -3491, -3877, -2952, 1252, 767, -3037, -3638, 188, 587, 710, 1416,
- 1176, -319, -473, 1873, -1997, 725, 596, -94, 1875, 2992, -519,
- -139, 1938, 1025, 521, 760, 1090, 3648, 392, 564, 902 },
- { -2186, -3264, -1742, 2634, -36, -51, -1253, -314, -908, -459, -1701,
- -1437, -991, 84, 1265, -964, 402, 1454, -772, -927, 1765, 1543,
- 484, 2346, 3310, 1887, 1754, 3058, 1474, 728, -466, -1646 },
- { -1826, -332, 48, 744, -618, -97, -165, -155, -908, -143, 1285,
- 1739, 1185, 885, 1134, -531, -15, -526, 543, 1438, 2026, 3022,
- 558, 1827, -139, 1792, 2022, 769, 2400, 444, -1572, 598 },
- { 165, -357, 15, 666, 1315, 1155, 376, -7, 991, 213, 1687,
- -34, 452, 352, 203, 1605, 1484, -498, 581, 533, 467, 1744,
- 1315, 874, 82, 900, 1437, -692, -417, 456, -271, -1132 },
- { 646, 210, 320, 1208, 145, 971, 396, -448, 557, 1876, -1791,
- 913, -1288, -452, 1015, 925, -1197, -49, -285, 442, 1093, -410,
- 125, 519, -52, 513, 1497, -1337, 298, -402, 820, 732 },
- { -796, 627, -1017, 2972, 4463, 2331, 1387, 1496, 1796, 1608, 1681,
- -877, 881, -160, -581, -433, 949, 471, 307, 140, -946, -597,
- 247, 650, 1143, 694, 10, -682, 890, 409, 617, 810 },
- { 1653, 4435, 2388, 294, 2578, 1229, 1072, 1871, 465, 1650, 1524,
- -430, -1195, -3427, -116, 1117, 217, 967, -254, 259, -55, 1425,
- 1583, -1261, -1773, 1232, 2886, 646, 1346, 1518, 2090, -837 },
- { 2020, 728, 2038, 316, 5725, 4193, 890, 1490, 584, 2705, 694,
- -892, 34, 2041, 972, 332, -295, -218, -756, 2193, 1672, 1440,
- 2310, -2136, -2204, 399, -753, 743, 3155, 2521, 3534, 166 },
- { 824, 1664, 991, 853, 700, -80, 148, -908, -194, -620, 1053,
- -368, 1616, 1250, 1449, 3140, -1065, 286, 2226, -590, -570, -1131,
- 477, -61, -708, 519, 586, 1148, 898, 1653, 4697, 1581 },
- { 2014, 1921, -210, 556, 686, -561, -1239, -1345, -664, -138, -215,
- -343, 1019, 1294, 519, -179, 212, -299, -2160, -1450, -329, 293,
- 691, 162, -645, 1079, 2005, 1466, 1127, 2263, 730, 179 },
- { 5629, 4670, 597, 2030, 3873, 3698, 54, 2714, 62, 352, 2177,
- 908, 1306, 1504, 1464, -288, -106, -69, -179, -900, -1340, -4,
- 877, 487, 2606, 358, 2055, 1131, 1421, 931, -477, 1173 },
- { 757, -493, 1510, 2513, 4514, 4649, -478, 2069, 124, -1186, 2855,
- 1906, 1420, 1738, 19, 1916, 1195, -519, 32, 512, 230, 528,
- 43, -263, 1314, 1350, 137, -256, 939, 256, 168, -201 },
- { 663, 947, 699, 3239, 4730, 5279, 1739, 1659, 2774, -1660, -1677,
- 185, 3745, 1319, 2347, 477, 364, 531, 608, -520, -783, -123,
- -59, -345, 1202, 1766, 88, 883, 654, 1399, -1082, 658 },
- { 4534, 5694, 5332, 4909, 4828, 4761, 7376, 3834, 2327, 4737, 7135,
- 5306, 6337, 5240, 5578, 4321, 2107, -205, 1387, 597, 1112, 904,
- 1567, 610, 461, 371, 250, 602, 358, 1807, -617, -59 },
- { 6124, 8363, 9624, 5674, 7043, 4437, 3846, 3121, 3477, 2818, 5445,
- 3618, 5067, 3996, 5759, 7185, 2150, 785, 1581, 2084, 3321, 4828,
- -545, 510, 2309, 2501, 1594, 2028, 528, 113, 248, 550 },
- { 8154, 9890, 6292, 6421, 8295, 4403, 7503, 5496, 7256, 3699, 2845,
- 3725, 5365, 5905, 7170, 2903, 733, 4614, 3856, 4346, 7099, -902,
- -1492, 1703, 2321, 1842, 3488, 1690, 982, 524, -467, -687 },
- { 5338, 10331, 7754, 7014, 3581, 5660, 5471, 5420, 3976, 2548, 6486,
- 9144, 6584, 5442, 6795, 4845, 5182, 2855, 8246, 3660, 5417, 1845,
- 1803, 288, 1434, 639, 1404, 2752, 923, 1055, 741, -984 },
- { 4457, 7110, 5195, 5959, 6818, 8562, 5548, 2071, 5544, 8734, 7080,
- 4737, 9481, 7672, 8374, 7638, 4204, 3562, 3758, 3598, 5016, 2863,
- 3927, 5001, 4677, 4444, 2481, 1773, 2525, 3142, 4840, 3965 },
- { 1134, 3249, 4702, 5483, 4471, 7234, 7281, 6240, 5891, 7577, 3826,
- 5886, 4798, 7117, 6319, 7264, 4115, 5613, 4674, 4999, 4518, 2501,
- 6830, 4913, 2356, 789, 1926, 2190, 1914, 1434, 987, 1761 }
-};
-static int16_t default_ncobmc_krnl_2_1_2[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 6131, 7769, 6548, 6297, 4967, 4708, 3127, 5937, 697, 748, 1850,
- 2290, 2945, -80, 216, 377, 318, 1009, 2112, 2962, -886, 849,
- 510, 4160, 2257, 2875, 4589, 5345, 7363, 5350, 6815, 1644 },
- { 6949, 8044, 7295, 7318, 3142, 2084, 1819, 3048, 1654, 1831, 1344,
- 3344, 2065, 2889, -88, 3746, 696, 1143, 232, 1444, 1587, 4125,
- 3991, 3840, 5642, 4933, 3560, 6540, 5865, 6663, 6729, 5520 },
- { 7816, 4894, 7089, 7533, 4271, 6814, 1972, 3845, 3755, 3498, 3571,
- 1884, 3171, 1843, 70, 2358, 2622, 1241, 143, 2657, 3804, 2968,
- 1781, 262, 2864, 4345, 1302, 5434, 7815, 10560, 9211, 8202 },
- { 10656, 7490, 8639, 7975, 4318, 7432, 6148, 3321, 3776, 2781, 3544,
- 246, 2350, 793, 1600, 1266, 2372, -1382, -983, 1926, 493, 447,
- 2275, 3510, 4789, 3766, 878, 2353, 3314, 6282, 5853, 3709 },
- { 11083, 7270, 6211, 6170, 4927, 4198, 3939, 4605, 1734, 2009, 2950,
- 546, 722, 99, 550, 597, 2350, 41, 1314, 1148, -183, 1143,
- 5392, 3550, 3102, 1161, -556, 1700, 7598, 8412, 6019, 9654 },
- { 10358, 7350, 6589, 5975, 3587, 6201, 4603, 3974, 2262, 886, 1815,
- 1899, 1642, 2894, 1557, 228, 1625, 1879, 838, 182, 919, 1168,
- 3272, 1155, 889, 2292, 128, 4478, 5205, 7668, 8767, 10921 },
- { 8569, 4702, 5397, 5147, 2577, 4301, 2139, 1630, 721, 1721, -218,
- 1595, 275, 1133, 1051, -777, 1556, -245, 972, 106, 2205, 385,
- 1410, 366, 3348, 2139, -164, 3111, 2656, 5036, 6021, 4847 },
- { 7654, 5535, 5975, 4580, 3005, 5483, 4637, 5560, 6252, 4946, 4508,
- 3600, 1824, 1528, 338, 131, 1290, 309, 344, 3110, 3607, 2484,
- 1062, 1267, 1426, -860, 1155, 6137, 2415, 5482, 6846, 4916 },
- { 8060, 5296, 4396, 2040, 867, 1189, 3555, 3397, 3438, 664, -1931,
- -1938, -1414, 1317, 762, -312, -655, -801, -243, 2795, 1663, 1314,
- 1478, 2856, 562, 1075, 3211, 7482, 2988, 3880, 4156, 3289 },
- { 8146, 7596, 7056, 7622, 5755, 7181, 7862, 4736, 4932, 3146, 1043,
- -422, -813, -2152, 1444, 441, 3599, 395, 2173, 755, 4245, 3047,
- 1545, 1062, 1159, 1621, 209, 6521, 7385, 7730, 6511, 8959 },
- { 9567, 8044, 7535, 6969, 3284, 4284, 4734, 4758, 5177, 2342, 230,
- -1852, -839, -769, 222, 255, -315, -16, 1101, -28, 3561, 2004,
- -260, 789, 1856, 1960, 4962, 4207, 2425, 8406, 6771, 7796 },
- { 8019, 7612, 8357, 5521, 4711, 3374, 4391, 7093, 5013, 3608, 238,
- -1564, -1662, -1373, -198, -1045, 100, 2694, 1251, 489, 2110, 1670,
- 188, -1362, 953, 2340, 3361, 3595, 6405, 7676, 1634, 7730 },
- { 10177, 6488, 5822, 5121, 2615, 2725, 3372, 4849, 2232, 2548, 2841,
- 874, 895, 307, 1293, -150, 411, -981, -815, -24, 936, -2339,
- 254, 3019, 5892, 4302, -2171, 6747, 7198, 5638, 4832, 9538 },
- { 7260, 9945, 2818, 1106, 6179, 6331, 5106, 1814, 5997, 4045, 1456,
- -230, 297, 1045, 1918, -126, 752, 1014, 999, -506, 198, -732,
- -1900, 139, 749, 3999, 5614, 5241, 6339, 8316, 3673, 7681 },
- { 11101, 6954, 7475, 5729, 4242, 6118, 4569, 2348, 5307, 3762, 2933,
- -1610, 988, -1178, -104, -151, -507, 491, -906, 1236, 3075, 1525,
- 1631, 2901, 2758, 1303, 1578, 6405, 3807, 7189, 8468, 9262 },
- { 6835, 4602, 5501, 5568, 4338, 6143, 4304, 3557, 3258, 3797, 1242,
- 968, 1683, -251, 1218, 301, 1257, 1924, 985, 1251, 3051, 433,
- 1756, 167, -660, 3884, 3450, 7202, 6544, 5184, 7556, 9366 },
- { 5991, 6762, 3854, 4856, 6714, 5701, 4072, 2489, 422, -365, 1488,
- 1660, 725, 1157, -778, 654, 313, -18, 3162, 3065, 2925, 2391,
- 827, 5547, 461, 2487, 1492, 5810, 7042, 5284, 3995, 6870 },
- { 6435, 8283, 4732, 5896, 5599, 4229, 4798, 3309, 3128, 941, 2565,
- 394, 257, 2477, 721, 1494, 3161, 1409, 1306, 2534, 1261, 2719,
- 756, 4388, 570, 5416, 3719, 6067, 4092, 2565, 6299, 10504 },
- { 6042, 7417, 5391, 4671, 3245, 7547, 3777, 3203, 2044, 583, 2083,
- 1971, 1721, 1948, -169, 1197, -1141, -480, 2155, 1033, 1313, 268,
- 1857, 4493, 3083, 2005, 5347, 4397, 10144, 4828, 6622, 9817 },
- { 7202, 5045, 6601, 6937, 3704, 5796, 5061, 3575, 2383, 1389, 3111,
- 1751, 1603, 2813, 174, 706, -569, 2620, 1735, 1418, 1871, -1542,
- 168, 2156, 5107, 6329, 4968, 7018, 6279, 6864, 5898, 9157 },
- { 5722, 5683, 4189, 4814, 2883, 5508, 5100, 1625, 2169, 3680, 1884,
- 2109, 462, 1145, 334, 515, 191, 441, 1058, 917, 1528, -96,
- 1843, 5395, 4498, 5681, 4193, 5196, 8356, 5303, 7262, 10141 },
- { 5879, 5779, 7257, 3873, 6911, 6238, 5672, 3583, 3261, 3048, 2536,
- -310, -1046, -69, -660, 417, -719, -2058, 1740, 888, 2746, 1367,
- 1668, 1090, 1830, 1153, 5047, 7336, 3380, 7160, 4422, 9401 },
- { 7809, 7945, 8385, 8535, 7803, 3953, 5065, 3185, 2013, 1659, 1648,
- 769, 292, -135, 114, -579, 713, 1407, -1181, 1569, 3525, 5630,
- 219, 3518, 3739, 3432, 7282, 6357, 619, 5779, 10116, 6448 },
- { 9496, 7224, 5342, 5960, 5092, 4225, 4353, 3995, 3631, 1662, 1413,
- 762, 534, 126, -551, -1025, 2327, 602, -452, 1285, 2103, 2579,
- -1369, 2724, 6353, 3925, 4631, 9139, 4974, 6630, 7755, 4125 },
- { 5226, 7729, 5768, 5815, 4531, 2948, 3029, 2603, 2549, 1366, 119,
- 405, 21, -1831, -327, -287, -415, -1317, -214, 3017, 1586, 2436,
- 868, 1094, 290, 668, 2117, 756, 1228, 2700, 5743, 8052 },
- { 6262, 5531, 4454, 4616, 3913, 2022, 4240, 2241, 4201, 2506, 1810,
- 628, -496, -779, -471, 394, 756, 1666, -445, 490, 575, -478,
- 894, 1182, 822, 626, 1782, 1781, 5333, 5482, 1760, 8187 },
- { 6488, 6875, 4960, 6837, 4564, 1871, 390, 2940, 4330, 1634, 131,
- -1102, -1451, -928, -1067, -419, -614, -2, 1017, 1066, 1051, 917,
- 1097, 844, 465, 513, 2377, 1031, 3548, 5088, 4516, 10564 },
- { 6497, 6047, 5649, 7156, 4974, 3683, 2875, 4421, 1502, 1244, 668,
- -30, -1465, -59, -399, -721, 954, -281, -2, 664, 1039, 814,
- 758, 1911, 319, 4247, 1848, 1606, 2536, 2189, 1372, 7759 },
- { 5994, 5659, 6777, 6693, 4758, 2986, 1463, 1186, 2116, -166, 499,
- 73, -1151, -164, 279, -895, -169, 339, 1194, 1772, 752, 1649,
- 1696, -2615, 1581, 1740, 1789, 1832, 1899, 510, 2135, 7149 },
- { 9107, 4250, 5418, 4334, 613, 2618, 3395, 4809, 1724, 873, -78,
- -1146, -431, -547, -1104, -1128, -6, -290, 945, 794, 564, 1670,
- 737, 4540, 1574, 6285, 2596, 2859, 1191, 1428, 5614, 8419 },
- { 5905, 4490, 6470, 3636, 2119, 1731, 3532, 2461, 2391, 473, 176,
- -562, 389, -1300, -916, -1436, 371, 567, 1038, 866, 59, 195,
- 679, -721, 2994, 3260, 1813, 1589, 850, 1982, 7410, 11546 },
- { 7265, 8775, 6672, 6657, 6182, 3732, 3222, 4564, 2644, 790, 924,
- -596, 628, -681, -57, -236, 103, 364, 603, 1420, 309, 787,
- 1257, 770, 2453, 3401, 1175, 434, 792, 4019, 8792, 11773 }
-};
-static int16_t default_ncobmc_krnl_2_1_3[MAX_SB_SIZE][MAX_SB_SIZE] = {
- { 391, -894, -939, 1155, 4362, 4297, 7296, 2684, 3758, 8010, 8044,
- 9041, 8748, 8816, 10796, 8701, 6840, 11306, 7814, 8456, 9952, 3511,
- 7870, 2227, 7018, 7148, 4672, 5660, 6657, 6007, 1098, 3866 },
- { 2970, 945, 619, 1701, 4540, 3326, 7140, 8401, 6001, 5524, 6311,
- 5657, 5333, 9833, 7547, 8127, 10894, 14326, 12130, 8591, 8408, 5873,
- 7524, 6398, 7054, 6594, 9788, 8347, 8784, 9253, 8154, 6170 },
- { 3423, 6928, 5192, 5699, 5575, 6852, 8083, 7546, 8019, 8464, 8910,
- 9251, 11401, 8637, 9356, 9671, 10065, 12652, 12275, 9662, 9627, 5550,
- 9836, 10565, 9075, 9350, 11656, 8549, 8120, 4437, 5501, 6658 },
- { 5859, 5714, 6766, 5830, 7266, 4208, 5956, 8173, 10615, 7557, 10533,
- 8101, 7530, 9292, 9312, 9603, 11268, 14896, 12761, 10435, 10584, 10602,
- 7945, 6677, 7798, 9184, 11805, 9688, 12921, 9831, 9425, 9409 },
- { 5068, 7732, 8953, 7750, 6739, 7145, 7635, 7400, 9896, 11465, 12344,
- 14483, 13309, 11497, 10778, 11614, 13096, 11519, 12197, 13573, 14652, 12324,
- 7270, 8764, 10162, 11289, 13446, 10681, 7564, 7663, 7650, 3879 },
- { 6073, 8775, 7134, 7485, 8815, 9982, 9893, 11182, 10807, 12415, 10385,
- 13211, 13198, 9974, 13590, 13229, 14029, 10733, 10710, 10950, 11286, 12150,
- 10133, 10858, 8958, 9903, 12033, 9177, 9756, 8710, 8055, 3108 },
- { 8368, 10916, 7650, 6261, 8713, 10236, 12507, 10373, 12385, 11135, 11343,
- 12039, 12114, 14871, 13861, 13742, 11649, 13839, 13207, 13160, 11863, 11950,
- 12423, 10188, 7712, 8705, 11270, 12864, 13370, 11422, 7881, 7390 },
- { 10805, 12233, 10301, 9238, 9352, 7871, 10959, 12870, 11641, 9692, 12373,
- 13839, 12380, 14055, 14653, 13348, 11227, 12844, 14769, 12714, 9815, 10484,
- 12966, 10123, 8644, 11791, 9911, 7598, 13225, 9539, 6774, 8055 },
- { 7987, 9257, 6281, 7446, 8911, 10506, 7039, 9031, 9319, 10294, 13979,
- 15391, 14445, 11372, 14852, 14690, 14954, 14129, 16319, 13385, 10855, 12837,
- 13065, 10647, 12815, 13043, 9686, 7003, 12028, 10211, 10237, 11699 },
- { 6073, 7893, 7571, 5698, 8244, 7305, 6581, 9719, 9746, 11432, 12215,
- 16346, 17408, 17379, 13508, 14637, 10471, 13204, 13089, 13632, 10135, 12397,
- 12431, 13511, 13140, 13999, 14081, 10639, 7173, 7807, 9433, 4659 },
- { 6634, 10941, 11920, 9920, 11356, 10608, 10624, 12593, 11330, 11413, 13971,
- 18455, 16400, 16654, 15373, 16023, 15144, 15413, 14357, 16626, 10718, 12841,
- 16053, 14104, 13496, 13334, 10605, 11490, 12221, 6956, 9178, 8213 },
- { 7366, 9121, 9253, 11198, 9839, 11458, 10864, 8319, 12656, 12437, 13128,
- 15378, 14565, 16278, 15940, 14457, 15156, 13972, 14035, 13587, 10888, 11376,
- 15176, 18483, 13236, 12754, 12347, 13247, 11785, 10432, 13455, 7419 },
- { 7665, 10318, 12372, 11702, 11166, 12470, 11859, 10983, 12921, 13947, 12106,
- 14300, 13037, 17367, 14444, 15259, 15107, 14974, 11715, 14835, 15525, 18775,
- 17479, 13835, 9101, 10034, 18554, 10201, 8666, 11181, 11767, 6530 },
- { 11169, 7696, 11879, 11938, 10302, 13271, 12067, 13360, 9715, 12528, 13879,
- 15312, 17012, 15194, 12951, 17211, 14989, 14796, 15695, 14942, 13140, 17003,
- 18104, 14131, 14490, 11607, 9697, 10346, 6890, 7337, 12248, 7668 },
- { 7494, 9902, 9327, 10081, 9955, 10895, 12521, 13971, 11975, 12950, 13579,
- 19214, 16537, 17208, 15292, 17698, 16633, 14485, 17676, 15920, 11698, 13314,
- 13747, 11163, 10360, 13396, 13119, 7073, 11331, 8217, 8258, 8754 },
- { 9934, 11319, 10239, 9047, 11387, 10784, 12566, 13038, 13663, 12717, 14675,
- 14008, 14178, 15820, 14510, 16181, 15440, 15283, 15009, 13767, 11372, 13359,
- 14352, 14480, 17066, 10914, 11175, 8554, 7428, 10827, 10561, 6443 },
- { 10016, 9986, 12912, 11133, 8475, 9995, 12150, 14006, 15182, 16531, 13117,
- 14634, 15313, 15598, 16928, 14269, 14814, 17080, 12532, 12849, 13261, 12479,
- 14442, 9716, 15960, 13029, 13398, 10927, 9854, 10849, 12580, 10547 },
- { 9295, 7913, 11422, 9455, 10319, 11278, 11274, 13394, 13038, 13821, 15044,
- 14686, 17187, 14091, 14823, 14137, 14455, 15111, 15447, 13582, 14076, 14295,
- 15643, 11185, 16015, 10747, 11235, 11551, 12009, 13990, 8881, 5003 },
- { 11095, 8615, 12138, 8821, 9239, 6419, 11207, 11937, 12556, 14236, 12501,
- 14976, 13740, 15006, 17876, 15826, 16800, 16761, 13880, 15072, 16296, 16857,
- 14333, 11125, 12310, 13605, 10932, 12928, 5472, 11185, 9435, 5957 },
- { 7725, 6887, 7535, 8957, 9967, 9700, 10640, 10680, 13275, 12682, 11517,
- 15207, 15552, 17018, 16856, 14725, 16692, 12845, 14748, 14656, 14606, 16310,
- 14672, 15510, 13069, 9039, 8315, 8606, 8826, 8214, 8487, 7999 },
- { 9071, 9686, 10375, 11046, 7539, 7106, 10540, 13531, 13747, 9927, 14071,
- 15876, 15935, 13026, 15104, 15296, 16773, 16198, 16098, 13165, 13227, 15002,
- 12319, 13015, 14240, 10673, 12818, 10497, 5016, 8298, 5706, 6088 },
- { 9366, 8741, 8215, 11450, 8961, 10464, 10575, 13631, 13635, 13752, 12735,
- 17169, 16010, 15438, 15786, 13083, 18481, 17990, 12316, 16370, 13953, 16000,
- 14693, 15392, 15242, 15049, 10809, 7658, 12399, 7866, 7570, 5544 },
- { 6903, 5972, 7864, 7864, 8655, 13231, 12904, 14949, 15064, 15007, 14738,
- 15847, 14769, 14910, 15543, 17103, 15630, 15115, 19594, 16319, 13352, 10936,
- 15453, 13064, 13305, 12008, 7408, 8514, 14898, 8171, 5583, 9657 },
- { 1309, 4431, 10551, 8701, 8152, 8547, 11642, 9601, 12635, 14116, 12560,
- 14796, 14370, 14959, 15558, 17801, 14148, 16067, 16927, 16084, 15633, 13749,
- 16805, 13274, 7467, 12136, 9815, 6584, 10514, 9020, 9109, 10981 },
- { 10778, 9464, 8877, 8157, 7779, 9056, 13584, 11871, 13714, 16259, 13305,
- 13956, 14785, 16328, 16541, 15199, 15586, 18478, 16668, 13019, 14279, 13814,
- 15684, 15613, 15050, 14345, 14327, 15869, 14316, 13744, 10738, 8497 },
- { 9411, 9691, 11139, 8582, 8038, 9492, 10534, 12154, 9249, 16286, 16839,
- 15572, 13252, 16207, 14760, 15743, 15428, 14223, 15971, 16378, 16607, 16993,
- 15698, 15766, 14771, 13969, 14551, 13631, 10451, 9360, 15908, 7460 },
- { 5565, 3814, 5832, 4698, 7091, 10412, 8442, 9852, 9831, 10137, 9167,
- 11864, 11520, 12092, 11930, 12431, 14914, 16568, 13978, 14847, 14215, 14290,
- 13812, 15033, 15711, 15541, 13908, 14681, 12577, 9266, 12542, 5718 },
- { 3740, 2245, 1259, 3575, 4190, 8150, 9742, 8948, 11592, 12108, 10225,
- 12748, 12684, 12687, 11339, 10475, 13481, 15937, 14669, 13780, 12167, 11074,
- 16225, 14201, 13966, 9544, 12974, 12797, 13248, 13990, 14819, 7995 },
- { 2296, 817, 3435, 3505, 3507, 9072, 7580, 10139, 7087, 12821, 13297,
- 12396, 12113, 10999, 9149, 14466, 15677, 11290, 11487, 10612, 8552, 15725,
- 16233, 17367, 12511, 13088, 10898, 12875, 13386, 15384, 14845, 9849 },
- { 2320, 1714, 3209, 4858, 11853, 8126, 7775, 6246, 10834, 12812, 9996,
- 8379, 10020, 11558, 10914, 12851, 11272, 13723, 7409, 11919, 10393, 12987,
- 13756, 11382, 13258, 9754, 12513, 10697, 14356, 14065, 10023, 8748 },
- { 5715, 4721, 4773, 6968, 7426, 6196, 7322, 11771, 8704, 7198, 8944,
- 12478, 6336, 10064, 9132, 10252, 11884, 12483, 11504, 12168, 11346, 13354,
- 11779, 12178, 8942, 8770, 11937, 13047, 12938, 11277, 4002, 710 },
- { 7743, 4184, 5058, 4276, 5576, 5393, 5919, 5500, 7881, 8102, 11726,
- 10912, 10943, 10344, 10654, 9537, 12118, 10565, 11112, 9964, 11328, 13005,
- 8273, 10626, 11596, 12198, 13157, 13884, 13912, 10737, 6497, 2938 }
-};
-
-void get_default_ncobmc_kernels(AV1_COMMON *cm) {
- av1_copy(cm->ncobmc_kernels[0][0].KERNEL[0], default_ncobmc_krnl_0_0_0);
- av1_copy(cm->ncobmc_kernels[0][0].KERNEL[1], default_ncobmc_krnl_0_0_1);
- av1_copy(cm->ncobmc_kernels[0][0].KERNEL[2], default_ncobmc_krnl_0_0_2);
- av1_copy(cm->ncobmc_kernels[0][0].KERNEL[3], default_ncobmc_krnl_0_0_3);
- av1_copy(cm->ncobmc_kernels[0][1].KERNEL[0], default_ncobmc_krnl_0_1_0);
- av1_copy(cm->ncobmc_kernels[0][1].KERNEL[1], default_ncobmc_krnl_0_1_1);
- av1_copy(cm->ncobmc_kernels[0][1].KERNEL[2], default_ncobmc_krnl_0_1_2);
- av1_copy(cm->ncobmc_kernels[0][1].KERNEL[3], default_ncobmc_krnl_0_1_3);
- av1_copy(cm->ncobmc_kernels[1][0].KERNEL[0], default_ncobmc_krnl_1_0_0);
- av1_copy(cm->ncobmc_kernels[1][0].KERNEL[1], default_ncobmc_krnl_1_0_1);
- av1_copy(cm->ncobmc_kernels[1][0].KERNEL[2], default_ncobmc_krnl_1_0_2);
- av1_copy(cm->ncobmc_kernels[1][0].KERNEL[3], default_ncobmc_krnl_1_0_3);
- av1_copy(cm->ncobmc_kernels[1][1].KERNEL[0], default_ncobmc_krnl_1_1_0);
- av1_copy(cm->ncobmc_kernels[1][1].KERNEL[1], default_ncobmc_krnl_1_1_1);
- av1_copy(cm->ncobmc_kernels[1][1].KERNEL[2], default_ncobmc_krnl_1_1_2);
- av1_copy(cm->ncobmc_kernels[1][1].KERNEL[3], default_ncobmc_krnl_1_1_3);
- av1_copy(cm->ncobmc_kernels[2][0].KERNEL[0], default_ncobmc_krnl_2_0_0);
- av1_copy(cm->ncobmc_kernels[2][0].KERNEL[1], default_ncobmc_krnl_2_0_1);
- av1_copy(cm->ncobmc_kernels[2][0].KERNEL[2], default_ncobmc_krnl_2_0_2);
- av1_copy(cm->ncobmc_kernels[2][0].KERNEL[3], default_ncobmc_krnl_2_0_3);
- av1_copy(cm->ncobmc_kernels[2][1].KERNEL[0], default_ncobmc_krnl_2_1_0);
- av1_copy(cm->ncobmc_kernels[2][1].KERNEL[1], default_ncobmc_krnl_2_1_1);
- av1_copy(cm->ncobmc_kernels[2][1].KERNEL[2], default_ncobmc_krnl_2_1_2);
- av1_copy(cm->ncobmc_kernels[2][1].KERNEL[3], default_ncobmc_krnl_2_1_3);
-}
diff --git a/third_party/aom/av1/common/ncobmc_kernels.h b/third_party/aom/av1/common/ncobmc_kernels.h
deleted file mode 100644
index 358b7b7c8..000000000
--- a/third_party/aom/av1/common/ncobmc_kernels.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdio.h>
-#include "av1/common/enums.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/common.h"
-
-#ifndef AV1_COMMON_NCOBMC_KERNELS_H_
-#define AV1_COMMON_NCOBMC_KERNELS_H_
-
-void get_default_ncobmc_kernels(AV1_COMMON *cm);
-
-#endif // AV1_COMMON_NCOBMC_KERNELS_H_
diff --git a/third_party/aom/av1/common/obmc.h b/third_party/aom/av1/common/obmc.h
index f3940490f..3918c82c6 100644
--- a/third_party/aom/av1/common/obmc.h
+++ b/third_party/aom/av1/common/obmc.h
@@ -12,31 +12,31 @@
#ifndef AV1_COMMON_OBMC_H_
#define AV1_COMMON_OBMC_H_
-#if CONFIG_MOTION_VAR
typedef void (*overlappable_nb_visitor_t)(MACROBLOCKD *xd, int rel_mi_pos,
- uint8_t nb_mi_size, MODE_INFO *nb_mi,
- void *fun_ctxt);
+ uint8_t nb_mi_size,
+ MB_MODE_INFO *nb_mi, void *fun_ctxt,
+ const int num_planes);
static INLINE void foreach_overlappable_nb_above(const AV1_COMMON *cm,
MACROBLOCKD *xd, int mi_col,
int nb_max,
overlappable_nb_visitor_t fun,
void *fun_ctxt) {
+ const int num_planes = av1_num_planes(cm);
if (!xd->up_available) return;
int nb_count = 0;
// prev_row_mi points into the mi array, starting at the beginning of the
// previous row.
- MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
+ MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
const int end_col = AOMMIN(mi_col + xd->n8_w, cm->mi_cols);
uint8_t mi_step;
for (int above_mi_col = mi_col; above_mi_col < end_col && nb_count < nb_max;
above_mi_col += mi_step) {
- MODE_INFO **above_mi = prev_row_mi + above_mi_col;
- mi_step = AOMMIN(mi_size_wide[above_mi[0]->mbmi.sb_type],
- mi_size_wide[BLOCK_64X64]);
-#if CONFIG_CHROMA_SUB8X8
+ MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
+ mi_step =
+ AOMMIN(mi_size_wide[above_mi[0]->sb_type], mi_size_wide[BLOCK_64X64]);
// If we're considering a block with width 4, it should be treated as
// half of a pair of blocks with chroma information in the second. Move
// above_mi_col back to the start of the pair if needed, set above_mbmi
@@ -47,12 +47,10 @@ static INLINE void foreach_overlappable_nb_above(const AV1_COMMON *cm,
above_mi = prev_row_mi + above_mi_col + 1;
mi_step = 2;
}
-#endif // CONFIG_CHROMA_SUB8X8
- MB_MODE_INFO *above_mbmi = &above_mi[0]->mbmi;
- if (is_neighbor_overlappable(above_mbmi)) {
+ if (is_neighbor_overlappable(*above_mi)) {
++nb_count;
fun(xd, above_mi_col - mi_col, AOMMIN(xd->n8_w, mi_step), *above_mi,
- fun_ctxt);
+ fun_ctxt, num_planes);
}
}
}
@@ -62,35 +60,32 @@ static INLINE void foreach_overlappable_nb_left(const AV1_COMMON *cm,
int nb_max,
overlappable_nb_visitor_t fun,
void *fun_ctxt) {
+ const int num_planes = av1_num_planes(cm);
if (!xd->left_available) return;
int nb_count = 0;
// prev_col_mi points into the mi array, starting at the top of the
// previous column
- MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
+ MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
const int end_row = AOMMIN(mi_row + xd->n8_h, cm->mi_rows);
uint8_t mi_step;
for (int left_mi_row = mi_row; left_mi_row < end_row && nb_count < nb_max;
left_mi_row += mi_step) {
- MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
- mi_step = AOMMIN(mi_size_high[left_mi[0]->mbmi.sb_type],
- mi_size_high[BLOCK_64X64]);
-#if CONFIG_CHROMA_SUB8X8
+ MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
+ mi_step =
+ AOMMIN(mi_size_high[left_mi[0]->sb_type], mi_size_high[BLOCK_64X64]);
if (mi_step == 1) {
left_mi_row &= ~1;
left_mi = prev_col_mi + (left_mi_row + 1) * xd->mi_stride;
mi_step = 2;
}
-#endif // CONFIG_CHROMA_SUB8X8
- MB_MODE_INFO *left_mbmi = &left_mi[0]->mbmi;
- if (is_neighbor_overlappable(left_mbmi)) {
+ if (is_neighbor_overlappable(*left_mi)) {
++nb_count;
fun(xd, left_mi_row - mi_row, AOMMIN(xd->n8_h, mi_step), *left_mi,
- fun_ctxt);
+ fun_ctxt, num_planes);
}
}
}
-#endif // CONFIG_MOTION_VAR
#endif // AV1_COMMON_OBMC_H_
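The hunk above narrows the overlappable-neighbour visitor so it receives an MB_MODE_INFO pointer directly (the old MODE_INFO/mbmi indirection is gone) plus an explicit plane count. As a minimal sketch of a callback matching that updated signature — count_overlappable_nb and its accumulator are illustrative names, not part of this patch:

static void count_overlappable_nb(MACROBLOCKD *xd, int rel_mi_pos,
                                  uint8_t nb_mi_size, MB_MODE_INFO *nb_mi,
                                  void *fun_ctxt, const int num_planes) {
  /* This sketch only counts neighbours; a real visitor would blend
     per-plane predictions using nb_mi and num_planes. */
  (void)xd;
  (void)rel_mi_pos;
  (void)nb_mi_size;
  (void)nb_mi;
  (void)num_planes;
  int *nb_total = (int *)fun_ctxt; /* caller-supplied accumulator */
  ++*nb_total;
}
/* Usage sketch, passing it to the iterator declared above:
   foreach_overlappable_nb_above(cm, xd, mi_col, max_neighbors,
                                 count_overlappable_nb, &nb_total); */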
diff --git a/third_party/aom/av1/common/odintrin.c b/third_party/aom/av1/common/odintrin.c
index 868efacc9..7584b2e52 100644
--- a/third_party/aom/av1/common/odintrin.c
+++ b/third_party/aom/av1/common/odintrin.c
@@ -13,16 +13,6 @@
#include "av1/common/odintrin.h"
-#if defined(OD_ENABLE_ASSERTIONS)
-# include <stdio.h>
-
-void od_fatal_impl(const char *_str, const char *_file, int _line) {
- fprintf(stderr, "Fatal (internal) error in %s, line %d: %s\n",
- _file, _line, _str);
- abort();
-}
-#endif
-
/*Constants for use with OD_DIVU_SMALL().
See \cite{Rob05} for details on computing these constants.
@INPROCEEDINGS{Rob05,
diff --git a/third_party/aom/av1/common/odintrin.h b/third_party/aom/av1/common/odintrin.h
index a50c456c1..e87c5a0bf 100644
--- a/third_party/aom/av1/common/odintrin.h
+++ b/third_party/aom/av1/common/odintrin.h
@@ -14,10 +14,6 @@
#ifndef AV1_COMMON_ODINTRIN_H_
#define AV1_COMMON_ODINTRIN_H_
-#if defined(_MSC_VER)
-# define _USE_MATH_DEFINES
-#endif
-#include <math.h>
#include <stdlib.h>
#include <string.h>
@@ -30,71 +26,8 @@
extern "C" {
#endif
-# if !defined(M_PI)
-# define M_PI (3.1415926535897932384626433832795)
-# endif
-
-# if !defined(M_SQRT2)
-# define M_SQRT2 (1.41421356237309504880168872420970)
-# endif
-
-# if !defined(M_SQRT1_2)
-# define M_SQRT1_2 (0.70710678118654752440084436210485)
-# endif
-
-# if !defined(M_LOG2E)
-# define M_LOG2E (1.4426950408889634073599246810019)
-# endif
-
-# if !defined(M_LN2)
-# define M_LN2 (0.69314718055994530941723212145818)
-# endif
-
-/*Smallest blocks are 4x4*/
-#define OD_LOG_BSIZE0 (2)
-/*There are 5 block sizes total (4x4, 8x8, 16x16, 32x32 and 64x64).*/
-#define OD_NBSIZES (5)
-
-/*There are 4 transform sizes total in AV1 (4x4, 8x8, 16x16 and 32x32).*/
-#define OD_TXSIZES TX_SIZES
-/*The log of the maximum length of the side of a transform.*/
-#define OD_LOG_TXSIZE_MAX (OD_LOG_BSIZE0 + OD_TXSIZES - 1)
-/*The maximum length of the side of a transform.*/
-#define OD_TXSIZE_MAX (1 << OD_LOG_TXSIZE_MAX)
-
-/**The maximum number of color planes allowed in a single frame.*/
-# define OD_NPLANES_MAX (3)
-
-# define OD_COEFF_SHIFT (4)
-
-# define OD_DISABLE_CFL (1)
-# define OD_DISABLE_FILTER (1)
-
-#if !defined(NDEBUG)
-# define OD_ENABLE_ASSERTIONS (1)
-#endif
-
-# define OD_LOG(a)
-# define OD_LOG_PARTIAL(a)
-
-/*Possible block sizes, note that OD_BLOCK_NXN = log2(N) - 2.*/
-#define OD_BLOCK_4X4 (0)
-#define OD_BLOCK_8X8 (1)
-#define OD_BLOCK_16X16 (2)
-#define OD_BLOCK_32X32 (3)
-#define OD_BLOCK_SIZES (OD_BLOCK_32X32 + 1)
-
-# define OD_LIMIT_BSIZE_MIN (OD_BLOCK_4X4)
-# define OD_LIMIT_BSIZE_MAX (OD_BLOCK_32X32)
-
typedef int od_coeff;
-/*This is the strength reduced version of ((_a)/(1 << (_b))).
- This will not work for _b == 0, however currently this is only used for
- b == 1 anyway.*/
-# define OD_UNBIASED_RSHIFT32(_a, _b) \
- (((int32_t)(((uint32_t)(_a) >> (32 - (_b))) + (_a))) >> (_b))
-
#define OD_DIVU_DMAX (1024)
extern uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2];
@@ -116,14 +49,6 @@ extern uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2];
#define OD_CLZ0 (1)
#define OD_CLZ(x) (-get_msb(x))
#define OD_ILOG_NZ(x) (OD_CLZ0 - OD_CLZ(x))
-/*Note that __builtin_clz is not defined when x == 0, according to the gcc
- documentation (and that of the x86 BSR instruction that implements it), so
- we have to special-case it.
- We define a special version of the macro to use when x can be zero.*/
-#define OD_ILOG(x) ((x) ? OD_ILOG_NZ(x) : 0)
-
-#define OD_LOG2(x) (M_LOG2E*log(x))
-#define OD_EXP2(x) (exp(M_LN2*(x)))
/*Enable special features for gcc and compatible compilers.*/
#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
@@ -146,36 +71,6 @@ extern uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2];
#define OD_ARG_NONNULL(x)
#endif
-#if defined(OD_ENABLE_ASSERTIONS)
-#if OD_GNUC_PREREQ(2, 5, 0)
-__attribute__((noreturn))
-#endif
-void od_fatal_impl(const char *_str, const char *_file, int _line);
-
-#define OD_FATAL(_str) (od_fatal_impl(_str, __FILE__, __LINE__))
-
-#define OD_ASSERT(_cond) \
- do { \
- if (!(_cond)) { \
- OD_FATAL("assertion failed: " #_cond); \
- } \
- } while (0)
-
-#define OD_ASSERT2(_cond, _message) \
- do { \
- if (!(_cond)) { \
- OD_FATAL("assertion failed: " #_cond "\n" _message); \
- } \
- } while (0)
-
-#define OD_ALWAYS_TRUE(_cond) OD_ASSERT(_cond)
-
-#else
-#define OD_ASSERT(_cond)
-#define OD_ASSERT2(_cond, _message)
-#define OD_ALWAYS_TRUE(_cond) ((void)(_cond))
-#endif
-
/** Copy n elements of memory from src to dst. The 0* term provides
compile-time type checking */
#if !defined(OVERRIDE_OD_COPY)
@@ -190,85 +85,10 @@ void od_fatal_impl(const char *_str, const char *_file, int _line);
(memmove((dst), (src), sizeof(*(dst))*(n) + 0*((dst) - (src)) ))
#endif
-/** Linkage will break without this if using a C++ compiler, and will issue
- * warnings without this for a C compiler*/
-#if defined(__cplusplus)
-# define OD_EXTERN extern
-#else
-# define OD_EXTERN
-#endif
-
-/** Set n elements of dst to zero */
-#if !defined(OVERRIDE_OD_CLEAR)
-# define OD_CLEAR(dst, n) (memset((dst), 0, sizeof(*(dst))*(n)))
-#endif
-
-/** Silence unused parameter/variable warnings */
-# define OD_UNUSED(expr) (void)(expr)
-
-#if defined(OD_FLOAT_PVQ)
-typedef double od_val16;
-typedef double od_val32;
-# define OD_QCONST32(x, bits) (x)
-# define OD_ROUND16(x) (x)
-# define OD_ROUND32(x) (x)
-# define OD_SHL(x, shift) (x)
-# define OD_SHR(x, shift) (x)
-# define OD_SHR_ROUND(x, shift) (x)
-# define OD_ABS(x) (fabs(x))
-# define OD_MULT16_16(a, b) ((a)*(b))
-# define OD_MULT16_32_Q16(a, b) ((a)*(b))
-#else
-typedef int16_t od_val16;
-typedef int32_t od_val32;
-/** Compile-time conversion of float constant to 32-bit value */
-# define OD_QCONST32(x, bits) ((od_val32)(.5 + (x)*(((od_val32)1) << (bits))))
-# define OD_ROUND16(x) (int16_t)(floor(.5 + (x)))
-# define OD_ROUND32(x) (int32_t)(floor(.5 + (x)))
-/*Shift x left by shift*/
-# define OD_SHL(a, shift) ((int32_t)((uint32_t)(a) << (shift)))
-/*Shift x right by shift (without rounding)*/
-# define OD_SHR(x, shift) \
- ((int32_t)((x) >> (shift)))
-/*Shift x right by shift (with rounding)*/
-# define OD_SHR_ROUND(x, shift) \
- ((int32_t)(((x) + (1 << (shift) >> 1)) >> (shift)))
-/*Shift x right by shift (without rounding) or left by -shift if shift
- is negative.*/
-# define OD_VSHR(x, shift) \
- (((shift) > 0) ? OD_SHR(x, shift) : OD_SHL(x, -(shift)))
-/*Shift x right by shift (with rounding) or left by -shift if shift
- is negative.*/
-# define OD_VSHR_ROUND(x, shift) \
- (((shift) > 0) ? OD_SHR_ROUND(x, shift) : OD_SHL(x, -(shift)))
-# define OD_ABS(x) (abs(x))
-/* (od_val32)(od_val16) gives TI compiler a hint that it's 16x16->32 multiply */
-/** 16x16 multiplication where the result fits in 32 bits */
-# define OD_MULT16_16(a, b) \
- (((od_val32)(od_val16)(a))*((od_val32)(od_val16)(b)))
-/* Multiplies 16-bit a by 32-bit b and keeps bits [16:47]. */
-# define OD_MULT16_32_Q16(a, b) ((int16_t)(a)*(int64_t)(int32_t)(b) >> 16)
-/*16x16 multiplication where the result fits in 16 bits, without rounding.*/
-# define OD_MULT16_16_Q15(a, b) \
- (((int16_t)(a)*((int32_t)(int16_t)(b))) >> 15)
-/*16x16 multiplication where the result fits in 16 bits, without rounding.*/
-# define OD_MULT16_16_Q16(a, b) \
- ((((int16_t)(a))*((int32_t)(int16_t)(b))) >> 16)
-#endif
-
/*All of these macros should expect floats as arguments.*/
-/*These two should compile as a single SSE instruction.*/
-# define OD_MINF(a, b) ((a) < (b) ? (a) : (b))
-# define OD_MAXF(a, b) ((a) > (b) ? (a) : (b))
-
-# define OD_DIV_R0(x, y) (((x) + OD_FLIPSIGNI((((y) + 1) >> 1) - 1, (x)))/(y))
-
# define OD_SIGNMASK(a) (-((a) < 0))
# define OD_FLIPSIGNI(a, b) (((a) + OD_SIGNMASK(b)) ^ OD_SIGNMASK(b))
-# define OD_MULT16_16_Q15(a, b) \
- (((int16_t)(a)*((int32_t)(int16_t)(b))) >> 15)
-
#ifdef __cplusplus
} // extern "C"
#endif
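A brief, hypothetical illustration of the OD_COPY/OD_MOVE helpers that survive this cleanup (array names below are illustrative, not from the patch): the 0*((dst) - (src)) term contributes nothing at run time, but it only compiles when dst and src point to the same element type, which is how the macros get compile-time type checking out of plain memcpy/memmove.

  od_coeff ring[OD_DIVU_DMAX] = { 0 };
  od_coeff scratch[OD_DIVU_DMAX];
  OD_MOVE(scratch, ring, OD_DIVU_DMAX); /* memmove of OD_DIVU_DMAX od_coeff
                                           elements, with type checking */
  /* OD_MOVE(scratch, (int64_t *)ring, OD_DIVU_DMAX); would be rejected:
     subtracting pointers to different element types is a compile error. */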
diff --git a/third_party/aom/av1/common/onyxc_int.h b/third_party/aom/av1/common/onyxc_int.h
index 2396ce2f3..fa5f02e52 100644
--- a/third_party/aom/av1/common/onyxc_int.h
+++ b/third_party/aom/av1/common/onyxc_int.h
@@ -12,76 +12,72 @@
#ifndef AV1_COMMON_ONYXC_INT_H_
#define AV1_COMMON_ONYXC_INT_H_
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
#include "aom/internal/aom_codec_internal.h"
#include "aom_util/aom_thread.h"
-#if CONFIG_ANS
-#include "aom_dsp/ans.h"
-#endif
#include "av1/common/alloccommon.h"
#include "av1/common/av1_loopfilter.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/entropymv.h"
+#include "av1/common/enums.h"
#include "av1/common/frame_buffers.h"
#include "av1/common/mv.h"
#include "av1/common/quant_common.h"
-#if CONFIG_LOOP_RESTORATION
#include "av1/common/restoration.h"
-#endif // CONFIG_LOOP_RESTORATION
#include "av1/common/tile_common.h"
+#include "av1/common/timing.h"
#include "av1/common/odintrin.h"
-#if CONFIG_PVQ
-#include "av1/common/pvq.h"
-#endif
-#if CONFIG_CFL
-#include "av1/common/cfl.h"
-#endif
-#if CONFIG_HASH_ME
-// TODO(youzhou@microsoft.com): Encoder only. Move it out of common
#include "av1/encoder/hash_motion.h"
-#endif
+#include "aom_dsp/grain_synthesis.h"
+#include "aom_dsp/grain_table.h"
#ifdef __cplusplus
extern "C" {
#endif
-#define CDEF_MAX_STRENGTHS 16
+#if defined(__clang__) && defined(__has_warning)
+#if __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough")
+#define AOM_FALLTHROUGH_INTENDED [[clang::fallthrough]] // NOLINT
+#endif
+#elif defined(__GNUC__) && __GNUC__ >= 7
+#define AOM_FALLTHROUGH_INTENDED __attribute__((fallthrough)) // NOLINT
+#endif
-#define REF_FRAMES_LOG2 3
-#define REF_FRAMES (1 << REF_FRAMES_LOG2)
+#ifndef AOM_FALLTHROUGH_INTENDED
+#define AOM_FALLTHROUGH_INTENDED \
+ do { \
+ } while (0)
+#endif
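
Editor's note: the block above defines AOM_FALLTHROUGH_INTENDED, which expands to [[clang::fallthrough]] under clang, to __attribute__((fallthrough)) under GCC 7 and later, and to an empty do/while otherwise. A minimal sketch of how the macro is meant to sit in a switch statement (this mirrors its use in update_ext_partition_context later in this header; the surrounding cases are illustrative only):

    switch (partition) {
      case PARTITION_SPLIT:
        if (bsize != BLOCK_8X8) break;
        AOM_FALLTHROUGH_INTENDED;  /* deliberate fall-through; silences -Wimplicit-fallthrough */
      case PARTITION_NONE:
        update_partition_context(xd, mi_row, mi_col, subsize, bsize);
        break;
      default: break;
    }
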
-// 4 scratch frames for the new frames to support a maximum of 4 cores decoding
-// in parallel, 3 for scaled references on the encoder.
-// TODO(hkuang): Add ondemand frame buffers instead of hardcoding the number
-// of framebuffers.
-// TODO(jkoleszar): These 3 extra references could probably come from the
-// normal reference pool.
-#define FRAME_BUFFERS (REF_FRAMES + 7)
+#define CDEF_MAX_STRENGTHS 16
-#if CONFIG_REFERENCE_BUFFER
/* Constant values while waiting for the sequence header */
-#define FRAME_ID_NUMBERS_PRESENT_FLAG 1
-#define FRAME_ID_LENGTH_MINUS7 8 // Allows frame id up to 2^15-1
-#define DELTA_FRAME_ID_LENGTH_MINUS2 12 // Allows frame id deltas up to 2^14-1
-#endif // CONFIG_REFERENCE_BUFFER
+#define FRAME_ID_LENGTH 15
+#define DELTA_FRAME_ID_LENGTH 14
-#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
#define FRAME_CONTEXTS (FRAME_BUFFERS + 1)
// Extra frame context which is always kept at default values
#define FRAME_CONTEXT_DEFAULTS (FRAME_CONTEXTS - 1)
-#else
+#define PRIMARY_REF_BITS 3
+#define PRIMARY_REF_NONE 7
-#if CONFIG_EXT_REFS
-#define FRAME_CONTEXTS_LOG2 3
-#else
-#define FRAME_CONTEXTS_LOG2 2
-#endif
+#define NUM_PING_PONG_BUFFERS 2
-#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2)
-#endif // CONFIG_NO_FRAME_CONTEXT_SIGNALING
+#define MAX_NUM_TEMPORAL_LAYERS 8
+#define MAX_NUM_SPATIAL_LAYERS 4
+/* clang-format off */
+// clang-format seems to think this is a pointer dereference and not a
+// multiplication.
+#define MAX_NUM_OPERATING_POINTS \
+ MAX_NUM_TEMPORAL_LAYERS * MAX_NUM_SPATIAL_LAYERS
+/* clang-format on */
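
Editor's note: with the layer counts defined just above (MAX_NUM_TEMPORAL_LAYERS = 8, MAX_NUM_SPATIAL_LAYERS = 4), MAX_NUM_OPERATING_POINTS expands to 8 * 4 = 32, which is why the operating_point_idc, level and tier arrays added to SequenceHeader later in this hunk have 32 entries each.
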
-#define NUM_PING_PONG_BUFFERS 2
+// TODO(jingning): Turn this on to set up the transform coefficient
+// processing timer.
+#define TXCOEFF_TIMER 0
+#define TXCOEFF_COST_TIMER 0
typedef enum {
SINGLE_REFERENCE = 0,
@@ -90,20 +86,11 @@ typedef enum {
REFERENCE_MODES = 3,
} REFERENCE_MODE;
-#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
-typedef enum {
- RESET_FRAME_CONTEXT_NONE = 0,
- RESET_FRAME_CONTEXT_CURRENT = 1,
- RESET_FRAME_CONTEXT_ALL = 2,
-} RESET_FRAME_CONTEXT_MODE;
-#endif
-
typedef enum {
/**
- * Update frame context to values resulting from forward probability
- * updates signaled in the frame header
+ * Frame context updates are disabled
*/
- REFRESH_FRAME_CONTEXT_FORWARD,
+ REFRESH_FRAME_CONTEXT_DISABLED,
/**
* Update frame context to values resulting from backward probability
* updates based on entropy/counts in the decoded frame
@@ -111,57 +98,41 @@ typedef enum {
REFRESH_FRAME_CONTEXT_BACKWARD,
} REFRESH_FRAME_CONTEXT_MODE;
-#if CONFIG_MFMV
-#define MFMV_STACK_SIZE INTER_REFS_PER_FRAME
-
+#define MFMV_STACK_SIZE 3
typedef struct {
- int_mv mfmv[INTER_REFS_PER_FRAME][MFMV_STACK_SIZE];
+ int_mv mfmv0;
+ uint8_t ref_frame_offset;
} TPL_MV_REF;
-#endif
typedef struct {
- int_mv mv[2];
- int_mv pred_mv[2];
- MV_REFERENCE_FRAME ref_frame[2];
+ int_mv mv;
+ MV_REFERENCE_FRAME ref_frame;
} MV_REF;
typedef struct {
int ref_count;
-#if CONFIG_FRAME_MARKER
- int cur_frame_offset;
- int lst_frame_offset;
- int alt_frame_offset;
- int gld_frame_offset;
-#if CONFIG_EXT_REFS
- int lst2_frame_offset;
- int lst3_frame_offset;
- int bwd_frame_offset;
- int alt2_frame_offset;
-#endif
-#endif // CONFIG_FRAME_MARKER
+ unsigned int cur_frame_offset;
+ unsigned int ref_frame_offset[INTER_REFS_PER_FRAME];
-#if CONFIG_MFMV
- TPL_MV_REF *tpl_mvs;
-#endif
MV_REF *mvs;
+ uint8_t *seg_map;
+ struct segmentation seg;
int mi_rows;
int mi_cols;
// Width and height give the size of the buffer (before any upscaling, unlike
// the sizes that can be derived from the buf structure)
int width;
int height;
-#if CONFIG_GLOBAL_MOTION
- WarpedMotionParams global_motion[TOTAL_REFS_PER_FRAME];
-#endif // CONFIG_GLOBAL_MOTION
+ WarpedMotionParams global_motion[REF_FRAMES];
+  int showable_frame;  // frame can be used as a show-existing frame in the future
+ int film_grain_params_present;
+ aom_film_grain_t film_grain_params;
aom_codec_frame_buffer_t raw_frame_buffer;
YV12_BUFFER_CONFIG buf;
-#if CONFIG_HASH_ME
hash_table hash_table;
-#endif
-#if CONFIG_TEMPMV_SIGNALING
uint8_t intra_only;
-#endif
+ FRAME_TYPE frame_type;
// The Following variables will only be used in frame parallel decode.
// frame_worker_owner indicates which FrameWorker owns this buffer. NULL means
@@ -173,6 +144,12 @@ typedef struct {
// when the frame is fully decoded.
int row;
int col;
+
+ // Inter frame reference frame delta for loop filter
+ int8_t ref_deltas[REF_FRAMES];
+
+ // 0 = ZERO_MV, MV
+ int8_t mode_deltas[MAX_MODE_LF_DELTAS];
} RefCntBuffer;
typedef struct BufferPool {
@@ -195,28 +172,77 @@ typedef struct BufferPool {
InternalFrameBufferList int_frame_buffers;
} BufferPool;
-#if CONFIG_LV_MAP
typedef struct {
- int base_ctx_table[2 /*row*/][2 /*col*/][2 /*sig_map*/]
+ int base_ctx_table[2 /*row*/][2 /*col*/][3 /*sig_map*/]
[BASE_CONTEXT_POSITION_NUM + 1];
} LV_MAP_CTX_TABLE;
-typedef int BASE_CTX_TABLE[2 /*col*/][2 /*sig_map*/]
+typedef int BASE_CTX_TABLE[2 /*col*/][3 /*sig_map*/]
[BASE_CONTEXT_POSITION_NUM + 1];
-#endif
-#if CONFIG_REFERENCE_BUFFER
+typedef struct BitstreamLevel {
+ uint8_t major;
+ uint8_t minor;
+} BitstreamLevel;
+
/* Initial version of sequence header structure */
typedef struct SequenceHeader {
+ int num_bits_width;
+ int num_bits_height;
+ int max_frame_width;
+ int max_frame_height;
int frame_id_numbers_present_flag;
- int frame_id_length_minus7;
- int delta_frame_id_length_minus2;
+ int frame_id_length;
+ int delta_frame_id_length;
+ BLOCK_SIZE sb_size; // Size of the superblock used for this frame
+ int mib_size; // Size of the superblock in units of MI blocks
+ int mib_size_log2; // Log 2 of above.
+ int order_hint_bits_minus_1;
+ int force_screen_content_tools; // 0 - force off
+ // 1 - force on
+ // 2 - adaptive
+ int force_integer_mv; // 0 - Not to force. MV can be in 1/4 or 1/8
+ // 1 - force to integer
+ // 2 - adaptive
+ int still_picture; // Video is a single frame still picture
+ int reduced_still_picture_hdr; // Use reduced header for still picture
+  int monochrome;                 // Monochrome video
+ int enable_filter_intra; // enables/disables filterintra
+ int enable_intra_edge_filter; // enables/disables corner/edge/upsampling
+ int enable_interintra_compound; // enables/disables interintra_compound
+ int enable_masked_compound; // enables/disables masked compound
+ int enable_dual_filter; // 0 - disable dual interpolation filter
+ // 1 - enable vert/horiz filter selection
+ int enable_order_hint; // 0 - disable order hint, and related tools
+ // jnt_comp, ref_frame_mvs, frame_sign_bias
+ // if 0, enable_jnt_comp and
+                             // enable_ref_frame_mvs must be set to 0.
+ int enable_jnt_comp; // 0 - disable joint compound modes
+ // 1 - enable it
+ int enable_ref_frame_mvs; // 0 - disable ref frame mvs
+ // 1 - enable it
+ int enable_warped_motion; // 0 - disable warped motion for sequence
+ // 1 - enable it for the sequence
+ int enable_superres; // 0 - Disable superres for the sequence, and disable
+ // transmitting per-frame superres enabled flag.
+ // 1 - Enable superres for the sequence, and also
+ // enable per-frame flag to denote if superres is
+ // enabled for that frame.
+ int enable_cdef; // To turn on/off CDEF
+ int enable_restoration; // To turn on/off loop restoration
+ int operating_points_cnt_minus_1;
+ int operating_point_idc[MAX_NUM_OPERATING_POINTS];
+ int display_model_info_present_flag;
+ int decoder_model_info_present_flag;
+ BitstreamLevel level[MAX_NUM_OPERATING_POINTS];
+ uint8_t tier[MAX_NUM_OPERATING_POINTS]; // seq_tier in the spec. One bit: 0
+ // or 1.
} SequenceHeader;
-#endif // CONFIG_REFERENCE_BUFFER
typedef struct AV1Common {
struct aom_internal_error_info error;
- aom_color_space_t color_space;
- aom_transfer_function_t transfer_function;
+ aom_color_primaries_t color_primaries;
+ aom_transfer_characteristics_t transfer_characteristics;
+ aom_matrix_coefficients_t matrix_coefficients;
aom_chroma_sample_position_t chroma_sample_position;
int color_range;
int width;
@@ -225,6 +251,14 @@ typedef struct AV1Common {
int render_height;
int last_width;
int last_height;
+ int timing_info_present;
+ aom_timing_info_t timing_info;
+ int buffer_removal_delay_present;
+ aom_dec_model_info_t buffer_model;
+ aom_dec_model_op_parameters_t op_params[MAX_NUM_OPERATING_POINTS + 1];
+ aom_op_timing_info_t op_frame_timing[MAX_NUM_OPERATING_POINTS + 1];
+ int tu_presentation_delay_flag;
+ int64_t tu_presentation_delay;
// TODO(jkoleszar): this implies chroma ss right now, but could vary per
// plane. Revisit as part of the future change to YV12_BUFFER_CONFIG to
@@ -232,10 +266,15 @@ typedef struct AV1Common {
int subsampling_x;
int subsampling_y;
-#if CONFIG_HIGHBITDEPTH
+ int largest_tile_id;
+ size_t largest_tile_size;
+ int context_update_tile_id;
+
+ // Scale of the current frame with respect to itself.
+ struct scale_factors sf_identity;
+
// Marks if we need to use 16bit frame buffers (1: yes, 0: no).
int use_highbitdepth;
-#endif
YV12_BUFFER_CONFIG *frame_to_show;
RefCntBuffer *prev_frame;
@@ -253,6 +292,10 @@ typedef struct AV1Common {
// Each Inter frame can reference INTER_REFS_PER_FRAME buffers
RefBuffer frame_refs[INTER_REFS_PER_FRAME];
+ int is_skip_mode_allowed;
+ int skip_mode_flag;
+ int ref_frame_idx_0;
+ int ref_frame_idx_1;
int new_fb_idx;
@@ -260,39 +303,26 @@ typedef struct AV1Common {
FRAME_TYPE frame_type;
int show_frame;
+  int showable_frame;  // frame can be used as a show-existing frame in the future
int last_show_frame;
int show_existing_frame;
-#if CONFIG_EXT_REFS
// Flag for a frame used as a reference - not written to the bitstream
int is_reference_frame;
-#endif // CONFIG_EXT_REFS
+ int reset_decoder_state;
// Flag signaling that the frame is encoded using only INTRA modes.
uint8_t intra_only;
uint8_t last_intra_only;
-
+ uint8_t disable_cdf_update;
int allow_high_precision_mv;
-#if CONFIG_AMVR
- int seq_mv_precision_level; // 0 the default in AOM, 1 only integer, 2
- // adaptive
- int cur_frame_mv_precision_level; // 0 the default in AOM, 1 only integer
-#endif
+ int cur_frame_force_integer_mv; // 0 the default in AOM, 1 only integer
int allow_screen_content_tools;
-#if CONFIG_INTERINTRA
- int allow_interintra_compound;
-#endif // CONFIG_INTERINTRA
-#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
- int allow_masked_compound;
-#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
-
-#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
- // Flag signaling which frame contexts should be reset to default values.
- RESET_FRAME_CONTEXT_MODE reset_frame_context;
-#endif
+ int allow_intrabc;
+ int allow_warped_motion;
// MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in
- // MODE_INFO (8-pixel) units.
+ // MB_MODE_INFO (8-pixel) units.
int MBs;
int mb_rows, mi_rows;
int mb_cols, mi_cols;
@@ -301,119 +331,120 @@ typedef struct AV1Common {
/* profile settings */
TX_MODE tx_mode;
+#if CONFIG_ENTROPY_STATS
+ int coef_cdf_category;
+#endif
+
int base_qindex;
int y_dc_delta_q;
- int uv_dc_delta_q;
- int uv_ac_delta_q;
- int16_t y_dequant[MAX_SEGMENTS][2];
- int16_t uv_dequant[MAX_SEGMENTS][2];
+ int u_dc_delta_q;
+ int v_dc_delta_q;
+ int u_ac_delta_q;
+ int v_ac_delta_q;
+
+ int separate_uv_delta_q;
+
+  // The dequantizers below are true dequantizers used only in the
+ // dequantization process. They have the same coefficient
+ // shift/scale as TX.
+ int16_t y_dequant_QTX[MAX_SEGMENTS][2];
+ int16_t u_dequant_QTX[MAX_SEGMENTS][2];
+ int16_t v_dequant_QTX[MAX_SEGMENTS][2];
-#if CONFIG_AOM_QM
// Global quant matrix tables
- qm_val_t *giqmatrix[NUM_QM_LEVELS][2][2][TX_SIZES_ALL];
- qm_val_t *gqmatrix[NUM_QM_LEVELS][2][2][TX_SIZES_ALL];
+ const qm_val_t *giqmatrix[NUM_QM_LEVELS][3][TX_SIZES_ALL];
+ const qm_val_t *gqmatrix[NUM_QM_LEVELS][3][TX_SIZES_ALL];
// Local quant matrix tables for each frame
- qm_val_t *y_iqmatrix[MAX_SEGMENTS][2][TX_SIZES_ALL];
- qm_val_t *uv_iqmatrix[MAX_SEGMENTS][2][TX_SIZES_ALL];
- // Encoder
- qm_val_t *y_qmatrix[MAX_SEGMENTS][2][TX_SIZES_ALL];
- qm_val_t *uv_qmatrix[MAX_SEGMENTS][2][TX_SIZES_ALL];
+ const qm_val_t *y_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL];
+ const qm_val_t *u_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL];
+ const qm_val_t *v_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL];
+ // Encoder
int using_qmatrix;
+ int qm_y;
+ int qm_u;
+ int qm_v;
int min_qmlevel;
int max_qmlevel;
-#endif
-#if CONFIG_NEW_QUANT
- dequant_val_type_nuq y_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES][COEF_BANDS];
- dequant_val_type_nuq uv_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES][COEF_BANDS];
-#endif
- /* We allocate a MODE_INFO struct for each macroblock, together with
+ /* We allocate a MB_MODE_INFO struct for each macroblock, together with
an extra row on top and column on the left to simplify prediction. */
int mi_alloc_size;
- MODE_INFO *mip; /* Base of allocated array */
- MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
+ MB_MODE_INFO *mip; /* Base of allocated array */
+ MB_MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
// TODO(agrange): Move prev_mi into encoder structure.
// prev_mip and prev_mi will only be allocated in encoder.
- MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
- MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
+ MB_MODE_INFO *prev_mip; /* MB_MODE_INFO array 'mip' from last decoded frame */
+ MB_MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
// Separate mi functions between encoder and decoder.
int (*alloc_mi)(struct AV1Common *cm, int mi_size);
void (*free_mi)(struct AV1Common *cm);
void (*setup_mi)(struct AV1Common *cm);
- // Grid of pointers to 8x8 MODE_INFO structs. Any 8x8 not in the visible
+ // Grid of pointers to 8x8 MB_MODE_INFO structs. Any 8x8 not in the visible
// area will be NULL.
- MODE_INFO **mi_grid_base;
- MODE_INFO **mi_grid_visible;
- MODE_INFO **prev_mi_grid_base;
- MODE_INFO **prev_mi_grid_visible;
-
- // Whether to use previous frame's motion vectors for prediction.
- int use_prev_frame_mvs;
+ MB_MODE_INFO **mi_grid_base;
+ MB_MODE_INFO **mi_grid_visible;
+ MB_MODE_INFO **prev_mi_grid_base;
+ MB_MODE_INFO **prev_mi_grid_visible;
- // Persistent mb segment id map used in prediction.
- int seg_map_idx;
- int prev_seg_map_idx;
+ // Whether to use previous frames' motion vectors for prediction.
+ int allow_ref_frame_mvs;
- uint8_t *seg_map_array[NUM_PING_PONG_BUFFERS];
uint8_t *last_frame_seg_map;
uint8_t *current_frame_seg_map;
int seg_map_alloc_size;
InterpFilter interp_filter;
+ int switchable_motion_mode;
+
loop_filter_info_n lf_info;
-#if CONFIG_FRAME_SUPERRES
// The denominator of the superres scale; the numerator is fixed.
uint8_t superres_scale_denominator;
int superres_upscaled_width;
int superres_upscaled_height;
-#endif // CONFIG_FRAME_SUPERRES
-#if CONFIG_LOOP_RESTORATION
RestorationInfo rst_info[MAX_MB_PLANE];
- RestorationInternal rst_internal;
-#endif // CONFIG_LOOP_RESTORATION
+
+ // rst_end_stripe[i] is one more than the index of the bottom stripe
+ // for tile row i.
+ int rst_end_stripe[MAX_TILE_ROWS];
+
+ // Pointer to a scratch buffer used by self-guided restoration
+ int32_t *rst_tmpbuf;
+ RestorationLineBuffers *rlbs;
+
+ // Output of loop restoration
+ YV12_BUFFER_CONFIG rst_frame;
// Flag signaling how frame contexts should be updated at the end of
// a frame decode
REFRESH_FRAME_CONTEXT_MODE refresh_frame_context;
- int ref_frame_sign_bias[TOTAL_REFS_PER_FRAME]; /* Two state 0, 1 */
+ int ref_frame_sign_bias[REF_FRAMES]; /* Two state 0, 1 */
struct loopfilter lf;
struct segmentation seg;
- int all_lossless;
- int frame_parallel_decode; // frame-based threading.
+ int coded_lossless; // frame is fully lossless at the coded resolution.
+ int all_lossless; // frame is fully lossless at the upscaled resolution.
-#if CONFIG_EXT_TX
int reduced_tx_set_used;
-#endif // CONFIG_EXT_TX
-// Context probabilities for reference frame prediction
-#if CONFIG_EXT_REFS
+ // Context probabilities for reference frame prediction
MV_REFERENCE_FRAME comp_fwd_ref[FWD_REFS];
MV_REFERENCE_FRAME comp_bwd_ref[BWD_REFS];
-#else
- MV_REFERENCE_FRAME comp_fixed_ref;
- MV_REFERENCE_FRAME comp_var_ref[COMP_REFS];
-#endif // CONFIG_EXT_REFS
REFERENCE_MODE reference_mode;
FRAME_CONTEXT *fc; /* this frame entropy */
FRAME_CONTEXT *frame_contexts; // FRAME_CONTEXTS
- FRAME_CONTEXT *pre_fc; // Context referenced in this frame
-#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
unsigned int frame_context_idx; /* Context to use/update */
-#endif
- FRAME_COUNTS counts;
+ int fb_of_context_type[REF_FRAMES];
+ int primary_ref_frame;
-#if CONFIG_FRAME_MARKER
unsigned int frame_offset;
-#endif
unsigned int current_video_frame;
BITSTREAM_PROFILE profile;
@@ -423,44 +454,27 @@ typedef struct AV1Common {
aom_bit_depth_t dequant_bit_depth; // bit_depth of current dequantizer
int error_resilient_mode;
+ int force_primary_ref_none;
int tile_cols, tile_rows;
int last_tile_cols, last_tile_rows;
-#if CONFIG_MAX_TILE
+ int max_tile_width_sb;
int min_log2_tile_cols;
int max_log2_tile_cols;
int max_log2_tile_rows;
int min_log2_tile_rows;
int min_log2_tiles;
- int max_tile_width_sb;
int max_tile_height_sb;
int uniform_tile_spacing_flag;
int log2_tile_cols; // only valid for uniform tiles
int log2_tile_rows; // only valid for uniform tiles
int tile_col_start_sb[MAX_TILE_COLS + 1]; // valid for 0 <= i <= tile_cols
int tile_row_start_sb[MAX_TILE_ROWS + 1]; // valid for 0 <= i <= tile_rows
-#if CONFIG_DEPENDENT_HORZTILES
- int tile_row_independent[MAX_TILE_ROWS]; // valid for 0 <= i < tile_rows
-#endif
-#else
- int log2_tile_cols, log2_tile_rows; // Used in non-large_scale_tile_coding.
- int tile_width, tile_height; // In MI units
-#endif // CONFIG_MAX_TILE
+ int tile_width, tile_height; // In MI units
-#if CONFIG_EXT_TILE
unsigned int large_scale_tile;
unsigned int single_tile_decoding;
-#endif // CONFIG_EXT_TILE
-
-#if CONFIG_DEPENDENT_HORZTILES
- int dependent_horz_tiles;
- int tile_group_start_row[MAX_TILE_ROWS][MAX_TILE_COLS];
- int tile_group_start_col[MAX_TILE_ROWS][MAX_TILE_COLS];
-#endif
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
- int loop_filter_across_tiles_enabled;
-#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
int byte_alignment;
int skip_loop_filter;
@@ -476,74 +490,65 @@ typedef struct AV1Common {
// External BufferPool passed from outside.
BufferPool *buffer_pool;
- PARTITION_CONTEXT *above_seg_context;
- ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
-#if CONFIG_VAR_TX
- TXFM_CONTEXT *above_txfm_context;
- TXFM_CONTEXT *top_txfm_context[MAX_MB_PLANE];
- TXFM_CONTEXT left_txfm_context[MAX_MB_PLANE][2 * MAX_MIB_SIZE];
-#endif
- int above_context_alloc_cols;
-
- // scratch memory for intraonly/keyframe forward updates from default tables
- // - this is intentionally not placed in FRAME_CONTEXT since it's reset upon
- // each keyframe and not used afterwards
- aom_prob kf_y_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1];
-#if CONFIG_GLOBAL_MOTION
- WarpedMotionParams global_motion[TOTAL_REFS_PER_FRAME];
-#endif
-
- BLOCK_SIZE sb_size; // Size of the superblock used for this frame
- int mib_size; // Size of the superblock in units of MI blocks
- int mib_size_log2; // Log 2 of above.
-#if CONFIG_CDEF
+ PARTITION_CONTEXT **above_seg_context;
+ ENTROPY_CONTEXT **above_context[MAX_MB_PLANE];
+ TXFM_CONTEXT **above_txfm_context;
+ WarpedMotionParams global_motion[REF_FRAMES];
+ aom_film_grain_table_t *film_grain_table;
+ int film_grain_params_present;
+ aom_film_grain_t film_grain_params;
int cdef_pri_damping;
int cdef_sec_damping;
int nb_cdef_strengths;
int cdef_strengths[CDEF_MAX_STRENGTHS];
int cdef_uv_strengths[CDEF_MAX_STRENGTHS];
int cdef_bits;
-#endif
int delta_q_present_flag;
// Resolution of delta quant
int delta_q_res;
-#if CONFIG_EXT_DELTA_Q
int delta_lf_present_flag;
// Resolution of delta lf level
int delta_lf_res;
-#if CONFIG_LOOPFILTER_LEVEL
// This is a flag for number of deltas of loop filter level
// 0: use 1 delta, for y_vertical, y_horizontal, u, and v
// 1: use separate deltas for each filter level
int delta_lf_multi;
-#endif // CONFIG_LOOPFILTER_LEVEL
-#endif
int num_tg;
-#if CONFIG_REFERENCE_BUFFER
SequenceHeader seq_params;
int current_frame_id;
int ref_frame_id[REF_FRAMES];
int valid_for_referencing[REF_FRAMES];
- int refresh_mask;
- int invalid_delta_frame_id_minus1;
-#endif // CONFIG_REFERENCE_BUFFER
-#if CONFIG_ANS && ANS_MAX_SYMBOLS
- int ans_window_size_log2;
-#endif
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- NCOBMC_KERNELS ncobmc_kernels[ADAPT_OVERLAP_BLOCKS][ALL_NCOBMC_MODES];
- uint8_t *ncobmcaw_buf[4];
-#endif
-#if CONFIG_LV_MAP
+ int invalid_delta_frame_id_minus_1;
LV_MAP_CTX_TABLE coeff_ctx_table;
+ TPL_MV_REF *tpl_mvs;
+ int tpl_mvs_mem_size;
+ // TODO(jingning): This can be combined with sign_bias later.
+ int8_t ref_frame_side[REF_FRAMES];
+
+ int is_annexb;
+
+ int frame_refs_short_signaling;
+ int temporal_layer_id;
+ int spatial_layer_id;
+ unsigned int number_temporal_layers;
+ unsigned int number_spatial_layers;
+ int num_allocated_above_context_mi_col;
+ int num_allocated_above_contexts;
+ int num_allocated_above_context_planes;
+
+#if TXCOEFF_TIMER
+ int64_t cum_txcoeff_timer;
+ int64_t txcoeff_timer;
+ int txb_count;
#endif
-#if CONFIG_LPF_SB
- int final_lpf_encode;
-#endif
-#if CONFIG_ADAPT_SCAN
- int use_adapt_scan;
+
+#if TXCOEFF_COST_TIMER
+ int64_t cum_txcoeff_cost_timer;
+ int64_t txcoeff_cost_timer;
+ int64_t txcoeff_cost_count;
#endif
+ const cfg_options_t *options;
} AV1_COMMON;
// TODO(hkuang): Don't need to lock the whole pool after implementing atomic
@@ -585,6 +590,17 @@ static INLINE int get_free_fb(AV1_COMMON *cm) {
if (frame_bufs[i].ref_count == 0) break;
if (i != FRAME_BUFFERS) {
+ if (frame_bufs[i].buf.use_external_refernce_buffers) {
+      // If this frame buffer's y_buffer, u_buffer, and v_buffer point to
+      // external reference buffers, restore the buffer pointers to point to
+      // the internally allocated memory.
+ YV12_BUFFER_CONFIG *ybf = &frame_bufs[i].buf;
+ ybf->y_buffer = ybf->store_buf_adr[0];
+ ybf->u_buffer = ybf->store_buf_adr[1];
+ ybf->v_buffer = ybf->store_buf_adr[2];
+ ybf->use_external_refernce_buffers = 0;
+ }
+
frame_bufs[i].ref_count = 1;
} else {
// Reset i to be INVALID_IDX to indicate no free buffer found.
@@ -606,270 +622,236 @@ static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
bufs[new_idx].ref_count++;
}
-#if CONFIG_TEMPMV_SIGNALING
-// Returns 1 if this frame might use mvs from some previous frame. This
-// function doesn't consider whether prev_frame is actually suitable (see
-// frame_can_use_prev_frame_mvs for that)
-static INLINE int frame_might_use_prev_frame_mvs(const AV1_COMMON *cm) {
- return !cm->error_resilient_mode && !cm->intra_only;
+static INLINE int frame_is_intra_only(const AV1_COMMON *const cm) {
+ return cm->frame_type == KEY_FRAME || cm->intra_only;
+}
+
+static INLINE int frame_is_sframe(const AV1_COMMON *cm) {
+ return cm->frame_type == S_FRAME;
}
-// Returns 1 if this frame really can use MVs from some previous frame.
-static INLINE int frame_can_use_prev_frame_mvs(const AV1_COMMON *cm) {
- return (frame_might_use_prev_frame_mvs(cm) && cm->last_show_frame &&
- cm->prev_frame && !cm->prev_frame->intra_only &&
- cm->width == cm->prev_frame->width &&
- cm->height == cm->prev_frame->height);
+static INLINE RefCntBuffer *get_prev_frame(const AV1_COMMON *const cm) {
+ if (cm->primary_ref_frame == PRIMARY_REF_NONE ||
+ cm->frame_refs[cm->primary_ref_frame].idx == INVALID_IDX) {
+ return NULL;
+ } else {
+ return &cm->buffer_pool
+ ->frame_bufs[cm->frame_refs[cm->primary_ref_frame].idx];
+ }
+}
+
+// Returns 1 if this frame might allow mvs from some reference frame.
+static INLINE int frame_might_allow_ref_frame_mvs(const AV1_COMMON *cm) {
+ return !cm->error_resilient_mode && cm->seq_params.enable_ref_frame_mvs &&
+ cm->seq_params.enable_order_hint && !frame_is_intra_only(cm);
+}
+
+// Returns 1 if this frame might use warped_motion
+static INLINE int frame_might_allow_warped_motion(const AV1_COMMON *cm) {
+ return !cm->error_resilient_mode && !frame_is_intra_only(cm) &&
+ cm->seq_params.enable_warped_motion;
}
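
Editor's note: frame_might_allow_ref_frame_mvs and frame_might_allow_warped_motion only check whether the error-resilience, frame-type and sequence-level flags permit the tools; the per-frame decision is still coded separately. A hedged sketch of how a frame-header reader could gate on them (aom_rb_read_bit is the existing bit-buffer reader; the wrapper function itself is illustrative, not actual libaom code):

    /* Illustrative only: read the per-frame flags only when the sequence allows the tools. */
    static void read_motion_tool_flags(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
      cm->allow_ref_frame_mvs =
          frame_might_allow_ref_frame_mvs(cm) ? aom_rb_read_bit(rb) : 0;
      cm->allow_warped_motion =
          frame_might_allow_warped_motion(cm) ? aom_rb_read_bit(rb) : 0;
    }
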
-#endif
static INLINE void ensure_mv_buffer(RefCntBuffer *buf, AV1_COMMON *cm) {
- if (buf->mvs == NULL || buf->mi_rows < cm->mi_rows ||
- buf->mi_cols < cm->mi_cols) {
+ const int buf_rows = buf->mi_rows;
+ const int buf_cols = buf->mi_cols;
+
+ if (buf->mvs == NULL || buf_rows != cm->mi_rows || buf_cols != cm->mi_cols) {
aom_free(buf->mvs);
buf->mi_rows = cm->mi_rows;
buf->mi_cols = cm->mi_cols;
-#if CONFIG_TMV
CHECK_MEM_ERROR(cm, buf->mvs,
(MV_REF *)aom_calloc(
((cm->mi_rows + 1) >> 1) * ((cm->mi_cols + 1) >> 1),
sizeof(*buf->mvs)));
-#else
- CHECK_MEM_ERROR(
- cm, buf->mvs,
- (MV_REF *)aom_calloc(cm->mi_rows * cm->mi_cols, sizeof(*buf->mvs)));
-#endif // CONFIG_TMV
-
-#if CONFIG_MFMV
- aom_free(buf->tpl_mvs);
- CHECK_MEM_ERROR(
- cm, buf->tpl_mvs,
- (TPL_MV_REF *)aom_calloc((cm->mi_rows + MAX_MIB_SIZE) * cm->mi_stride,
- sizeof(*buf->tpl_mvs)));
-#endif
+ aom_free(buf->seg_map);
+ CHECK_MEM_ERROR(cm, buf->seg_map,
+ (uint8_t *)aom_calloc(cm->mi_rows * cm->mi_cols,
+ sizeof(*buf->seg_map)));
}
-}
-#if CONFIG_VAR_REFS
-#define LAST_IS_VALID(cm) ((cm)->frame_refs[LAST_FRAME - 1].is_valid)
-#define LAST2_IS_VALID(cm) ((cm)->frame_refs[LAST2_FRAME - 1].is_valid)
-#define LAST3_IS_VALID(cm) ((cm)->frame_refs[LAST3_FRAME - 1].is_valid)
-#define GOLDEN_IS_VALID(cm) ((cm)->frame_refs[GOLDEN_FRAME - 1].is_valid)
-#define BWDREF_IS_VALID(cm) ((cm)->frame_refs[BWDREF_FRAME - 1].is_valid)
-#define ALTREF2_IS_VALID(cm) ((cm)->frame_refs[ALTREF2_FRAME - 1].is_valid)
-#define ALTREF_IS_VALID(cm) ((cm)->frame_refs[ALTREF_FRAME - 1].is_valid)
-
-#define L_OR_L2(cm) (LAST_IS_VALID(cm) || LAST2_IS_VALID(cm))
-#define L_AND_L2(cm) (LAST_IS_VALID(cm) && LAST2_IS_VALID(cm))
-#define L_AND_L3(cm) (LAST_IS_VALID(cm) && LAST3_IS_VALID(cm))
-#define L_AND_G(cm) (LAST_IS_VALID(cm) && GOLDEN_IS_VALID(cm))
-
-#define L3_OR_G(cm) (LAST3_IS_VALID(cm) || GOLDEN_IS_VALID(cm))
-#define L3_AND_G(cm) (LAST3_IS_VALID(cm) && GOLDEN_IS_VALID(cm))
-
-#define BWD_OR_ALT2(cm) (BWDREF_IS_VALID(cm) || ALTREF2_IS_VALID(cm))
-#define BWD_AND_ALT2(cm) (BWDREF_IS_VALID(cm) && ALTREF2_IS_VALID(cm))
-#define BWD_OR_ALT(cm) (BWDREF_IS_VALID(cm) || ALTREF_IS_VALID(cm))
-#define BWD_AND_ALT(cm) (BWDREF_IS_VALID(cm) && ALTREF_IS_VALID(cm))
-#endif // CONFIG_VAR_REFS
+ const int mem_size =
+ ((cm->mi_rows + MAX_MIB_SIZE) >> 1) * (cm->mi_stride >> 1);
+ int realloc = cm->tpl_mvs == NULL;
+ if (cm->tpl_mvs) realloc |= cm->tpl_mvs_mem_size < mem_size;
+
+ if (realloc) {
+ aom_free(cm->tpl_mvs);
+ CHECK_MEM_ERROR(cm, cm->tpl_mvs,
+ (TPL_MV_REF *)aom_calloc(mem_size, sizeof(*cm->tpl_mvs)));
+ cm->tpl_mvs_mem_size = mem_size;
+ }
+}
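
Editor's note on the reallocation above: both motion-vector buffers are kept at 8x8-luma (2x2 MI-unit) granularity, which is why every dimension is halved — buf->mvs uses ((mi_rows + 1) >> 1) * ((mi_cols + 1) >> 1) entries and cm->tpl_mvs uses ((mi_rows + MAX_MIB_SIZE) >> 1) * (mi_stride >> 1). The tpl_mvs buffer is reallocated only when the recorded tpl_mvs_mem_size is smaller than the new requirement, so it can only grow.
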
static INLINE int mi_cols_aligned_to_sb(const AV1_COMMON *cm) {
- return ALIGN_POWER_OF_TWO(cm->mi_cols, cm->mib_size_log2);
+ return ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2);
}
static INLINE int mi_rows_aligned_to_sb(const AV1_COMMON *cm) {
- return ALIGN_POWER_OF_TWO(cm->mi_rows, cm->mib_size_log2);
+ return ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2);
}
-static INLINE int frame_is_intra_only(const AV1_COMMON *const cm) {
- return cm->frame_type == KEY_FRAME || cm->intra_only;
+void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm);
+
+static INLINE int av1_num_planes(const AV1_COMMON *cm) {
+ return cm->seq_params.monochrome ? 1 : MAX_MB_PLANE;
}
-#if CONFIG_CFL
-#if CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
-static INLINE void cfl_clear_sub8x8_val(CFL_CTX *cfl) {
- memset(cfl->sub8x8_val, 0, sizeof(cfl->sub8x8_val));
+static INLINE void av1_init_above_context(AV1_COMMON *cm, MACROBLOCKD *xd,
+ const int tile_row) {
+ const int num_planes = av1_num_planes(cm);
+ for (int i = 0; i < num_planes; ++i) {
+ xd->above_context[i] = cm->above_context[i][tile_row];
+ }
+ xd->above_seg_context = cm->above_seg_context[tile_row];
+ xd->above_txfm_context = cm->above_txfm_context[tile_row];
}
-#endif // CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
-void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm);
-#endif // CONFIG_CFL
static INLINE void av1_init_macroblockd(AV1_COMMON *cm, MACROBLOCKD *xd,
-#if CONFIG_PVQ
- tran_low_t *pvq_ref_coeff,
-#endif
-#if CONFIG_CFL
- CFL_CTX *cfl,
-#endif
tran_low_t *dqcoeff) {
- for (int i = 0; i < MAX_MB_PLANE; ++i) {
+ const int num_planes = av1_num_planes(cm);
+ for (int i = 0; i < num_planes; ++i) {
xd->plane[i].dqcoeff = dqcoeff;
-#if CONFIG_PVQ
- xd->plane[i].pvq_ref_coeff = pvq_ref_coeff;
-#endif
- xd->above_context[i] = cm->above_context[i];
+
if (xd->plane[i].plane_type == PLANE_TYPE_Y) {
- memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant));
-#if CONFIG_AOM_QM
+ memcpy(xd->plane[i].seg_dequant_QTX, cm->y_dequant_QTX,
+ sizeof(cm->y_dequant_QTX));
memcpy(xd->plane[i].seg_iqmatrix, cm->y_iqmatrix, sizeof(cm->y_iqmatrix));
-#endif
-#if CONFIG_NEW_QUANT
- memcpy(xd->plane[i].seg_dequant_nuq, cm->y_dequant_nuq,
- sizeof(cm->y_dequant_nuq));
-#endif
} else {
- memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant));
-#if CONFIG_AOM_QM
- memcpy(xd->plane[i].seg_iqmatrix, cm->uv_iqmatrix,
- sizeof(cm->uv_iqmatrix));
-#endif
-#if CONFIG_NEW_QUANT
- memcpy(xd->plane[i].seg_dequant_nuq, cm->uv_dequant_nuq,
- sizeof(cm->uv_dequant_nuq));
-#endif
+ if (i == AOM_PLANE_U) {
+ memcpy(xd->plane[i].seg_dequant_QTX, cm->u_dequant_QTX,
+ sizeof(cm->u_dequant_QTX));
+ memcpy(xd->plane[i].seg_iqmatrix, cm->u_iqmatrix,
+ sizeof(cm->u_iqmatrix));
+ } else {
+ memcpy(xd->plane[i].seg_dequant_QTX, cm->v_dequant_QTX,
+ sizeof(cm->v_dequant_QTX));
+ memcpy(xd->plane[i].seg_iqmatrix, cm->v_iqmatrix,
+ sizeof(cm->v_iqmatrix));
+ }
}
}
- xd->fc = cm->fc;
- xd->above_seg_context = cm->above_seg_context;
-#if CONFIG_VAR_TX
- xd->above_txfm_context = cm->above_txfm_context;
-#endif
-#if CONFIG_CFL
- cfl_init(cfl, cm);
- xd->cfl = cfl;
-#endif
xd->mi_stride = cm->mi_stride;
xd->error_info = &cm->error;
+ cfl_init(&xd->cfl, cm);
}
-static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) {
+static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col,
+ const int num_planes) {
int i;
int row_offset = mi_row;
int col_offset = mi_col;
- for (i = 0; i < MAX_MB_PLANE; ++i) {
+ for (i = 0; i < num_planes; ++i) {
struct macroblockd_plane *const pd = &xd->plane[i];
-#if CONFIG_CHROMA_SUB8X8
// Offset the buffer pointer
- const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
if (pd->subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1))
row_offset = mi_row - 1;
if (pd->subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1))
col_offset = mi_col - 1;
-#endif
- int above_idx = col_offset << (MI_SIZE_LOG2 - tx_size_wide_log2[0]);
- int left_idx = (row_offset & MAX_MIB_MASK)
- << (MI_SIZE_LOG2 - tx_size_high_log2[0]);
+ int above_idx = col_offset;
+ int left_idx = row_offset & MAX_MIB_MASK;
pd->above_context = &xd->above_context[i][above_idx >> pd->subsampling_x];
pd->left_context = &xd->left_context[i][left_idx >> pd->subsampling_y];
}
}
static INLINE int calc_mi_size(int len) {
- // len is in mi units.
- return len + MAX_MIB_SIZE;
+ // len is in mi units. Align to a multiple of SBs.
+ return ALIGN_POWER_OF_TWO(len, MAX_MIB_SIZE_LOG2);
}
-static INLINE void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh) {
+static INLINE void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh,
+ const int num_planes) {
int i;
- for (i = 0; i < MAX_MB_PLANE; i++) {
- xd->plane[i].n4_w = (bw << 1) >> xd->plane[i].subsampling_x;
- xd->plane[i].n4_h = (bh << 1) >> xd->plane[i].subsampling_y;
-
+ for (i = 0; i < num_planes; i++) {
xd->plane[i].width = (bw * MI_SIZE) >> xd->plane[i].subsampling_x;
xd->plane[i].height = (bh * MI_SIZE) >> xd->plane[i].subsampling_y;
-#if !CONFIG_CHROMA_2X2
xd->plane[i].width = AOMMAX(xd->plane[i].width, 4);
xd->plane[i].height = AOMMAX(xd->plane[i].height, 4);
-#endif
}
}
static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
int mi_row, int bh, int mi_col, int bw,
-#if CONFIG_DEPENDENT_HORZTILES
- int dependent_horz_tile_flag,
-#endif // CONFIG_DEPENDENT_HORZTILES
int mi_rows, int mi_cols) {
xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8;
xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8;
-#if CONFIG_DEPENDENT_HORZTILES
- if (dependent_horz_tile_flag) {
- xd->up_available = (mi_row > tile->mi_row_start) || !tile->tg_horz_boundary;
- } else {
-#endif // CONFIG_DEPENDENT_HORZTILES
- // Are edges available for intra prediction?
- xd->up_available = (mi_row > tile->mi_row_start);
-#if CONFIG_DEPENDENT_HORZTILES
- }
-#endif // CONFIG_DEPENDENT_HORZTILES
+ // Are edges available for intra prediction?
+ xd->up_available = (mi_row > tile->mi_row_start);
+
+ const int ss_x = xd->plane[1].subsampling_x;
+ const int ss_y = xd->plane[1].subsampling_y;
xd->left_available = (mi_col > tile->mi_col_start);
-#if CONFIG_CHROMA_SUB8X8
xd->chroma_up_available = xd->up_available;
xd->chroma_left_available = xd->left_available;
- if (xd->plane[1].subsampling_x && bw < mi_size_wide[BLOCK_8X8])
+ if (ss_x && bw < mi_size_wide[BLOCK_8X8])
xd->chroma_left_available = (mi_col - 1) > tile->mi_col_start;
- if (xd->plane[1].subsampling_y && bh < mi_size_high[BLOCK_8X8])
+ if (ss_y && bh < mi_size_high[BLOCK_8X8])
xd->chroma_up_available = (mi_row - 1) > tile->mi_row_start;
-#endif
if (xd->up_available) {
- xd->above_mi = xd->mi[-xd->mi_stride];
- // above_mi may be NULL in encoder's first pass.
- xd->above_mbmi = xd->above_mi ? &xd->above_mi->mbmi : NULL;
+ xd->above_mbmi = xd->mi[-xd->mi_stride];
} else {
- xd->above_mi = NULL;
xd->above_mbmi = NULL;
}
if (xd->left_available) {
- xd->left_mi = xd->mi[-1];
- // left_mi may be NULL in encoder's first pass.
- xd->left_mbmi = xd->left_mi ? &xd->left_mi->mbmi : NULL;
+ xd->left_mbmi = xd->mi[-1];
} else {
- xd->left_mi = NULL;
xd->left_mbmi = NULL;
}
+ const int chroma_ref = ((mi_row & 0x01) || !(bh & 0x01) || !ss_y) &&
+ ((mi_col & 0x01) || !(bw & 0x01) || !ss_x);
+ if (chroma_ref) {
+ // To help calculate the "above" and "left" chroma blocks, note that the
+    // current block may cover multiple luma blocks (e.g., if partitioned into
+ // 4x4 luma blocks).
+ // First, find the top-left-most luma block covered by this chroma block
+ MB_MODE_INFO **base_mi =
+ &xd->mi[-(mi_row & ss_y) * xd->mi_stride - (mi_col & ss_x)];
+
+ // Then, we consider the luma region covered by the left or above 4x4 chroma
+ // prediction. We want to point to the chroma reference block in that
+ // region, which is the bottom-right-most mi unit.
+ // This leads to the following offsets:
+ MB_MODE_INFO *chroma_above_mi =
+ xd->chroma_up_available ? base_mi[-xd->mi_stride + ss_x] : NULL;
+ xd->chroma_above_mbmi = chroma_above_mi;
+
+ MB_MODE_INFO *chroma_left_mi =
+ xd->chroma_left_available ? base_mi[ss_y * xd->mi_stride - 1] : NULL;
+ xd->chroma_left_mbmi = chroma_left_mi;
+ }
+
xd->n8_h = bh;
xd->n8_w = bw;
xd->is_sec_rect = 0;
- if (xd->n8_w < xd->n8_h)
- if (mi_col & (xd->n8_h - 1)) xd->is_sec_rect = 1;
+ if (xd->n8_w < xd->n8_h) {
+ // Only mark is_sec_rect as 1 for the last block.
+ // For PARTITION_VERT_4, it would be (0, 0, 0, 1);
+ // For other partitions, it would be (0, 1).
+ if (!((mi_col + xd->n8_w) & (xd->n8_h - 1))) xd->is_sec_rect = 1;
+ }
if (xd->n8_w > xd->n8_h)
if (mi_row & (xd->n8_w - 1)) xd->is_sec_rect = 1;
}
-static INLINE const aom_prob *get_y_mode_probs(const AV1_COMMON *cm,
- const MODE_INFO *mi,
- const MODE_INFO *above_mi,
- const MODE_INFO *left_mi,
- int block) {
- const PREDICTION_MODE above = av1_above_block_mode(mi, above_mi, block);
- const PREDICTION_MODE left = av1_left_block_mode(mi, left_mi, block);
- return cm->kf_y_prob[above][left];
-}
-
static INLINE aom_cdf_prob *get_y_mode_cdf(FRAME_CONTEXT *tile_ctx,
- const MODE_INFO *mi,
- const MODE_INFO *above_mi,
- const MODE_INFO *left_mi,
- int block) {
- const PREDICTION_MODE above = av1_above_block_mode(mi, above_mi, block);
- const PREDICTION_MODE left = av1_left_block_mode(mi, left_mi, block);
-
-#if CONFIG_KF_CTX
- int above_ctx = intra_mode_context[above];
- int left_ctx = intra_mode_context[left];
+ const MB_MODE_INFO *above_mi,
+ const MB_MODE_INFO *left_mi) {
+ const PREDICTION_MODE above = av1_above_block_mode(above_mi);
+ const PREDICTION_MODE left = av1_left_block_mode(left_mi);
+ const int above_ctx = intra_mode_context[above];
+ const int left_ctx = intra_mode_context[left];
return tile_ctx->kf_y_cdf[above_ctx][left_ctx];
-#else
- return tile_ctx->kf_y_cdf[above][left];
-#endif
}
static INLINE void update_partition_context(MACROBLOCKD *xd, int mi_row,
@@ -879,130 +861,117 @@ static INLINE void update_partition_context(MACROBLOCKD *xd, int mi_row,
PARTITION_CONTEXT *const left_ctx =
xd->left_seg_context + (mi_row & MAX_MIB_MASK);
-#if CONFIG_EXT_PARTITION_TYPES
const int bw = mi_size_wide[bsize];
const int bh = mi_size_high[bsize];
memset(above_ctx, partition_context_lookup[subsize].above, bw);
memset(left_ctx, partition_context_lookup[subsize].left, bh);
-#else
- // num_4x4_blocks_wide_lookup[bsize] / 2
- const int bs = mi_size_wide[bsize];
-
- // update the partition context at the end notes. set partition bits
- // of block sizes larger than the current one to be one, and partition
- // bits of smaller block sizes to be zero.
- memset(above_ctx, partition_context_lookup[subsize].above, bs);
- memset(left_ctx, partition_context_lookup[subsize].left, bs);
-#endif // CONFIG_EXT_PARTITION_TYPES
}
-#if CONFIG_CB4X4
static INLINE int is_chroma_reference(int mi_row, int mi_col, BLOCK_SIZE bsize,
int subsampling_x, int subsampling_y) {
-#if CONFIG_CHROMA_2X2
- return 1;
-#endif
-
-#if CONFIG_CHROMA_SUB8X8
const int bw = mi_size_wide[bsize];
const int bh = mi_size_high[bsize];
-
int ref_pos = ((mi_row & 0x01) || !(bh & 0x01) || !subsampling_y) &&
((mi_col & 0x01) || !(bw & 0x01) || !subsampling_x);
-
- return ref_pos;
-#else
- int ref_pos = !(((mi_row & 0x01) && subsampling_y) ||
- ((mi_col & 0x01) && subsampling_x));
-
- if (bsize >= BLOCK_8X8) ref_pos = 1;
-
return ref_pos;
-#endif
-}
-
-#if CONFIG_SUPERTX
-static INLINE int need_handle_chroma_sub8x8(BLOCK_SIZE bsize, int subsampling_x,
- int subsampling_y) {
- const int bw = mi_size_wide[bsize];
- const int bh = mi_size_high[bsize];
-
- if (bsize >= BLOCK_8X8 ||
- ((!(bh & 0x01) || !subsampling_y) && (!(bw & 0x01) || !subsampling_x)))
- return 0;
- else
- return 1;
}
-#endif
static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
int subsampling_y) {
BLOCK_SIZE bs = bsize;
-
- if (bs < BLOCK_8X8) {
- if (subsampling_x == 1 && subsampling_y == 1)
- bs = BLOCK_8X8;
- else if (subsampling_x == 1)
- bs = BLOCK_8X4;
- else if (subsampling_y == 1)
- bs = BLOCK_4X8;
+ switch (bsize) {
+ case BLOCK_4X4:
+ if (subsampling_x == 1 && subsampling_y == 1)
+ bs = BLOCK_8X8;
+ else if (subsampling_x == 1)
+ bs = BLOCK_8X4;
+ else if (subsampling_y == 1)
+ bs = BLOCK_4X8;
+ break;
+ case BLOCK_4X8:
+ if (subsampling_x == 1 && subsampling_y == 1)
+ bs = BLOCK_8X8;
+ else if (subsampling_x == 1)
+ bs = BLOCK_8X8;
+ else if (subsampling_y == 1)
+ bs = BLOCK_4X8;
+ break;
+ case BLOCK_8X4:
+ if (subsampling_x == 1 && subsampling_y == 1)
+ bs = BLOCK_8X8;
+ else if (subsampling_x == 1)
+ bs = BLOCK_8X4;
+ else if (subsampling_y == 1)
+ bs = BLOCK_8X8;
+ break;
+ case BLOCK_4X16:
+ if (subsampling_x == 1 && subsampling_y == 1)
+ bs = BLOCK_8X16;
+ else if (subsampling_x == 1)
+ bs = BLOCK_8X16;
+ else if (subsampling_y == 1)
+ bs = BLOCK_4X16;
+ break;
+ case BLOCK_16X4:
+ if (subsampling_x == 1 && subsampling_y == 1)
+ bs = BLOCK_16X8;
+ else if (subsampling_x == 1)
+ bs = BLOCK_16X4;
+ else if (subsampling_y == 1)
+ bs = BLOCK_16X8;
+ break;
+ default: break;
}
-
return bs;
}
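
Editor's note: for example, under 4:2:0 subsampling (subsampling_x = subsampling_y = 1) the switch above maps BLOCK_4X4 to BLOCK_8X8 and BLOCK_4X16 to BLOCK_8X16 — every sub-8x8 luma dimension is doubled so the corresponding chroma plane keeps at least a 4-sample extent — while block sizes of 8x8 and larger fall through the default case unchanged.
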
-#endif
static INLINE aom_cdf_prob cdf_element_prob(const aom_cdf_prob *cdf,
size_t element) {
assert(cdf != NULL);
-#if !CONFIG_ANS
return (element > 0 ? cdf[element - 1] : CDF_PROB_TOP) - cdf[element];
-#else
- return cdf[element] - (element > 0 ? cdf[element - 1] : 0);
-#endif
}
static INLINE void partition_gather_horz_alike(aom_cdf_prob *out,
- const aom_cdf_prob *const in) {
+ const aom_cdf_prob *const in,
+ BLOCK_SIZE bsize) {
+ (void)bsize;
out[0] = CDF_PROB_TOP;
out[0] -= cdf_element_prob(in, PARTITION_HORZ);
out[0] -= cdf_element_prob(in, PARTITION_SPLIT);
-#if CONFIG_EXT_PARTITION_TYPES
out[0] -= cdf_element_prob(in, PARTITION_HORZ_A);
out[0] -= cdf_element_prob(in, PARTITION_HORZ_B);
out[0] -= cdf_element_prob(in, PARTITION_VERT_A);
-#endif
+ if (bsize != BLOCK_128X128) out[0] -= cdf_element_prob(in, PARTITION_HORZ_4);
out[0] = AOM_ICDF(out[0]);
out[1] = AOM_ICDF(CDF_PROB_TOP);
}
static INLINE void partition_gather_vert_alike(aom_cdf_prob *out,
- const aom_cdf_prob *const in) {
+ const aom_cdf_prob *const in,
+ BLOCK_SIZE bsize) {
+ (void)bsize;
out[0] = CDF_PROB_TOP;
out[0] -= cdf_element_prob(in, PARTITION_VERT);
out[0] -= cdf_element_prob(in, PARTITION_SPLIT);
-#if CONFIG_EXT_PARTITION_TYPES
out[0] -= cdf_element_prob(in, PARTITION_HORZ_A);
out[0] -= cdf_element_prob(in, PARTITION_VERT_A);
out[0] -= cdf_element_prob(in, PARTITION_VERT_B);
-#endif
+ if (bsize != BLOCK_128X128) out[0] -= cdf_element_prob(in, PARTITION_VERT_4);
out[0] = AOM_ICDF(out[0]);
out[1] = AOM_ICDF(CDF_PROB_TOP);
}
-#if CONFIG_EXT_PARTITION_TYPES
static INLINE void update_ext_partition_context(MACROBLOCKD *xd, int mi_row,
int mi_col, BLOCK_SIZE subsize,
BLOCK_SIZE bsize,
PARTITION_TYPE partition) {
if (bsize >= BLOCK_8X8) {
-#if !CONFIG_EXT_PARTITION_TYPES_AB
const int hbs = mi_size_wide[bsize] / 2;
- BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
-#endif
+ BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
switch (partition) {
case PARTITION_SPLIT:
if (bsize != BLOCK_8X8) break;
+ AOM_FALLTHROUGH_INTENDED;
case PARTITION_NONE:
case PARTITION_HORZ:
case PARTITION_VERT:
@@ -1010,30 +979,6 @@ static INLINE void update_ext_partition_context(MACROBLOCKD *xd, int mi_row,
case PARTITION_VERT_4:
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
break;
-#if CONFIG_EXT_PARTITION_TYPES_AB
- case PARTITION_HORZ_A:
- update_partition_context(xd, mi_row, mi_col,
- get_subsize(bsize, PARTITION_HORZ_4), subsize);
- update_partition_context(xd, mi_row + mi_size_high[bsize] / 2, mi_col,
- subsize, subsize);
- break;
- case PARTITION_HORZ_B:
- update_partition_context(xd, mi_row, mi_col, subsize, subsize);
- update_partition_context(xd, mi_row + mi_size_high[bsize] / 2, mi_col,
- get_subsize(bsize, PARTITION_HORZ_4), subsize);
- break;
- case PARTITION_VERT_A:
- update_partition_context(xd, mi_row, mi_col,
- get_subsize(bsize, PARTITION_VERT_4), subsize);
- update_partition_context(xd, mi_row, mi_col + mi_size_wide[bsize] / 2,
- subsize, subsize);
- break;
- case PARTITION_VERT_B:
- update_partition_context(xd, mi_row, mi_col, subsize, subsize);
- update_partition_context(xd, mi_row, mi_col + mi_size_wide[bsize] / 2,
- get_subsize(bsize, PARTITION_VERT_4), subsize);
- break;
-#else
case PARTITION_HORZ_A:
update_partition_context(xd, mi_row, mi_col, bsize2, subsize);
update_partition_context(xd, mi_row + hbs, mi_col, subsize, subsize);
@@ -1050,41 +995,35 @@ static INLINE void update_ext_partition_context(MACROBLOCKD *xd, int mi_row,
update_partition_context(xd, mi_row, mi_col, subsize, subsize);
update_partition_context(xd, mi_row, mi_col + hbs, bsize2, subsize);
break;
-#endif
default: assert(0 && "Invalid partition type");
}
}
}
-#endif // CONFIG_EXT_PARTITION_TYPES
static INLINE int partition_plane_context(const MACROBLOCKD *xd, int mi_row,
- int mi_col,
-#if CONFIG_UNPOISON_PARTITION_CTX
- int has_rows, int has_cols,
-#endif
- BLOCK_SIZE bsize) {
+ int mi_col, BLOCK_SIZE bsize) {
const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col;
const PARTITION_CONTEXT *left_ctx =
xd->left_seg_context + (mi_row & MAX_MIB_MASK);
// Minimum partition point is 8x8. Offset the bsl accordingly.
- const int bsl = mi_width_log2_lookup[bsize] - mi_width_log2_lookup[BLOCK_8X8];
+ const int bsl = mi_size_wide_log2[bsize] - mi_size_wide_log2[BLOCK_8X8];
int above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1;
- assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
+ assert(mi_size_wide_log2[bsize] == mi_size_high_log2[bsize]);
assert(bsl >= 0);
-#if CONFIG_UNPOISON_PARTITION_CTX
- if (has_rows && has_cols)
- return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
- else if (has_rows && !has_cols)
- return PARTITION_CONTEXTS_PRIMARY + bsl;
- else if (!has_rows && has_cols)
- return PARTITION_CONTEXTS_PRIMARY + PARTITION_BLOCK_SIZES + bsl;
- else
- return INVALID_PARTITION_CTX; // Bogus context, forced SPLIT
-#else
return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
-#endif
+}
+
+// Return the number of elements in the partition CDF when
+// partitioning the (square) block with luma block size of bsize.
+static INLINE int partition_cdf_length(BLOCK_SIZE bsize) {
+ if (bsize <= BLOCK_8X8)
+ return PARTITION_TYPES;
+ else if (bsize == BLOCK_128X128)
+ return EXT_PARTITION_TYPES - 2;
+ else
+ return EXT_PARTITION_TYPES;
}
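
Editor's note: as a quick sanity check on partition_cdf_length, using the enum sizes from av1/common/enums.h (PARTITION_TYPES is 4 and EXT_PARTITION_TYPES is 10): an 8x8 block can only code NONE/HORZ/VERT/SPLIT, so its CDF has 4 symbols; a 128x128 block cannot use HORZ_4 or VERT_4, giving 10 - 2 = 8; every other square size uses the full 10-symbol CDF.
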
static INLINE int max_block_wide(const MACROBLOCKD *xd, BLOCK_SIZE bsize,
@@ -1107,11 +1046,10 @@ static INLINE int max_block_high(const MACROBLOCKD *xd, BLOCK_SIZE bsize,
if (xd->mb_to_bottom_edge < 0)
max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y);
- // Scale the width in the transform block unit.
- return max_blocks_high >> tx_size_wide_log2[0];
+ // Scale the height in the transform block unit.
+ return max_blocks_high >> tx_size_high_log2[0];
}
-#if CONFIG_CFL
static INLINE int max_intra_block_width(const MACROBLOCKD *xd,
BLOCK_SIZE plane_bsize, int plane,
TX_SIZE tx_size) {
@@ -1127,36 +1065,43 @@ static INLINE int max_intra_block_height(const MACROBLOCKD *xd,
<< tx_size_high_log2[0];
return ALIGN_POWER_OF_TWO(max_blocks_high, tx_size_high_log2[tx_size]);
}
-#endif // CONFIG_CFL
static INLINE void av1_zero_above_context(AV1_COMMON *const cm,
- int mi_col_start, int mi_col_end) {
+ int mi_col_start, int mi_col_end, const int tile_row) {
+ const int num_planes = av1_num_planes(cm);
const int width = mi_col_end - mi_col_start;
- const int aligned_width = ALIGN_POWER_OF_TWO(width, cm->mib_size_log2);
+ const int aligned_width =
+ ALIGN_POWER_OF_TWO(width, cm->seq_params.mib_size_log2);
- const int offset_y = mi_col_start << (MI_SIZE_LOG2 - tx_size_wide_log2[0]);
- const int width_y = aligned_width << (MI_SIZE_LOG2 - tx_size_wide_log2[0]);
+ const int offset_y = mi_col_start;
+ const int width_y = aligned_width;
const int offset_uv = offset_y >> cm->subsampling_x;
const int width_uv = width_y >> cm->subsampling_x;
- av1_zero_array(cm->above_context[0] + offset_y, width_y);
- av1_zero_array(cm->above_context[1] + offset_uv, width_uv);
- av1_zero_array(cm->above_context[2] + offset_uv, width_uv);
+ av1_zero_array(cm->above_context[0][tile_row] + offset_y, width_y);
+ if (num_planes > 1) {
+ if (cm->above_context[1][tile_row] && cm->above_context[2][tile_row]) {
+ av1_zero_array(cm->above_context[1][tile_row] + offset_uv, width_uv);
+ av1_zero_array(cm->above_context[2][tile_row] + offset_uv, width_uv);
+ } else {
+ aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ "Invalid value of planes");
+ }
+ }
- av1_zero_array(cm->above_seg_context + mi_col_start, aligned_width);
+ av1_zero_array(cm->above_seg_context[tile_row] + mi_col_start, aligned_width);
-#if CONFIG_VAR_TX
- av1_zero_array(cm->above_txfm_context + (mi_col_start << TX_UNIT_WIDE_LOG2),
- aligned_width << TX_UNIT_WIDE_LOG2);
-#endif // CONFIG_VAR_TX
+ memset(cm->above_txfm_context[tile_row] + mi_col_start,
+ tx_size_wide[TX_SIZES_LARGEST],
+ aligned_width * sizeof(TXFM_CONTEXT));
}
static INLINE void av1_zero_left_context(MACROBLOCKD *const xd) {
av1_zero(xd->left_context);
av1_zero(xd->left_seg_context);
-#if CONFIG_VAR_TX
- av1_zero(xd->left_txfm_context_buffer);
-#endif
+
+ memset(xd->left_txfm_context_buffer, tx_size_high[TX_SIZES_LARGEST],
+ sizeof(xd->left_txfm_context_buffer));
}
// Disable array-bounds checks as the TX_SIZE enum contains values larger than
@@ -1166,15 +1111,11 @@ static INLINE void av1_zero_left_context(MACROBLOCKD *const xd) {
#if defined(__GNUC__) && __GNUC__ >= 4
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
-static INLINE TX_SIZE get_min_tx_size(TX_SIZE tx_size) {
- assert(tx_size < TX_SIZES_ALL);
- return txsize_sqr_map[tx_size];
-}
+
#if defined(__GNUC__) && __GNUC__ >= 4
#pragma GCC diagnostic warning "-Warray-bounds"
#endif
-#if CONFIG_VAR_TX
static INLINE void set_txfm_ctx(TXFM_CONTEXT *txfm_ctx, uint8_t txs, int len) {
int i;
for (i = 0; i < len; ++i) txfm_ctx[i] = txs;
@@ -1190,16 +1131,16 @@ static INLINE void set_txfm_ctxs(TX_SIZE tx_size, int n8_w, int n8_h, int skip,
bh = n8_h * MI_SIZE;
}
- set_txfm_ctx(xd->above_txfm_context, bw, n8_w << TX_UNIT_WIDE_LOG2);
- set_txfm_ctx(xd->left_txfm_context, bh, n8_h << TX_UNIT_HIGH_LOG2);
+ set_txfm_ctx(xd->above_txfm_context, bw, n8_w);
+ set_txfm_ctx(xd->left_txfm_context, bh, n8_h);
}
static INLINE void txfm_partition_update(TXFM_CONTEXT *above_ctx,
TXFM_CONTEXT *left_ctx,
TX_SIZE tx_size, TX_SIZE txb_size) {
BLOCK_SIZE bsize = txsize_to_bsize[txb_size];
- int bh = mi_size_high[bsize] << TX_UNIT_HIGH_LOG2;
- int bw = mi_size_wide[bsize] << TX_UNIT_WIDE_LOG2;
+ int bh = mi_size_high[bsize];
+ int bw = mi_size_wide[bsize];
uint8_t txw = tx_size_wide[tx_size];
uint8_t txh = tx_size_high[tx_size];
int i;
@@ -1209,16 +1150,8 @@ static INLINE void txfm_partition_update(TXFM_CONTEXT *above_ctx,
static INLINE TX_SIZE get_sqr_tx_size(int tx_dim) {
switch (tx_dim) {
-#if CONFIG_EXT_PARTITION
case 128:
-#endif // CONFIG_EXT_PARTITION
- case 64:
-#if CONFIG_TX64X64
- return TX_64X64;
-#else
- return TX_32X32;
-#endif // CONFIG_TX64X64
- break;
+ case 64: return TX_64X64; break;
case 32: return TX_32X32; break;
case 16: return TX_16X16; break;
case 8: return TX_8X8; break;
@@ -1226,6 +1159,45 @@ static INLINE TX_SIZE get_sqr_tx_size(int tx_dim) {
}
}
+static INLINE TX_SIZE get_tx_size(int width, int height) {
+ if (width == height) {
+ return get_sqr_tx_size(width);
+ }
+ if (width < height) {
+ if (width + width == height) {
+ switch (width) {
+ case 4: return TX_4X8; break;
+ case 8: return TX_8X16; break;
+ case 16: return TX_16X32; break;
+ case 32: return TX_32X64; break;
+ }
+ } else {
+ switch (width) {
+ case 4: return TX_4X16; break;
+ case 8: return TX_8X32; break;
+ case 16: return TX_16X64; break;
+ }
+ }
+ } else {
+ if (height + height == width) {
+ switch (height) {
+ case 4: return TX_8X4; break;
+ case 8: return TX_16X8; break;
+ case 16: return TX_32X16; break;
+ case 32: return TX_64X32; break;
+ }
+ } else {
+ switch (height) {
+ case 4: return TX_16X4; break;
+ case 8: return TX_32X8; break;
+ case 16: return TX_64X16; break;
+ }
+ }
+ }
+ assert(0);
+ return TX_4X4;
+}
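
Editor's note: worked examples for the new get_tx_size helper, assuming the standard TX_SIZE enum: get_tx_size(16, 16) routes through get_sqr_tx_size and yields TX_16X16; get_tx_size(8, 16) takes the width + width == height branch and yields TX_8X16; get_tx_size(64, 16) takes the 4:1 branch on the height side and yields TX_64X16. Dimension pairs that match none of the cases fall through to the assert(0) and the TX_4X4 fallback.
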
+
static INLINE int txfm_partition_context(TXFM_CONTEXT *above_ctx,
TXFM_CONTEXT *left_ctx,
BLOCK_SIZE bsize, TX_SIZE tx_size) {
@@ -1233,7 +1205,7 @@ static INLINE int txfm_partition_context(TXFM_CONTEXT *above_ctx,
const uint8_t txh = tx_size_high[tx_size];
const int above = *above_ctx < txw;
const int left = *left_ctx < txh;
- int category = TXFM_PARTITION_CONTEXTS - 1;
+ int category = TXFM_PARTITION_CONTEXTS;
// dummy return, not used by others.
if (tx_size <= TX_4X4) return 0;
@@ -1242,13 +1214,13 @@ static INLINE int txfm_partition_context(TXFM_CONTEXT *above_ctx,
get_sqr_tx_size(AOMMAX(block_size_wide[bsize], block_size_high[bsize]));
if (max_tx_size >= TX_8X8) {
- category = (tx_size != max_tx_size && max_tx_size > TX_8X8) +
- (TX_SIZES - 1 - max_tx_size) * 2;
+ category =
+ (txsize_sqr_up_map[tx_size] != max_tx_size && max_tx_size > TX_8X8) +
+ (TX_SIZES - 1 - max_tx_size) * 2;
}
- if (category == TXFM_PARTITION_CONTEXTS - 1) return category;
+ assert(category != TXFM_PARTITION_CONTEXTS);
return category * 3 + above + left;
}
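
Editor's note: a worked example of the reworked context formula, assuming the usual enum values (TX_SIZES == 5, TX_32X32 == 3): for a 32x32 block whose coded transform is already 32x32, max_tx_size is TX_32X32 and txsize_sqr_up_map[tx_size] equals it, so category = 0 + (5 - 1 - 3) * 2 = 2 and the returned context is 2 * 3 + above + left, i.e. 6, 7 or 8 depending on the neighbouring transform sizes.
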
-#endif
// Compute the next partition in the direction of the sb_type stored in the mi
// array, starting with bsize.
@@ -1258,8 +1230,8 @@ static INLINE PARTITION_TYPE get_partition(const AV1_COMMON *const cm,
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return PARTITION_INVALID;
const int offset = mi_row * cm->mi_stride + mi_col;
- MODE_INFO **mi = cm->mi_grid_visible + offset;
- const BLOCK_SIZE subsize = mi[0]->mbmi.sb_type;
+ MB_MODE_INFO **mi = cm->mi_grid_visible + offset;
+ const BLOCK_SIZE subsize = mi[0]->sb_type;
if (subsize == bsize) return PARTITION_NONE;
@@ -1268,25 +1240,14 @@ static INLINE PARTITION_TYPE get_partition(const AV1_COMMON *const cm,
const int sshigh = mi_size_high[subsize];
const int sswide = mi_size_wide[subsize];
-#if CONFIG_EXT_PARTITION_TYPES
if (bsize > BLOCK_8X8 && mi_row + bwide / 2 < cm->mi_rows &&
mi_col + bhigh / 2 < cm->mi_cols) {
// In this case, the block might be using an extended partition
// type.
- const MB_MODE_INFO *const mbmi_right = &mi[bwide / 2]->mbmi;
- const MB_MODE_INFO *const mbmi_below = &mi[bhigh / 2 * cm->mi_stride]->mbmi;
+ const MB_MODE_INFO *const mbmi_right = mi[bwide / 2];
+ const MB_MODE_INFO *const mbmi_below = mi[bhigh / 2 * cm->mi_stride];
if (sswide == bwide) {
-#if CONFIG_EXT_PARTITION_TYPES_AB
- // Smaller height but same width. Is PARTITION_HORZ, PARTITION_HORZ_4,
- // PARTITION_HORZ_A or PARTITION_HORZ_B.
- if (sshigh * 2 == bhigh)
- return (mbmi_below->sb_type == subsize) ? PARTITION_HORZ
- : PARTITION_HORZ_B;
- assert(sshigh * 4 == bhigh);
- return (mbmi_below->sb_type == subsize) ? PARTITION_HORZ_4
- : PARTITION_HORZ_A;
-#else
// Smaller height but same width. Is PARTITION_HORZ_4, PARTITION_HORZ or
// PARTITION_HORZ_B. To distinguish the latter two, check if the lower
// half was split.
@@ -1297,18 +1258,7 @@ static INLINE PARTITION_TYPE get_partition(const AV1_COMMON *const cm,
return PARTITION_HORZ;
else
return PARTITION_HORZ_B;
-#endif
} else if (sshigh == bhigh) {
-#if CONFIG_EXT_PARTITION_TYPES_AB
- // Smaller width but same height. Is PARTITION_VERT, PARTITION_VERT_4,
- // PARTITION_VERT_A or PARTITION_VERT_B.
- if (sswide * 2 == bwide)
- return (mbmi_right->sb_type == subsize) ? PARTITION_VERT
- : PARTITION_VERT_B;
- assert(sswide * 4 == bwide);
- return (mbmi_right->sb_type == subsize) ? PARTITION_VERT_4
- : PARTITION_VERT_A;
-#else
// Smaller width but same height. Is PARTITION_VERT_4, PARTITION_VERT or
// PARTITION_VERT_B. To distinguish the latter two, check if the right
// half was split.
@@ -1319,9 +1269,7 @@ static INLINE PARTITION_TYPE get_partition(const AV1_COMMON *const cm,
return PARTITION_VERT;
else
return PARTITION_VERT_B;
-#endif
} else {
-#if !CONFIG_EXT_PARTITION_TYPES_AB
// Smaller width and smaller height. Might be PARTITION_SPLIT or could be
// PARTITION_HORZ_A or PARTITION_VERT_A. If subsize isn't halved in both
// dimensions, we immediately know this is a split (which will recurse to
@@ -1333,12 +1281,10 @@ static INLINE PARTITION_TYPE get_partition(const AV1_COMMON *const cm,
if (mi_size_wide[mbmi_below->sb_type] == bwide) return PARTITION_HORZ_A;
if (mi_size_high[mbmi_right->sb_type] == bhigh) return PARTITION_VERT_A;
-#endif
return PARTITION_SPLIT;
}
}
-#endif
const int vert_split = sswide < bwide;
const int horz_split = sshigh < bhigh;
const int split_idx = (vert_split << 1) | horz_split;
@@ -1352,49 +1298,46 @@ static INLINE PARTITION_TYPE get_partition(const AV1_COMMON *const cm,
}
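
When none of the extended-partition cases above apply, the function falls through to a two-bit index built from whether the sub-block is narrower and/or shorter than its parent. A hedged sketch of that final lookup; the table contents are inferred from the surrounding logic, not copied from this hunk:

/* Sketch only: map the two "sub-block is narrower / shorter" flags to a base
   partition type; the real lookup table lives outside this hunk. */
typedef enum { SK_NONE, SK_HORZ, SK_VERT, SK_SPLIT } sketch_partition;

static sketch_partition base_partition_sketch(int sswide, int bwide,
                                              int sshigh, int bhigh) {
  const int vert_split = sswide < bwide;  /* narrower than parent */
  const int horz_split = sshigh < bhigh;  /* shorter than parent */
  static const sketch_partition lut[4] = { SK_NONE, SK_HORZ, SK_VERT,
                                           SK_SPLIT };
  return lut[(vert_split << 1) | horz_split];
}
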
static INLINE void set_use_reference_buffer(AV1_COMMON *const cm, int use) {
-#if CONFIG_REFERENCE_BUFFER
cm->seq_params.frame_id_numbers_present_flag = use;
-#else
- (void)cm;
- (void)use;
-#endif
}
-static INLINE void set_sb_size(AV1_COMMON *const cm, BLOCK_SIZE sb_size) {
- cm->sb_size = sb_size;
- cm->mib_size = mi_size_wide[cm->sb_size];
-#if CONFIG_CB4X4
- cm->mib_size_log2 = b_width_log2_lookup[cm->sb_size];
-#else
- cm->mib_size_log2 = mi_width_log2_lookup[cm->sb_size];
-#endif
+static INLINE void set_sb_size(SequenceHeader *const seq_params,
+ BLOCK_SIZE sb_size) {
+ seq_params->sb_size = sb_size;
+ seq_params->mib_size = mi_size_wide[seq_params->sb_size];
+ seq_params->mib_size_log2 = mi_size_wide_log2[seq_params->sb_size];
}
-static INLINE int all_lossless(const AV1_COMMON *cm, const MACROBLOCKD *xd) {
- int i;
- int all_lossless = 1;
+// Returns true if the frame is fully lossless at the coded resolution.
+// Note: If super-resolution is used, such a frame will still NOT be lossless at
+// the upscaled resolution.
+static INLINE int is_coded_lossless(const AV1_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ int coded_lossless = 1;
if (cm->seg.enabled) {
- for (i = 0; i < MAX_SEGMENTS; ++i) {
+ for (int i = 0; i < MAX_SEGMENTS; ++i) {
if (!xd->lossless[i]) {
- all_lossless = 0;
+ coded_lossless = 0;
break;
}
}
} else {
- all_lossless = xd->lossless[0];
+ coded_lossless = xd->lossless[0];
}
- return all_lossless;
+ return coded_lossless;
}
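
As the new comment stresses, is_coded_lossless() only describes the coded resolution. A hedged sketch of how a caller might derive an end-to-end lossless flag; the superres_upscaled_width field and the combination shown are assumptions for illustration, not taken from this patch:

/* Sketch under stated assumptions: a frame is lossless end-to-end only when
   it is coded losslessly AND no super-resolution upscaling is applied. */
static int is_all_lossless_sketch(const AV1_COMMON *cm, const MACROBLOCKD *xd) {
  const int superres_scaled = (cm->width != cm->superres_upscaled_width);
  return is_coded_lossless(cm, xd) && !superres_scaled;
}
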
-static INLINE int use_compressed_header(const AV1_COMMON *cm) {
- (void)cm;
-#if CONFIG_RESTRICT_COMPRESSED_HDR && CONFIG_NEW_MULTISYMBOL
- return 0;
-#elif CONFIG_RESTRICT_COMPRESSED_HDR
- return cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_FORWARD;
-#else
- return 1;
-#endif // CONFIG_RESTRICT_COMPRESSED_HDR && CONFIG_NEW_MULTISYMBOL
+static INLINE int is_valid_seq_level_idx(uint8_t seq_level_idx) {
+ return seq_level_idx < 24 || seq_level_idx == 31;
+}
+
+static INLINE uint8_t major_minor_to_seq_level_idx(BitstreamLevel bl) {
+ assert(bl.major >= LEVEL_MAJOR_MIN && bl.major <= LEVEL_MAJOR_MAX);
+ // bl.minor is unsigned, so comparing it against LEVEL_MINOR_MIN (0) would
+ // only trigger a "comparison is always true" compiler warning.
+ assert(LEVEL_MINOR_MIN == 0);
+ assert(bl.minor <= LEVEL_MINOR_MAX);
+ return ((bl.major - LEVEL_MAJOR_MIN) << LEVEL_MINOR_BITS) + bl.minor;
}
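
major_minor_to_seq_level_idx() packs (major, minor) into the index that is_valid_seq_level_idx() checks. A small worked example, assuming the usual values LEVEL_MAJOR_MIN == 2 and LEVEL_MINOR_BITS == 2 (neither constant is shown in this hunk):

/* Example under the assumption LEVEL_MAJOR_MIN == 2, LEVEL_MINOR_BITS == 2:
   level 4.1 packs to (4 - 2) << 2 | 1 == 9 and level 7.3 packs to 23, the
   largest ordinary index; 31 is the reserved "maximum parameters" value. */
static void seq_level_idx_example(void) {
  BitstreamLevel bl;
  bl.major = 4;
  bl.minor = 1;
  assert(major_minor_to_seq_level_idx(bl) == 9);
  assert(is_valid_seq_level_idx(9));
  assert(is_valid_seq_level_idx(31));
}
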
#ifdef __cplusplus
diff --git a/third_party/aom/av1/common/partition.c b/third_party/aom/av1/common/partition.c
deleted file mode 100644
index 634a9edd5..000000000
--- a/third_party/aom/av1/common/partition.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/* clang-format off */
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include "enums.h"
-#include "odintrin.h"
-#include "partition.h"
-#include "zigzag.h"
-
-OD_EXTERN const index_pair *OD_ZIGZAG4[4] = {
- OD_ZIGZAG4_DCT_DCT,
- OD_ZIGZAG4_ADST_DCT,
- OD_ZIGZAG4_DCT_ADST,
- OD_ZIGZAG4_ADST_ADST
-};
-
-OD_EXTERN const index_pair *OD_ZIGZAG8[4] = {
- OD_ZIGZAG8_DCT_DCT,
- OD_ZIGZAG8_ADST_DCT,
- OD_ZIGZAG8_DCT_ADST,
- OD_ZIGZAG8_ADST_ADST
-};
-
-OD_EXTERN const index_pair *OD_ZIGZAG16[4] = {
- OD_ZIGZAG16_DCT_DCT,
- OD_ZIGZAG16_ADST_DCT,
- OD_ZIGZAG16_DCT_ADST,
- OD_ZIGZAG16_ADST_ADST
-};
-
-OD_EXTERN const index_pair *OD_ZIGZAG32[4] = {
- OD_ZIGZAG32_DCT_DCT,
- OD_ZIGZAG32_DCT_DCT,
- OD_ZIGZAG32_DCT_DCT,
- OD_ZIGZAG32_DCT_DCT
-};
-
-/* The tables below specify how coefficient blocks are translated to
- and from PVQ partition coding scan order for 4x4, 8x8 and 16x16 */
-
-static const int OD_LAYOUT32_OFFSETS[4] = { 0, 128, 256, 768 };
-const band_layout OD_LAYOUT32 = {
- OD_ZIGZAG32,
- 32,
- 3,
- OD_LAYOUT32_OFFSETS
-};
-
-static const int OD_LAYOUT16_OFFSETS[4] = { 0, 32, 64, 192 };
-const band_layout OD_LAYOUT16 = {
- OD_ZIGZAG16,
- 16,
- 3,
- OD_LAYOUT16_OFFSETS
-};
-
-const int OD_LAYOUT8_OFFSETS[4] = { 0, 8, 16, 48 };
-const band_layout OD_LAYOUT8 = {
- OD_ZIGZAG8,
- 8,
- 3,
- OD_LAYOUT8_OFFSETS
-};
-
-static const int OD_LAYOUT4_OFFSETS[2] = { 0, 15 };
-const band_layout OD_LAYOUT4 = {
- OD_ZIGZAG4,
- 4,
- 1,
- OD_LAYOUT4_OFFSETS
-};
-
-/* First element is the number of bands, followed by the list all the band
- boundaries. */
-static const int OD_BAND_OFFSETS4[] = {1, 1, 16};
-static const int OD_BAND_OFFSETS8[] = {4, 1, 16, 24, 32, 64};
-static const int OD_BAND_OFFSETS16[] = {7, 1, 16, 24, 32, 64, 96, 128, 256};
-static const int OD_BAND_OFFSETS32[] = {10, 1, 16, 24, 32, 64, 96, 128, 256,
- 384, 512, 1024};
-static const int OD_BAND_OFFSETS64[] = {13, 1, 16, 24, 32, 64, 96, 128, 256,
- 384, 512, 1024, 1536, 2048, 4096};
-
-const int *const OD_BAND_OFFSETS[OD_TXSIZES + 1] = {
- OD_BAND_OFFSETS4,
- OD_BAND_OFFSETS8,
- OD_BAND_OFFSETS16,
- OD_BAND_OFFSETS32,
- OD_BAND_OFFSETS64
-};
-
-/** Perform a single stage of conversion from a coefficient block in
- * raster order into coding scan order
- *
- * @param [in] layout scan order specification
- * @param [out] dst destination vector
- * @param [in] src source coefficient block
- * @param [int] int source vector row stride
- */
-static void od_band_from_raster(const band_layout *layout, tran_low_t *dst,
- const tran_low_t *src, int stride, TX_TYPE tx_type) {
- int i;
- int len;
- len = layout->band_offsets[layout->nb_bands];
- for (i = 0; i < len; i++) {
- dst[i] = src[layout->dst_table[tx_type][i][1]*stride + layout->dst_table[tx_type][i][0]];
- }
-}
-
-/** Perform a single stage of conversion from a vector in coding scan
- order back into a coefficient block in raster order
- *
- * @param [in] layout scan order specification
- * @param [out] dst destination coefficient block
- * @param [in] src source vector
- * @param [int] stride destination vector row stride
- */
-static void od_raster_from_band(const band_layout *layout, tran_low_t *dst,
- int stride, TX_TYPE tx_type, const tran_low_t *src) {
- int i;
- int len;
- len = layout->band_offsets[layout->nb_bands];
- for (i = 0; i < len; i++) {
- dst[layout->dst_table[tx_type][i][1]*stride + layout->dst_table[tx_type][i][0]] = src[i];
- }
-}
-
-static const band_layout *const OD_LAYOUTS[] = {&OD_LAYOUT4, &OD_LAYOUT8,
- &OD_LAYOUT16, &OD_LAYOUT32};
-
-/** Converts a coefficient block in raster order into a vector in
- * coding scan order with the PVQ partitions laid out one after
- * another. This works in stages; the 4x4 conversion is applied to
- * the coefficients nearest DC, then the 8x8 applied to the 8x8 block
- * nearest DC that was not already coded by 4x4, then 16x16 following
- * the same pattern.
- *
- * @param [out] dst destination vector
- * @param [in] n block size (along one side)
- * @param [in] ty_type transfrom type
- * @param [in] src source coefficient block
- * @param [in] stride source vector row stride
- */
-void od_raster_to_coding_order(tran_low_t *dst, int n, TX_TYPE ty_type,
- const tran_low_t *src, int stride) {
- int bs;
- /* dst + 1 because DC is not included for 4x4 blocks. */
- od_band_from_raster(OD_LAYOUTS[0], dst + 1, src, stride, ty_type);
- for (bs = 1; bs < OD_TXSIZES; bs++) {
- int size;
- int offset;
- /* Length of block size > 4. */
- size = 1 << (OD_LOG_BSIZE0 + bs);
- /* Offset is the size of the previous block squared. */
- offset = 1 << 2*(OD_LOG_BSIZE0 - 1 + bs);
- if (n >= size) {
- /* 3 16x16 bands come after 3 8x8 bands, which come after 2 4x4 bands. */
- od_band_from_raster(OD_LAYOUTS[bs], dst + offset, src, stride, ty_type);
- }
- }
- dst[0] = src[0];
-}
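
In the removed helper above, the only subtle step is the offset formula; assuming OD_LOG_BSIZE0 == 2 (a 4x4 base block, defined elsewhere in the deleted code), the worked values below confirm the comment that each stage starts right after all coefficients of the previous, smaller block size:

#include <assert.h>

/* Worked values of offset = 1 << 2*(OD_LOG_BSIZE0 - 1 + bs), assuming
   OD_LOG_BSIZE0 == 2 (illustration only; this code path is being removed). */
static void pvq_band_offset_example(void) {
  assert((1 << (2 * (2 - 1 + 1))) == 16);  /* bs = 1 (8x8):   4*4   */
  assert((1 << (2 * (2 - 1 + 2))) == 64);  /* bs = 2 (16x16): 8*8   */
  assert((1 << (2 * (2 - 1 + 3))) == 256); /* bs = 3 (32x32): 16*16 */
}
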
-
-/** Converts a vector in coding scan order witht he PVQ partitions
- * laid out one after another into a coefficient block in raster
- * order. This works in stages in the reverse order of raster->scan
- * order; the 16x16 conversion is applied to the coefficients that
- * don't appear in an 8x8 block, then the 8x8 applied to the 8x8 block
- * sans the 4x4 block it contains, then 4x4 is converted sans DC.
- *
- * @param [out] dst destination coefficient block
- * @param [in] stride destination vector row stride
- * @param [in] src source vector
- * @param [in] n block size (along one side)
- */
-void od_coding_order_to_raster(tran_low_t *dst, int stride, TX_TYPE ty_type,
- const tran_low_t *src, int n) {
- int bs;
- /* src + 1 because DC is not included for 4x4 blocks. */
- od_raster_from_band(OD_LAYOUTS[0], dst, stride, ty_type, src + 1);
- for (bs = 1; bs < OD_TXSIZES; bs++) {
- int size;
- int offset;
- /* Length of block size > 4 */
- size = 1 << (OD_LOG_BSIZE0 + bs);
- /* Offset is the size of the previous block squared. */
- offset = 1 << 2*(OD_LOG_BSIZE0 - 1 + bs);
- if (n >= size) {
- /* 3 16x16 bands come after 3 8x8 bands, which come after 2 4x4 bands. */
- od_raster_from_band(OD_LAYOUTS[bs], dst, stride, ty_type, src + offset);
- }
- }
- dst[0] = src[0];
-}
-
-/** Perform a single stage of conversion from a coefficient block in
- * raster order into coding scan order
- *
- * @param [in] layout scan order specification
- * @param [out] dst destination vector
- * @param [in] src source coefficient block
- * @param [int] int source vector row stride
- */
-static void od_band_from_raster_16(const band_layout *layout, int16_t *dst,
- const int16_t *src, int stride) {
- int i;
- int len;
- len = layout->band_offsets[layout->nb_bands];
- for (i = 0; i < len; i++) {
- dst[i] = src[layout->dst_table[DCT_DCT][i][1]*stride + layout->dst_table[DCT_DCT][i][0]];
- }
-}
-
-/** Converts a coefficient block in raster order into a vector in
- * coding scan order with the PVQ partitions laid out one after
- * another. This works in stages; the 4x4 conversion is applied to
- * the coefficients nearest DC, then the 8x8 applied to the 8x8 block
- * nearest DC that was not already coded by 4x4, then 16x16 following
- * the same pattern.
- *
- * @param [out] dst destination vector
- * @param [in] n block size (along one side)
- * @param [in] src source coefficient block
- * @param [in] stride source vector row stride
- */
-void od_raster_to_coding_order_16(int16_t *dst, int n, const int16_t *src,
- int stride) {
- int bs;
- /* dst + 1 because DC is not included for 4x4 blocks. */
- od_band_from_raster_16(OD_LAYOUTS[0], dst + 1, src, stride);
- for (bs = 1; bs < OD_TXSIZES; bs++) {
- int size;
- int offset;
- /* Length of block size > 4. */
- size = 1 << (OD_LOG_BSIZE0 + bs);
- /* Offset is the size of the previous block squared. */
- offset = 1 << 2*(OD_LOG_BSIZE0 - 1 + bs);
- if (n >= size) {
- /* 3 16x16 bands come after 3 8x8 bands, which come after 2 4x4 bands. */
- od_band_from_raster_16(OD_LAYOUTS[bs], dst + offset, src, stride);
- }
- }
- dst[0] = src[0];
-}
diff --git a/third_party/aom/av1/common/partition.h b/third_party/aom/av1/common/partition.h
deleted file mode 100644
index bd308f94f..000000000
--- a/third_party/aom/av1/common/partition.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/* clang-format off */
-
-#if !defined(_partition_H)
-# define _partition_H
-
-#include "av1/common/enums.h"
-#include "odintrin.h"
-
-typedef unsigned char index_pair[2];
-
-typedef struct {
- const index_pair **const dst_table;
- int size;
- int nb_bands;
- const int *const band_offsets;
-} band_layout;
-
-extern const int *const OD_BAND_OFFSETS[OD_TXSIZES + 1];
-
-void od_raster_to_coding_order(tran_low_t *dst, int n, TX_TYPE ty_type,
- const tran_low_t *src, int stride);
-
-void od_coding_order_to_raster(tran_low_t *dst, int stride, TX_TYPE ty_type,
- const tran_low_t *src, int n);
-
-void od_raster_to_coding_order_16(int16_t *dst, int n, const int16_t *src,
- int stride);
-
-#endif
diff --git a/third_party/aom/av1/common/ppc/cfl_ppc.c b/third_party/aom/av1/common/ppc/cfl_ppc.c
new file mode 100644
index 000000000..58933a7b3
--- /dev/null
+++ b/third_party/aom/av1/common/ppc/cfl_ppc.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <altivec.h>
+
+#include "config/av1_rtcd.h"
+
+#include "av1/common/cfl.h"
+
+#define OFF_0 0
+#define OFF_1 16
+#define OFF_2 32
+#define OFF_3 48
+#define CFL_BUF_LINE_BYTES 64
+#define CFL_LINE_1 64
+#define CFL_LINE_2 128
+#define CFL_LINE_3 192
+
+typedef vector int8_t int8x16_t;
+typedef vector uint8_t uint8x16_t;
+typedef vector int16_t int16x8_t;
+typedef vector uint16_t uint16x8_t;
+typedef vector int32_t int32x4_t;
+typedef vector uint32_t uint32x4_t;
+typedef vector uint64_t uint64x2_t;
+
+static INLINE void subtract_average_vsx(int16_t *pred_buf, int width,
+ int height, int round_offset,
+ int num_pel_log2) {
+ const int16_t *end = pred_buf + height * CFL_BUF_LINE;
+ const int16_t *sum_buf = pred_buf;
+ const uint32x4_t div_shift = vec_splats((uint32_t)num_pel_log2);
+ const uint8x16_t mask_64 = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
+ const uint8x16_t mask_32 = { 0x14, 0x15, 0x16, 0x17, 0x00, 0x01, 0x02, 0x03,
+ 0x1C, 0x1D, 0x1E, 0x1F, 0x08, 0x09, 0x0A, 0x0B };
+
+ int32x4_t sum_32x4_0 = { 0, 0, 0, round_offset };
+ int32x4_t sum_32x4_1 = { 0, 0, 0, 0 };
+ do {
+ sum_32x4_0 = vec_sum4s(vec_vsx_ld(OFF_0, sum_buf), sum_32x4_0);
+ sum_32x4_1 = vec_sum4s(vec_vsx_ld(OFF_0 + CFL_LINE_1, sum_buf), sum_32x4_1);
+ if (width >= 16) {
+ sum_32x4_0 = vec_sum4s(vec_vsx_ld(OFF_1, sum_buf), sum_32x4_0);
+ sum_32x4_1 =
+ vec_sum4s(vec_vsx_ld(OFF_1 + CFL_LINE_1, sum_buf), sum_32x4_1);
+ }
+ if (width == 32) {
+ sum_32x4_0 = vec_sum4s(vec_vsx_ld(OFF_2, sum_buf), sum_32x4_0);
+ sum_32x4_1 =
+ vec_sum4s(vec_vsx_ld(OFF_2 + CFL_LINE_1, sum_buf), sum_32x4_1);
+ sum_32x4_0 = vec_sum4s(vec_vsx_ld(OFF_3, sum_buf), sum_32x4_0);
+ sum_32x4_1 =
+ vec_sum4s(vec_vsx_ld(OFF_3 + CFL_LINE_1, sum_buf), sum_32x4_1);
+ }
+ } while ((sum_buf += (CFL_BUF_LINE * 2)) < end);
+ int32x4_t sum_32x4 = vec_add(sum_32x4_0, sum_32x4_1);
+
+ const int32x4_t perm_64 = vec_perm(sum_32x4, sum_32x4, mask_64);
+ sum_32x4 = vec_add(sum_32x4, perm_64);
+ const int32x4_t perm_32 = vec_perm(sum_32x4, sum_32x4, mask_32);
+ sum_32x4 = vec_add(sum_32x4, perm_32);
+ const int32x4_t avg = vec_sr(sum_32x4, div_shift);
+ const int16x8_t vec_avg = vec_pack(avg, avg);
+ do {
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0, pred_buf), vec_avg), OFF_0, pred_buf);
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_1, pred_buf), vec_avg),
+ OFF_0 + CFL_BUF_LINE_BYTES, pred_buf);
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_2, pred_buf), vec_avg),
+ OFF_0 + CFL_LINE_2, pred_buf);
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_3, pred_buf), vec_avg),
+ OFF_0 + CFL_LINE_3, pred_buf);
+ if (width >= 16) {
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1, pred_buf), vec_avg), OFF_1,
+ pred_buf);
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_1, pred_buf), vec_avg),
+ OFF_1 + CFL_LINE_1, pred_buf);
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_2, pred_buf), vec_avg),
+ OFF_1 + CFL_LINE_2, pred_buf);
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_3, pred_buf), vec_avg),
+ OFF_1 + CFL_LINE_3, pred_buf);
+ }
+ if (width == 32) {
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2, pred_buf), vec_avg), OFF_2,
+ pred_buf);
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_1, pred_buf), vec_avg),
+ OFF_2 + CFL_LINE_1, pred_buf);
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_2, pred_buf), vec_avg),
+ OFF_2 + CFL_LINE_2, pred_buf);
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_3, pred_buf), vec_avg),
+ OFF_2 + CFL_LINE_3, pred_buf);
+
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3, pred_buf), vec_avg), OFF_3,
+ pred_buf);
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_1, pred_buf), vec_avg),
+ OFF_3 + CFL_LINE_1, pred_buf);
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_2, pred_buf), vec_avg),
+ OFF_3 + CFL_LINE_2, pred_buf);
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_3, pred_buf), vec_avg),
+ OFF_3 + CFL_LINE_3, pred_buf);
+ }
+ } while ((pred_buf += CFL_BUF_LINE * 4) < end);
+}
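
The VSX routine above is a vectorized form of a simple contract: sum the CfL prediction buffer with rounding, shift by num_pel_log2 to get the average, then subtract that average from every sample in place. A scalar sketch of the same contract, using the same CFL_BUF_LINE stride (not part of the patch):

/* Scalar reference for what subtract_average_vsx() computes (sketch only). */
static void subtract_average_scalar_sketch(int16_t *pred_buf, int width,
                                           int height, int round_offset,
                                           int num_pel_log2) {
  int sum = round_offset;
  for (int j = 0; j < height; j++)
    for (int i = 0; i < width; i++) sum += pred_buf[j * CFL_BUF_LINE + i];
  const int avg = sum >> num_pel_log2;
  for (int j = 0; j < height; j++)
    for (int i = 0; i < width; i++) pred_buf[j * CFL_BUF_LINE + i] -= avg;
}
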
+
+// Declare wrappers for VSX sizes
+CFL_SUB_AVG_X(vsx, 8, 4, 16, 5)
+CFL_SUB_AVG_X(vsx, 8, 8, 32, 6)
+CFL_SUB_AVG_X(vsx, 8, 16, 64, 7)
+CFL_SUB_AVG_X(vsx, 8, 32, 128, 8)
+CFL_SUB_AVG_X(vsx, 16, 4, 32, 6)
+CFL_SUB_AVG_X(vsx, 16, 8, 64, 7)
+CFL_SUB_AVG_X(vsx, 16, 16, 128, 8)
+CFL_SUB_AVG_X(vsx, 16, 32, 256, 9)
+CFL_SUB_AVG_X(vsx, 32, 8, 128, 8)
+CFL_SUB_AVG_X(vsx, 32, 16, 256, 9)
+CFL_SUB_AVG_X(vsx, 32, 32, 512, 10)
+
+// Based on observation, VSX does not outperform the C code for small blocks
+// (there are no 64-bit load/store intrinsics), so blocks of width 4 fall back
+// to the C implementation.
+cfl_subtract_average_fn get_subtract_average_fn_vsx(TX_SIZE tx_size) {
+ static const cfl_subtract_average_fn sub_avg[TX_SIZES_ALL] = {
+ subtract_average_4x4_c, /* 4x4 */
+ subtract_average_8x8_vsx, /* 8x8 */
+ subtract_average_16x16_vsx, /* 16x16 */
+ subtract_average_32x32_vsx, /* 32x32 */
+ cfl_subtract_average_null, /* 64x64 (invalid CFL size) */
+ subtract_average_4x8_c, /* 4x8 */
+ subtract_average_8x4_vsx, /* 8x4 */
+ subtract_average_8x16_vsx, /* 8x16 */
+ subtract_average_16x8_vsx, /* 16x8 */
+ subtract_average_16x32_vsx, /* 16x32 */
+ subtract_average_32x16_vsx, /* 32x16 */
+ cfl_subtract_average_null, /* 32x64 (invalid CFL size) */
+ cfl_subtract_average_null, /* 64x32 (invalid CFL size) */
+ subtract_average_4x16_c, /* 4x16 */
+ subtract_average_16x4_vsx, /* 16x4 */
+ subtract_average_8x32_vsx, /* 8x32 */
+ subtract_average_32x8_vsx, /* 32x8 */
+ cfl_subtract_average_null, /* 16x64 (invalid CFL size) */
+ cfl_subtract_average_null, /* 64x16 (invalid CFL size) */
+ };
+ // Modulo TX_SIZES_ALL to ensure that an attacker won't be able to
+ // index the function pointer array out of bounds.
+ return sub_avg[tx_size % TX_SIZES_ALL];
+}
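
The "% TX_SIZES_ALL" clamp above is a cheap guard on the function-pointer table. An illustrative call site, assuming the cfl_subtract_average_fn typedef takes just the prediction buffer (the caller shown is not part of this patch):

/* Illustrative usage sketch: fetch the per-tx-size kernel once, then run it
   on the CfL prediction buffer. */
static void run_cfl_sub_avg_example(int16_t *pred_buf, TX_SIZE tx_size) {
  const cfl_subtract_average_fn fn = get_subtract_average_fn_vsx(tx_size);
  fn(pred_buf); /* each wrapper bakes in its width/height/round/shift */
}
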
diff --git a/third_party/aom/av1/common/pred_common.c b/third_party/aom/av1/common/pred_common.c
index 51fd0389e..d77739d85 100644
--- a/third_party/aom/av1/common/pred_common.c
+++ b/third_party/aom/av1/common/pred_common.c
@@ -12,30 +12,23 @@
#include "av1/common/common.h"
#include "av1/common/pred_common.h"
#include "av1/common/reconinter.h"
-#if CONFIG_EXT_INTRA
#include "av1/common/reconintra.h"
-#endif // CONFIG_EXT_INTRA
#include "av1/common/seg_common.h"
// Returns a context number for the given MB prediction signal
-#if CONFIG_DUAL_FILTER
-static InterpFilter get_ref_filter_type(const MODE_INFO *mi,
+static InterpFilter get_ref_filter_type(const MB_MODE_INFO *ref_mbmi,
const MACROBLOCKD *xd, int dir,
MV_REFERENCE_FRAME ref_frame) {
- const MB_MODE_INFO *ref_mbmi = &mi->mbmi;
- int use_subpel[2] = {
- has_subpel_mv_component(mi, xd, dir),
- has_subpel_mv_component(mi, xd, dir + 2),
- };
-
- return (((ref_mbmi->ref_frame[0] == ref_frame && use_subpel[0]) ||
- (ref_mbmi->ref_frame[1] == ref_frame && use_subpel[1]))
+ (void)xd;
+
+ return ((ref_mbmi->ref_frame[0] == ref_frame ||
+ ref_mbmi->ref_frame[1] == ref_frame)
? av1_extract_interp_filter(ref_mbmi->interp_filters, dir & 0x01)
: SWITCHABLE_FILTERS);
}
int av1_get_pred_context_switchable_interp(const MACROBLOCKD *xd, int dir) {
- const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const MB_MODE_INFO *const mbmi = xd->mi[0];
const int ctx_offset =
(mbmi->ref_frame[1] > INTRA_FRAME) * INTER_FILTER_COMP_OFFSET;
MV_REFERENCE_FRAME ref_frame =
@@ -69,132 +62,57 @@ int av1_get_pred_context_switchable_interp(const MACROBLOCKD *xd, int dir) {
return filter_type_ctx;
}
-#else
-int av1_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
- // Note:
- // The mode info data structure has a one element border above and to the
- // left of the entries corresponding to real macroblocks.
- // The prediction flags in these dummy entries are initialized to 0.
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int left_type =
- xd->left_available && is_inter_block(left_mbmi)
- ? av1_extract_interp_filter(left_mbmi->interp_filters, 0)
- : SWITCHABLE_FILTERS;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const int above_type =
- xd->up_available && is_inter_block(above_mbmi)
- ? av1_extract_interp_filter(above_mbmi->interp_filters, 0)
- : SWITCHABLE_FILTERS;
-
- if (left_type == above_type) {
- return left_type;
- } else if (left_type == SWITCHABLE_FILTERS) {
- assert(above_type != SWITCHABLE_FILTERS);
- return above_type;
- } else if (above_type == SWITCHABLE_FILTERS) {
- assert(left_type != SWITCHABLE_FILTERS);
- return left_type;
- } else {
- return SWITCHABLE_FILTERS;
- }
-}
-#endif
-
-#if CONFIG_EXT_INTRA
-#if CONFIG_INTRA_INTERP
-// Obtain the reference filter type from the above/left neighbor blocks.
-static INTRA_FILTER get_ref_intra_filter(const MB_MODE_INFO *ref_mbmi) {
- INTRA_FILTER ref_type = INTRA_FILTERS;
-
- if (ref_mbmi->sb_type >= BLOCK_8X8) {
- const PREDICTION_MODE mode = ref_mbmi->mode;
- if (is_inter_block(ref_mbmi)) {
- switch (av1_extract_interp_filter(ref_mbmi->interp_filters, 0)) {
- case EIGHTTAP_REGULAR: ref_type = INTRA_FILTER_8TAP; break;
- case EIGHTTAP_SMOOTH: ref_type = INTRA_FILTER_8TAP_SMOOTH; break;
- case MULTITAP_SHARP: ref_type = INTRA_FILTER_8TAP_SHARP; break;
- case BILINEAR: ref_type = INTRA_FILTERS; break;
- default: break;
- }
- } else {
- if (av1_is_directional_mode(mode, ref_mbmi->sb_type)) {
- const int p_angle =
- mode_to_angle_map[mode] + ref_mbmi->angle_delta[0] * ANGLE_STEP;
- if (av1_is_intra_filter_switchable(p_angle)) {
- ref_type = ref_mbmi->intra_filter;
- }
- }
- }
- }
- return ref_type;
-}
-
-int av1_get_pred_context_intra_interp(const MACROBLOCKD *xd) {
- int left_type = INTRA_FILTERS, above_type = INTRA_FILTERS;
- if (xd->left_available) left_type = get_ref_intra_filter(xd->left_mbmi);
+static void palette_add_to_cache(uint16_t *cache, int *n, uint16_t val) {
+ // Do not add an already existing value
+ if (*n > 0 && val == cache[*n - 1]) return;
- if (xd->up_available) above_type = get_ref_intra_filter(xd->above_mbmi);
-
- if (left_type == above_type)
- return left_type;
- else if (left_type == INTRA_FILTERS && above_type != INTRA_FILTERS)
- return above_type;
- else if (left_type != INTRA_FILTERS && above_type == INTRA_FILTERS)
- return left_type;
- else
- return INTRA_FILTERS;
+ cache[(*n)++] = val;
}
-#endif // CONFIG_INTRA_INTERP
-#endif // CONFIG_EXT_INTRA
-#if CONFIG_PALETTE_DELTA_ENCODING
int av1_get_palette_cache(const MACROBLOCKD *const xd, int plane,
uint16_t *cache) {
const int row = -xd->mb_to_top_edge >> 3;
// Do not refer to above SB row when on SB boundary.
- const MODE_INFO *const above_mi =
- (row % (1 << MIN_SB_SIZE_LOG2)) ? xd->above_mi : NULL;
- const MODE_INFO *const left_mi = xd->left_mi;
+ const MB_MODE_INFO *const above_mi =
+ (row % (1 << MIN_SB_SIZE_LOG2)) ? xd->above_mbmi : NULL;
+ const MB_MODE_INFO *const left_mi = xd->left_mbmi;
int above_n = 0, left_n = 0;
- if (above_mi)
- above_n = above_mi->mbmi.palette_mode_info.palette_size[plane != 0];
- if (left_mi)
- left_n = left_mi->mbmi.palette_mode_info.palette_size[plane != 0];
+ if (above_mi) above_n = above_mi->palette_mode_info.palette_size[plane != 0];
+ if (left_mi) left_n = left_mi->palette_mode_info.palette_size[plane != 0];
if (above_n == 0 && left_n == 0) return 0;
int above_idx = plane * PALETTE_MAX_SIZE;
int left_idx = plane * PALETTE_MAX_SIZE;
int n = 0;
const uint16_t *above_colors =
- above_mi ? above_mi->mbmi.palette_mode_info.palette_colors : NULL;
+ above_mi ? above_mi->palette_mode_info.palette_colors : NULL;
const uint16_t *left_colors =
- left_mi ? left_mi->mbmi.palette_mode_info.palette_colors : NULL;
+ left_mi ? left_mi->palette_mode_info.palette_colors : NULL;
// Merge the sorted lists of base colors from above and left to get
// combined sorted color cache.
while (above_n > 0 && left_n > 0) {
uint16_t v_above = above_colors[above_idx];
uint16_t v_left = left_colors[left_idx];
if (v_left < v_above) {
- if (n == 0 || v_left != cache[n - 1]) cache[n++] = v_left;
+ palette_add_to_cache(cache, &n, v_left);
++left_idx, --left_n;
} else {
- if (n == 0 || v_above != cache[n - 1]) cache[n++] = v_above;
+ palette_add_to_cache(cache, &n, v_above);
++above_idx, --above_n;
if (v_left == v_above) ++left_idx, --left_n;
}
}
while (above_n-- > 0) {
uint16_t val = above_colors[above_idx++];
- if (n == 0 || val != cache[n - 1]) cache[n++] = val;
+ palette_add_to_cache(cache, &n, val);
}
while (left_n-- > 0) {
uint16_t val = left_colors[left_idx++];
- if (n == 0 || val != cache[n - 1]) cache[n++] = val;
+ palette_add_to_cache(cache, &n, val);
}
assert(n <= 2 * PALETTE_MAX_SIZE);
return n;
}
-#endif // CONFIG_PALETTE_DELTA_ENCODING
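
av1_get_palette_cache() is a two-way merge of already-sorted color lists with duplicate suppression, now factored through palette_add_to_cache(). A standalone sketch of the same merge, detached from MB_MODE_INFO for clarity (names are illustrative, not from the patch):

#include <stdint.h>

/* Sketch: merge two sorted color lists into `cache`, dropping duplicates,
   mirroring the loop structure of av1_get_palette_cache(). */
static int merge_sorted_colors_sketch(const uint16_t *a, int na,
                                      const uint16_t *b, int nb,
                                      uint16_t *cache) {
  int ia = 0, ib = 0, n = 0;
  while (ia < na && ib < nb) {
    const uint16_t va = a[ia], vb = b[ib];
    uint16_t v;
    if (vb < va) {
      v = b[ib++];
    } else {
      v = a[ia++];
      if (va == vb) ib++; /* same color on both lists: consume both copies */
    }
    if (n == 0 || v != cache[n - 1]) cache[n++] = v;
  }
  while (ia < na) {
    const uint16_t v = a[ia++];
    if (n == 0 || v != cache[n - 1]) cache[n++] = v;
  }
  while (ib < nb) {
    const uint16_t v = b[ib++];
    if (n == 0 || v != cache[n - 1]) cache[n++] = v;
  }
  return n;
}
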
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
@@ -220,65 +138,17 @@ int av1_get_intra_inter_context(const MACROBLOCKD *xd) {
}
}
-#if CONFIG_COMPOUND_SINGLEREF
-// The compound/single mode info data structure has one element border above and
-// to the left of the entries corresponding to real macroblocks.
-// The prediction flags in these dummy entries are initialized to 0.
-int av1_get_inter_mode_context(const MACROBLOCKD *xd) {
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
-
- if (has_above && has_left) { // both edges available
- const int above_inter_comp_mode =
- is_inter_anyref_comp_mode(above_mbmi->mode);
- const int left_inter_comp_mode = is_inter_anyref_comp_mode(left_mbmi->mode);
- if (above_inter_comp_mode && left_inter_comp_mode)
- return 0;
- else if (above_inter_comp_mode || left_inter_comp_mode)
- return 1;
- else if (!is_inter_block(above_mbmi) && !is_inter_block(left_mbmi))
- return 2;
- else
- return 3;
- } else if (has_above || has_left) { // one edge available
- const MB_MODE_INFO *const edge_mbmi = has_above ? above_mbmi : left_mbmi;
- if (is_inter_anyref_comp_mode(edge_mbmi->mode))
- return 1;
- else if (!is_inter_block(edge_mbmi))
- return 2;
- else
- return 3;
- } else { // no edge available
- return 2;
- }
-}
-#endif // CONFIG_COMPOUND_SINGLEREF
-
-#if CONFIG_EXT_REFS
#define CHECK_BACKWARD_REFS(ref_frame) \
(((ref_frame) >= BWDREF_FRAME) && ((ref_frame) <= ALTREF_FRAME))
#define IS_BACKWARD_REF_FRAME(ref_frame) CHECK_BACKWARD_REFS(ref_frame)
-#else
-#define IS_BACKWARD_REF_FRAME(ref_frame) ((ref_frame) == cm->comp_fixed_ref)
-#endif // CONFIG_EXT_REFS
-
-#define CHECK_GOLDEN_OR_LAST3(ref_frame) \
- (((ref_frame) == GOLDEN_FRAME) || ((ref_frame) == LAST3_FRAME))
-int av1_get_reference_mode_context(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
+int av1_get_reference_mode_context(const MACROBLOCKD *xd) {
int ctx;
const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
const int has_above = xd->up_available;
const int has_left = xd->left_available;
-#if CONFIG_EXT_REFS
- (void)cm;
-#endif // CONFIG_EXT_REFS
-
// Note:
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
@@ -314,9 +184,6 @@ int av1_get_reference_mode_context(const AV1_COMMON *cm,
return ctx;
}
-#if CONFIG_EXT_COMP_REFS
-// TODO(zoeliu): To try on the design of 3 contexts, instead of 5:
-// COMP_REF_TYPE_CONTEXTS = 3
int av1_get_comp_reference_type_context(const MACROBLOCKD *xd) {
int pred_context;
const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
@@ -344,9 +211,8 @@ int av1_get_comp_reference_type_context(const MACROBLOCKD *xd) {
const MV_REFERENCE_FRAME frfl = left_mbmi->ref_frame[0];
if (a_sg && l_sg) { // single/single
- pred_context =
- 1 +
- 2 * (!(IS_BACKWARD_REF_FRAME(frfa) ^ IS_BACKWARD_REF_FRAME(frfl)));
+ pred_context = 1 + 2 * (!(IS_BACKWARD_REF_FRAME(frfa) ^
+ IS_BACKWARD_REF_FRAME(frfl)));
} else if (l_sg || a_sg) { // single/comp
const int uni_rfc =
a_sg ? has_uni_comp_refs(left_mbmi) : has_uni_comp_refs(above_mbmi);
@@ -397,44 +263,16 @@ int av1_get_comp_reference_type_context(const MACROBLOCKD *xd) {
// 3 contexts: Voting is used to compare the count of forward references with
// that of backward references from the spatial neighbors.
int av1_get_pred_context_uni_comp_ref_p(const MACROBLOCKD *xd) {
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int above_in_image = xd->up_available;
- const int left_in_image = xd->left_available;
+ const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
// Count of forward references (L, L2, L3, or G)
- int frf_count = 0;
+ const int frf_count = ref_counts[LAST_FRAME] + ref_counts[LAST2_FRAME] +
+ ref_counts[LAST3_FRAME] + ref_counts[GOLDEN_FRAME];
// Count of backward references (B or A)
- int brf_count = 0;
-
- if (above_in_image && is_inter_block(above_mbmi)) {
- if (above_mbmi->ref_frame[0] <= GOLDEN_FRAME)
- ++frf_count;
- else
- ++brf_count;
- if (has_second_ref(above_mbmi)) {
- if (above_mbmi->ref_frame[1] <= GOLDEN_FRAME)
- ++frf_count;
- else
- ++brf_count;
- }
- }
-
- if (left_in_image && is_inter_block(left_mbmi)) {
- if (left_mbmi->ref_frame[0] <= GOLDEN_FRAME)
- ++frf_count;
- else
- ++brf_count;
- if (has_second_ref(left_mbmi)) {
- if (left_mbmi->ref_frame[1] <= GOLDEN_FRAME)
- ++frf_count;
- else
- ++brf_count;
- }
- }
+ const int brf_count = ref_counts[BWDREF_FRAME] + ref_counts[ALTREF2_FRAME] +
+ ref_counts[ALTREF_FRAME];
- pred_context =
+ const int pred_context =
(frf_count == brf_count) ? 1 : ((frf_count < brf_count) ? 0 : 2);
assert(pred_context >= 0 && pred_context < UNI_COMP_REF_CONTEXTS);
@@ -450,50 +288,17 @@ int av1_get_pred_context_uni_comp_ref_p(const MACROBLOCKD *xd) {
// 3 contexts: Voting is used to compare the count of LAST2_FRAME with the
// total count of LAST3/GOLDEN from the spatial neighbors.
int av1_get_pred_context_uni_comp_ref_p1(const MACROBLOCKD *xd) {
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int above_in_image = xd->up_available;
- const int left_in_image = xd->left_available;
+ const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
// Count of LAST2
- int last2_count = 0;
+ const int last2_count = ref_counts[LAST2_FRAME];
// Count of LAST3 or GOLDEN
- int last3_or_gld_count = 0;
-
- if (above_in_image && is_inter_block(above_mbmi)) {
- last2_count = (above_mbmi->ref_frame[0] == LAST2_FRAME) ? last2_count + 1
- : last2_count;
- last3_or_gld_count = CHECK_GOLDEN_OR_LAST3(above_mbmi->ref_frame[0])
- ? last3_or_gld_count + 1
- : last3_or_gld_count;
- if (has_second_ref(above_mbmi)) {
- last2_count = (above_mbmi->ref_frame[1] == LAST2_FRAME) ? last2_count + 1
- : last2_count;
- last3_or_gld_count = CHECK_GOLDEN_OR_LAST3(above_mbmi->ref_frame[1])
- ? last3_or_gld_count + 1
- : last3_or_gld_count;
- }
- }
+ const int last3_or_gld_count =
+ ref_counts[LAST3_FRAME] + ref_counts[GOLDEN_FRAME];
- if (left_in_image && is_inter_block(left_mbmi)) {
- last2_count = (left_mbmi->ref_frame[0] == LAST2_FRAME) ? last2_count + 1
- : last2_count;
- last3_or_gld_count = CHECK_GOLDEN_OR_LAST3(left_mbmi->ref_frame[0])
- ? last3_or_gld_count + 1
- : last3_or_gld_count;
- if (has_second_ref(left_mbmi)) {
- last2_count = (left_mbmi->ref_frame[1] == LAST2_FRAME) ? last2_count + 1
- : last2_count;
- last3_or_gld_count = CHECK_GOLDEN_OR_LAST3(left_mbmi->ref_frame[1])
- ? last3_or_gld_count + 1
- : last3_or_gld_count;
- }
- }
-
- pred_context = (last2_count == last3_or_gld_count)
- ? 1
- : ((last2_count < last3_or_gld_count) ? 0 : 2);
+ const int pred_context = (last2_count == last3_or_gld_count)
+ ? 1
+ : ((last2_count < last3_or_gld_count) ? 0 : 2);
assert(pred_context >= 0 && pred_context < UNI_COMP_REF_CONTEXTS);
return pred_context;
@@ -508,415 +313,83 @@ int av1_get_pred_context_uni_comp_ref_p1(const MACROBLOCKD *xd) {
// 3 contexts: Voting is used to compare the count of LAST3_FRAME with the
// total count of GOLDEN_FRAME from the spatial neighbors.
int av1_get_pred_context_uni_comp_ref_p2(const MACROBLOCKD *xd) {
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int above_in_image = xd->up_available;
- const int left_in_image = xd->left_available;
+ const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
// Count of LAST3
- int last3_count = 0;
+ const int last3_count = ref_counts[LAST3_FRAME];
// Count of GOLDEN
- int gld_count = 0;
-
- if (above_in_image && is_inter_block(above_mbmi)) {
- last3_count = (above_mbmi->ref_frame[0] == LAST3_FRAME) ? last3_count + 1
- : last3_count;
- gld_count =
- (above_mbmi->ref_frame[0] == GOLDEN_FRAME) ? gld_count + 1 : gld_count;
- if (has_second_ref(above_mbmi)) {
- last3_count = (above_mbmi->ref_frame[1] == LAST3_FRAME) ? last3_count + 1
- : last3_count;
- gld_count = (above_mbmi->ref_frame[1] == GOLDEN_FRAME) ? gld_count + 1
- : gld_count;
- }
- }
+ const int gld_count = ref_counts[GOLDEN_FRAME];
- if (left_in_image && is_inter_block(left_mbmi)) {
- last3_count = (left_mbmi->ref_frame[0] == LAST3_FRAME) ? last3_count + 1
- : last3_count;
- gld_count =
- (left_mbmi->ref_frame[0] == GOLDEN_FRAME) ? gld_count + 1 : gld_count;
- if (has_second_ref(left_mbmi)) {
- last3_count = (left_mbmi->ref_frame[1] == LAST3_FRAME) ? last3_count + 1
- : last3_count;
- gld_count =
- (left_mbmi->ref_frame[1] == GOLDEN_FRAME) ? gld_count + 1 : gld_count;
- }
- }
-
- pred_context =
+ const int pred_context =
(last3_count == gld_count) ? 1 : ((last3_count < gld_count) ? 0 : 2);
assert(pred_context >= 0 && pred_context < UNI_COMP_REF_CONTEXTS);
return pred_context;
}
-#endif // CONFIG_EXT_COMP_REFS
-
-#if CONFIG_EXT_REFS
-// TODO(zoeliu): Future work will be conducted to optimize the context design
-// for the coding of the reference frames.
-
-#define CHECK_LAST_OR_LAST2(ref_frame) \
- ((ref_frame == LAST_FRAME) || (ref_frame == LAST2_FRAME))
-
-// Returns a context number for the given MB prediction signal
-// Signal the first reference frame for a compound mode be either
-// GOLDEN/LAST3, or LAST/LAST2.
+// == Common context functions for both comp and single ref ==
//
-// NOTE(zoeliu): The probability of ref_frame[0] is either
-// GOLDEN_FRAME or LAST3_FRAME.
-int av1_get_pred_context_comp_ref_p(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int above_in_image = xd->up_available;
- const int left_in_image = xd->left_available;
+// Obtain contexts to signal a reference frame to be either LAST/LAST2 or
+// LAST3/GOLDEN.
+static int get_pred_context_ll2_or_l3gld(const MACROBLOCKD *xd) {
+ const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
-// Note:
-// The mode info data structure has a one element border above and to the
-// left of the entries correpsonding to real macroblocks.
-// The prediction flags in these dummy entries are initialised to 0.
-#if CONFIG_ONE_SIDED_COMPOUND || CONFIG_FRAME_SIGN_BIAS
- // Code seems to assume that signbias of cm->comp_bwd_ref[0] is always 1
- const int bwd_ref_sign_idx = 1;
-#else
- const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
-#endif // CONFIG_ONE_SIDED_COMPOUND || CONFIG_FRAME_SIGN_BIAS
- const int fwd_ref_sign_idx = !bwd_ref_sign_idx;
-
- (void)cm;
+ // Count of LAST + LAST2
+ const int last_last2_count = ref_counts[LAST_FRAME] + ref_counts[LAST2_FRAME];
+ // Count of LAST3 + GOLDEN
+ const int last3_gld_count =
+ ref_counts[LAST3_FRAME] + ref_counts[GOLDEN_FRAME];
- if (above_in_image && left_in_image) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
-
- if (above_intra && left_intra) { // intra/intra (2)
- pred_context = 2;
- } else if (above_intra || left_intra) { // intra/inter
- const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
-
- if (!has_second_ref(edge_mbmi)) // single pred (1/3)
- pred_context =
- 1 + 2 * (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0]));
- else // comp pred (1/3)
- pred_context = 1 +
- 2 * (!CHECK_GOLDEN_OR_LAST3(
- edge_mbmi->ref_frame[fwd_ref_sign_idx]));
- } else { // inter/inter
- const int l_sg = !has_second_ref(left_mbmi);
- const int a_sg = !has_second_ref(above_mbmi);
- const MV_REFERENCE_FRAME frfa =
- a_sg ? above_mbmi->ref_frame[0]
- : above_mbmi->ref_frame[fwd_ref_sign_idx];
- const MV_REFERENCE_FRAME frfl =
- l_sg ? left_mbmi->ref_frame[0]
- : left_mbmi->ref_frame[fwd_ref_sign_idx];
-
- if (frfa == frfl && CHECK_GOLDEN_OR_LAST3(frfa)) {
- pred_context = 0;
- } else if (l_sg && a_sg) { // single/single
- if ((CHECK_BACKWARD_REFS(frfa) && CHECK_LAST_OR_LAST2(frfl)) ||
- (CHECK_BACKWARD_REFS(frfl) && CHECK_LAST_OR_LAST2(frfa))) {
- pred_context = 4;
- } else if (CHECK_GOLDEN_OR_LAST3(frfa) || CHECK_GOLDEN_OR_LAST3(frfl)) {
- pred_context = 1;
- } else {
- pred_context = 3;
- }
- } else if (l_sg || a_sg) { // single/comp
- const MV_REFERENCE_FRAME frfc = l_sg ? frfa : frfl;
- const MV_REFERENCE_FRAME rfs = a_sg ? frfa : frfl;
-
- if (CHECK_GOLDEN_OR_LAST3(frfc) && !CHECK_GOLDEN_OR_LAST3(rfs))
- pred_context = 1;
- else if (CHECK_GOLDEN_OR_LAST3(rfs) && !CHECK_GOLDEN_OR_LAST3(frfc))
- pred_context = 2;
- else
- pred_context = 4;
- } else { // comp/comp
- if ((CHECK_LAST_OR_LAST2(frfa) && CHECK_LAST_OR_LAST2(frfl))) {
- pred_context = 4;
- } else {
-// NOTE(zoeliu): Following assert may be removed once confirmed.
-#if !USE_UNI_COMP_REFS
- // TODO(zoeliu): To further study the UNIDIR scenario
- assert(CHECK_GOLDEN_OR_LAST3(frfa) || CHECK_GOLDEN_OR_LAST3(frfl));
-#endif // !USE_UNI_COMP_REFS
- pred_context = 2;
- }
- }
- }
- } else if (above_in_image || left_in_image) { // one edge available
- const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
-
- if (!is_inter_block(edge_mbmi)) {
- pred_context = 2;
- } else {
- if (has_second_ref(edge_mbmi))
- pred_context =
- 4 *
- (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[fwd_ref_sign_idx]));
- else
- pred_context = 3 * (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0]));
- }
- } else { // no edges available (2)
- pred_context = 2;
- }
+ const int pred_context = (last_last2_count == last3_gld_count)
+ ? 1
+ : ((last_last2_count < last3_gld_count) ? 0 : 2);
assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
-
return pred_context;
}
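
All of the rewritten reference-frame contexts in this file, including the one just above, use the same voting rule on the precomputed neighbor tallies in xd->neighbors_ref_counts: 0 when the first group is rarer than the second, 1 on a tie, 2 when it is more common. A tiny helper expressing that rule (the helper name is illustrative):

/* Sketch of the shared voting rule: compare two neighbor counts and return a
   3-way context (0: first group rarer, 1: tie, 2: first group more common). */
static int vote_context_sketch(int count_a, int count_b) {
  if (count_a == count_b) return 1;
  return (count_a < count_b) ? 0 : 2;
}
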
-// Returns a context number for the given MB prediction signal
-// Signal the first reference frame for a compound mode be LAST,
-// conditioning on that it is known either LAST/LAST2.
-//
-// NOTE(zoeliu): The probability of ref_frame[0] is LAST_FRAME,
-// conditioning on it is either LAST_FRAME or LAST2_FRAME.
-int av1_get_pred_context_comp_ref_p1(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int above_in_image = xd->up_available;
- const int left_in_image = xd->left_available;
-
-// Note:
-// The mode info data structure has a one element border above and to the
-// left of the entries correpsonding to real macroblocks.
-// The prediction flags in these dummy entries are initialised to 0.
-#if CONFIG_ONE_SIDED_COMPOUND || CONFIG_FRAME_SIGN_BIAS
- // Code seems to assume that signbias of cm->comp_bwd_ref[0] is always 1
- const int bwd_ref_sign_idx = 1;
-#else
- const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
-#endif // CONFIG_ONE_SIDED_COMPOUND || CONFIG_FRAME_SIGN_BIAS
- const int fwd_ref_sign_idx = !bwd_ref_sign_idx;
-
- (void)cm;
-
- if (above_in_image && left_in_image) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
-
- if (above_intra && left_intra) { // intra/intra (2)
- pred_context = 2;
- } else if (above_intra || left_intra) { // intra/inter
- const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
-
- if (!has_second_ref(edge_mbmi)) // single pred (1/3)
- pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != LAST_FRAME);
- else // comp pred (1/3)
- pred_context =
- 1 + 2 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != LAST_FRAME);
- } else { // inter/inter
- const int l_sg = !has_second_ref(left_mbmi);
- const int a_sg = !has_second_ref(above_mbmi);
- const MV_REFERENCE_FRAME frfa =
- a_sg ? above_mbmi->ref_frame[0]
- : above_mbmi->ref_frame[fwd_ref_sign_idx];
- const MV_REFERENCE_FRAME frfl =
- l_sg ? left_mbmi->ref_frame[0]
- : left_mbmi->ref_frame[fwd_ref_sign_idx];
-
- if (frfa == frfl && frfa == LAST_FRAME)
- pred_context = 0;
- else if (l_sg && a_sg) { // single/single
- if (frfa == LAST_FRAME || frfl == LAST_FRAME)
- pred_context = 1;
- else if (CHECK_GOLDEN_OR_LAST3(frfa) || CHECK_GOLDEN_OR_LAST3(frfl))
- pred_context = 2 + (frfa != frfl);
- else if (frfa == frfl ||
- (CHECK_BACKWARD_REFS(frfa) && CHECK_BACKWARD_REFS(frfl)))
- pred_context = 3;
- else
- pred_context = 4;
- } else if (l_sg || a_sg) { // single/comp
- const MV_REFERENCE_FRAME frfc = l_sg ? frfa : frfl;
- const MV_REFERENCE_FRAME rfs = a_sg ? frfa : frfl;
+// Obtain contexts to signal a reference frame to be either LAST or LAST2.
+static int get_pred_context_last_or_last2(const MACROBLOCKD *xd) {
+ const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
- if (frfc == LAST_FRAME && rfs != LAST_FRAME)
- pred_context = 1;
- else if (rfs == LAST_FRAME && frfc != LAST_FRAME)
- pred_context = 2;
- else
- pred_context =
- 3 + (frfc == LAST2_FRAME || CHECK_GOLDEN_OR_LAST3(rfs));
- } else { // comp/comp
- if (frfa == LAST_FRAME || frfl == LAST_FRAME)
- pred_context = 2;
- else
- pred_context =
- 3 + (CHECK_GOLDEN_OR_LAST3(frfa) || CHECK_GOLDEN_OR_LAST3(frfl));
- }
- }
- } else if (above_in_image || left_in_image) { // one edge available
- const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+ // Count of LAST
+ const int last_count = ref_counts[LAST_FRAME];
+ // Count of LAST2
+ const int last2_count = ref_counts[LAST2_FRAME];
- if (!is_inter_block(edge_mbmi)) {
- pred_context = 2;
- } else {
- if (has_second_ref(edge_mbmi)) {
- pred_context =
- 4 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != LAST_FRAME);
- } else {
- if (edge_mbmi->ref_frame[0] == LAST_FRAME)
- pred_context = 0;
- else
- pred_context = 2 + CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0]);
- }
- }
- } else { // no edges available (2)
- pred_context = 2;
- }
+ const int pred_context =
+ (last_count == last2_count) ? 1 : ((last_count < last2_count) ? 0 : 2);
assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
-
return pred_context;
}
-// Returns a context number for the given MB prediction signal
-// Signal the first reference frame for a compound mode be GOLDEN,
-// conditioning on that it is known either GOLDEN or LAST3.
-//
-// NOTE(zoeliu): The probability of ref_frame[0] is GOLDEN_FRAME,
-// conditioning on it is either GOLDEN or LAST3.
-int av1_get_pred_context_comp_ref_p2(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int above_in_image = xd->up_available;
- const int left_in_image = xd->left_available;
-
-// Note:
-// The mode info data structure has a one element border above and to the
-// left of the entries correpsonding to real macroblocks.
-// The prediction flags in these dummy entries are initialised to 0.
-#if CONFIG_ONE_SIDED_COMPOUND || CONFIG_FRAME_SIGN_BIAS
- const int bwd_ref_sign_idx = 1;
-#else
- const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
-#endif // CONFIG_ONE_SIDED_COMPOUND || CONFIG_FRAME_SIGN_BIAS
- const int fwd_ref_sign_idx = !bwd_ref_sign_idx;
-
- (void)cm;
+// Obtain contexts to signal a reference frame to be either LAST3 or GOLDEN.
+static int get_pred_context_last3_or_gld(const MACROBLOCKD *xd) {
+ const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
- if (above_in_image && left_in_image) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
-
- if (above_intra && left_intra) { // intra/intra (2)
- pred_context = 2;
- } else if (above_intra || left_intra) { // intra/inter
- const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
-
- if (!has_second_ref(edge_mbmi)) // single pred (1/3)
- pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != GOLDEN_FRAME);
- else // comp pred (1/3)
- pred_context =
- 1 + 2 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != GOLDEN_FRAME);
- } else { // inter/inter
- const int l_sg = !has_second_ref(left_mbmi);
- const int a_sg = !has_second_ref(above_mbmi);
- const MV_REFERENCE_FRAME frfa =
- a_sg ? above_mbmi->ref_frame[0]
- : above_mbmi->ref_frame[fwd_ref_sign_idx];
- const MV_REFERENCE_FRAME frfl =
- l_sg ? left_mbmi->ref_frame[0]
- : left_mbmi->ref_frame[fwd_ref_sign_idx];
-
- if (frfa == frfl && frfa == GOLDEN_FRAME)
- pred_context = 0;
- else if (l_sg && a_sg) { // single/single
- if (frfa == GOLDEN_FRAME || frfl == GOLDEN_FRAME)
- pred_context = 1;
- else if (CHECK_LAST_OR_LAST2(frfa) || CHECK_LAST_OR_LAST2(frfl))
- pred_context = 2 + (frfa != frfl);
- else if (frfa == frfl ||
- (CHECK_BACKWARD_REFS(frfa) && CHECK_BACKWARD_REFS(frfl)))
- pred_context = 3;
- else
- pred_context = 4;
- } else if (l_sg || a_sg) { // single/comp
- const MV_REFERENCE_FRAME frfc = l_sg ? frfa : frfl;
- const MV_REFERENCE_FRAME rfs = a_sg ? frfa : frfl;
-
- if (frfc == GOLDEN_FRAME && rfs != GOLDEN_FRAME)
- pred_context = 1;
- else if (rfs == GOLDEN_FRAME && frfc != GOLDEN_FRAME)
- pred_context = 2;
- else
- pred_context = 3 + (frfc == LAST3_FRAME || CHECK_LAST_OR_LAST2(rfs));
- } else { // comp/comp
- if (frfa == GOLDEN_FRAME || frfl == GOLDEN_FRAME)
- pred_context = 2;
- else
- pred_context =
- 3 + (CHECK_LAST_OR_LAST2(frfa) || CHECK_LAST_OR_LAST2(frfl));
- }
- }
- } else if (above_in_image || left_in_image) { // one edge available
- const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+ // Count of LAST3
+ const int last3_count = ref_counts[LAST3_FRAME];
+ // Count of GOLDEN
+ const int gld_count = ref_counts[GOLDEN_FRAME];
- if (!is_inter_block(edge_mbmi)) {
- pred_context = 2;
- } else {
- if (has_second_ref(edge_mbmi)) {
- pred_context =
- 4 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != GOLDEN_FRAME);
- } else {
- if (edge_mbmi->ref_frame[0] == GOLDEN_FRAME)
- pred_context = 0;
- else
- pred_context = 2 + CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]);
- }
- }
- } else { // no edges available (2)
- pred_context = 2;
- }
+ const int pred_context =
+ (last3_count == gld_count) ? 1 : ((last3_count < gld_count) ? 0 : 2);
assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
-
return pred_context;
}
// Obtain contexts to signal whether a reference frame is BWDREF/ALTREF2 or
// ALTREF.
-int av1_get_pred_context_brfarf2_or_arf(const MACROBLOCKD *xd) {
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int above_in_image = xd->up_available;
- const int left_in_image = xd->left_available;
+static int get_pred_context_brfarf2_or_arf(const MACROBLOCKD *xd) {
+ const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
// Counts of BWDREF, ALTREF2, or ALTREF frames (B, A2, or A)
- int bwdref_counts[ALTREF_FRAME - BWDREF_FRAME + 1] = { 0 };
-
- if (above_in_image && is_inter_block(above_mbmi)) {
- if (above_mbmi->ref_frame[0] >= BWDREF_FRAME)
- ++bwdref_counts[above_mbmi->ref_frame[0] - BWDREF_FRAME];
- if (has_second_ref(above_mbmi)) {
- if (above_mbmi->ref_frame[1] >= BWDREF_FRAME)
- ++bwdref_counts[above_mbmi->ref_frame[1] - BWDREF_FRAME];
- }
- }
+ const int brfarf2_count =
+ ref_counts[BWDREF_FRAME] + ref_counts[ALTREF2_FRAME];
+ const int arf_count = ref_counts[ALTREF_FRAME];
- if (left_in_image && is_inter_block(left_mbmi)) {
- if (left_mbmi->ref_frame[0] >= BWDREF_FRAME)
- ++bwdref_counts[left_mbmi->ref_frame[0] - BWDREF_FRAME];
- if (has_second_ref(left_mbmi)) {
- if (left_mbmi->ref_frame[1] >= BWDREF_FRAME)
- ++bwdref_counts[left_mbmi->ref_frame[1] - BWDREF_FRAME];
- }
- }
-
- const int brfarf2_count = bwdref_counts[BWDREF_FRAME - BWDREF_FRAME] +
- bwdref_counts[ALTREF2_FRAME - BWDREF_FRAME];
- const int arf_count = bwdref_counts[ALTREF_FRAME - BWDREF_FRAME];
const int pred_context =
(brfarf2_count == arf_count) ? 1 : ((brfarf2_count < arf_count) ? 0 : 2);
@@ -925,42 +398,13 @@ int av1_get_pred_context_brfarf2_or_arf(const MACROBLOCKD *xd) {
}
// Obtain contexts to signal whether a reference frame is BWDREF or ALTREF2.
-int av1_get_pred_context_brf_or_arf2(const MACROBLOCKD *xd) {
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int above_in_image = xd->up_available;
- const int left_in_image = xd->left_available;
+static int get_pred_context_brf_or_arf2(const MACROBLOCKD *xd) {
+ const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
// Count of BWDREF frames (B)
- int brf_count = 0;
+ const int brf_count = ref_counts[BWDREF_FRAME];
// Count of ALTREF2 frames (A2)
- int arf2_count = 0;
-
- if (above_in_image && is_inter_block(above_mbmi)) {
- if (above_mbmi->ref_frame[0] == BWDREF_FRAME)
- ++brf_count;
- else if (above_mbmi->ref_frame[0] == ALTREF2_FRAME)
- ++arf2_count;
- if (has_second_ref(above_mbmi)) {
- if (above_mbmi->ref_frame[1] == BWDREF_FRAME)
- ++brf_count;
- else if (above_mbmi->ref_frame[1] == ALTREF2_FRAME)
- ++arf2_count;
- }
- }
-
- if (left_in_image && is_inter_block(left_mbmi)) {
- if (left_mbmi->ref_frame[0] == BWDREF_FRAME)
- ++brf_count;
- else if (left_mbmi->ref_frame[0] == ALTREF2_FRAME)
- ++arf2_count;
- if (has_second_ref(left_mbmi)) {
- if (left_mbmi->ref_frame[1] == BWDREF_FRAME)
- ++brf_count;
- else if (left_mbmi->ref_frame[1] == ALTREF2_FRAME)
- ++arf2_count;
- }
- }
+ const int arf2_count = ref_counts[ALTREF2_FRAME];
const int pred_context =
(brf_count == arf2_count) ? 1 : ((brf_count < arf2_count) ? 0 : 2);
@@ -969,168 +413,57 @@ int av1_get_pred_context_brf_or_arf2(const MACROBLOCKD *xd) {
return pred_context;
}
-// Signal the 2nd reference frame for a compound mode be either
-// ALTREF, or ALTREF2/BWDREF.
-int av1_get_pred_context_comp_bwdref_p(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- (void)cm;
- return av1_get_pred_context_brfarf2_or_arf(xd);
+// == Context functions for comp ref ==
+//
+// Returns a context number for the given MB prediction signal.
+// Signals whether the first reference frame for a compound mode is
+// GOLDEN/LAST3 or LAST/LAST2.
+int av1_get_pred_context_comp_ref_p(const MACROBLOCKD *xd) {
+ return get_pred_context_ll2_or_l3gld(xd);
}
-// Signal the 2nd reference frame for a compound mode be either
-// ALTREF2 or BWDREF.
-int av1_get_pred_context_comp_bwdref_p1(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- (void)cm;
- return av1_get_pred_context_brf_or_arf2(xd);
+// Returns a context number for the given MB prediction signal.
+// Signals whether the first reference frame for a compound mode is LAST,
+// given that it is known to be either LAST or LAST2.
+int av1_get_pred_context_comp_ref_p1(const MACROBLOCKD *xd) {
+ return get_pred_context_last_or_last2(xd);
}
-#else // !CONFIG_EXT_REFS
-
// Returns a context number for the given MB prediction signal
-int av1_get_pred_context_comp_ref_p(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int above_in_image = xd->up_available;
- const int left_in_image = xd->left_available;
-
- // Note:
- // The mode info data structure has a one element border above and to the
- // left of the entries corresponding to real macroblocks.
- // The prediction flags in these dummy entries are initialized to 0.
- const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
- const int var_ref_idx = !fix_ref_idx;
-
- if (above_in_image && left_in_image) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
-
- if (above_intra && left_intra) { // intra/intra (2)
- pred_context = 2;
- } else if (above_intra || left_intra) { // intra/inter
- const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
-
- if (!has_second_ref(edge_mbmi)) // single pred (1/3)
- pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]);
- else // comp pred (1/3)
- pred_context =
- 1 + 2 * (edge_mbmi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]);
- } else { // inter/inter
- const int l_sg = !has_second_ref(left_mbmi);
- const int a_sg = !has_second_ref(above_mbmi);
- const MV_REFERENCE_FRAME vrfa =
- a_sg ? above_mbmi->ref_frame[0] : above_mbmi->ref_frame[var_ref_idx];
- const MV_REFERENCE_FRAME vrfl =
- l_sg ? left_mbmi->ref_frame[0] : left_mbmi->ref_frame[var_ref_idx];
-
- if (vrfa == vrfl && cm->comp_var_ref[1] == vrfa) {
- pred_context = 0;
- } else if (l_sg && a_sg) { // single/single
- if ((vrfa == cm->comp_fixed_ref && vrfl == cm->comp_var_ref[0]) ||
- (vrfl == cm->comp_fixed_ref && vrfa == cm->comp_var_ref[0]))
- pred_context = 4;
- else if (vrfa == vrfl)
- pred_context = 3;
- else
- pred_context = 1;
- } else if (l_sg || a_sg) { // single/comp
- const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl;
- const MV_REFERENCE_FRAME rfs = a_sg ? vrfa : vrfl;
- if (vrfc == cm->comp_var_ref[1] && rfs != cm->comp_var_ref[1])
- pred_context = 1;
- else if (rfs == cm->comp_var_ref[1] && vrfc != cm->comp_var_ref[1])
- pred_context = 2;
- else
- pred_context = 4;
- } else if (vrfa == vrfl) { // comp/comp
- pred_context = 4;
- } else {
- pred_context = 2;
- }
- }
- } else if (above_in_image || left_in_image) { // one edge available
- const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
-
- if (!is_inter_block(edge_mbmi)) {
- pred_context = 2;
- } else {
- if (has_second_ref(edge_mbmi))
- pred_context =
- 4 * (edge_mbmi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]);
- else
- pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]);
- }
- } else { // no edges available (2)
- pred_context = 2;
- }
- assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
-
- return pred_context;
+// Signals whether the first reference frame for a compound mode is GOLDEN,
+// given that it is known to be either GOLDEN or LAST3.
+int av1_get_pred_context_comp_ref_p2(const MACROBLOCKD *xd) {
+ return get_pred_context_last3_or_gld(xd);
}
-#endif // CONFIG_EXT_REFS
+// Signals whether the 2nd reference frame for a compound mode is ALTREF
+// or ALTREF2/BWDREF.
+int av1_get_pred_context_comp_bwdref_p(const MACROBLOCKD *xd) {
+ return get_pred_context_brfarf2_or_arf(xd);
+}
-#if CONFIG_EXT_REFS
+// Signals whether the 2nd reference frame for a compound mode is ALTREF2
+// or BWDREF.
+int av1_get_pred_context_comp_bwdref_p1(const MACROBLOCKD *xd) {
+ return get_pred_context_brf_or_arf2(xd);
+}
+// == Context functions for single ref ==
+//
// For the bit to signal whether the single reference is a forward reference
// frame or a backward reference frame.
int av1_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
-
- // Note:
- // The mode info data structure has a one element border above and to the
- // left of the entries corresponding to real macroblocks.
- // The prediction flags in these dummy entries are initialised to 0.
- if (has_above && has_left) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
-
- if (above_intra && left_intra) { // intra/intra
- pred_context = 2;
- } else if (above_intra || left_intra) { // intra/inter or inter/intra
- const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
-
- if (!has_second_ref(edge_mbmi)) // single
- pred_context = 4 * (!CHECK_BACKWARD_REFS(edge_mbmi->ref_frame[0]));
- else // comp
- pred_context = 2;
- } else { // inter/inter
- const int above_has_second = has_second_ref(above_mbmi);
- const int left_has_second = has_second_ref(left_mbmi);
+ const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
- const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ // Count of forward reference frames
+ const int fwd_count = ref_counts[LAST_FRAME] + ref_counts[LAST2_FRAME] +
+ ref_counts[LAST3_FRAME] + ref_counts[GOLDEN_FRAME];
+ // Count of backward reference frames
+ const int bwd_count = ref_counts[BWDREF_FRAME] + ref_counts[ALTREF2_FRAME] +
+ ref_counts[ALTREF_FRAME];
- if (above_has_second && left_has_second) { // comp/comp
- pred_context = 2;
- } else if (above_has_second || left_has_second) { // single/comp
- const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
-
- pred_context = (!CHECK_BACKWARD_REFS(rfs)) ? 4 : 1;
- } else { // single/single
- pred_context = 2 * (!CHECK_BACKWARD_REFS(above0)) +
- 2 * (!CHECK_BACKWARD_REFS(left0));
- }
- }
- } else if (has_above || has_left) { // one edge available
- const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
- if (!is_inter_block(edge_mbmi)) { // intra
- pred_context = 2;
- } else { // inter
- if (!has_second_ref(edge_mbmi)) // single
- pred_context = 4 * (!CHECK_BACKWARD_REFS(edge_mbmi->ref_frame[0]));
- else // comp
- pred_context = 2;
- }
- } else { // no edges available
- pred_context = 2;
- }
+ const int pred_context =
+ (fwd_count == bwd_count) ? 1 : ((fwd_count < bwd_count) ? 0 : 2);
assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
return pred_context;
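As a rough illustration (not part of the patch), the new derivation collapses to a three-way comparison of the forward and backward reference counts gathered from the above/left neighbors; the hypothetical helper below restates that mapping.

    /* Sketch of the mapping used above: 0 when the neighbors lean backward,
       1 when balanced, 2 when they lean forward. */
    static int single_ref_p1_ctx_sketch(int fwd_count, int bwd_count) {
      if (fwd_count == bwd_count) return 1;
      return (fwd_count < bwd_count) ? 0 : 2;
    }
    /* e.g. fwd=2,bwd=0 -> 2;  fwd=0,bwd=1 -> 0;  fwd=1,bwd=1 -> 1. */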
@@ -1140,445 +473,29 @@ int av1_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
// non-ALTREF backward reference frame, knowing that it shall be either of
// these 2 choices.
int av1_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
- return av1_get_pred_context_brfarf2_or_arf(xd);
+ return get_pred_context_brfarf2_or_arf(xd);
}
// For the bit to signal whether the single reference is LAST3/GOLDEN or
// LAST2/LAST, knowing that it shall be either of these 2 choices.
int av1_get_pred_context_single_ref_p3(const MACROBLOCKD *xd) {
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
-
- // Note:
- // The mode info data structure has a one element border above and to the
- // left of the entries corresponding to real macroblocks.
- // The prediction flags in these dummy entries are initialised to 0.
- if (has_above && has_left) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
-
- if (above_intra && left_intra) { // intra/intra
- pred_context = 2;
- } else if (above_intra || left_intra) { // intra/inter or inter/intra
- const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
- if (!has_second_ref(edge_mbmi)) { // single
- if (CHECK_BACKWARD_REFS(edge_mbmi->ref_frame[0]))
- pred_context = 3;
- else
- pred_context = 4 * CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]);
- } else { // comp
- pred_context = 1 +
- 2 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) ||
- CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[1]));
- }
- } else { // inter/inter
- const int above_has_second = has_second_ref(above_mbmi);
- const int left_has_second = has_second_ref(left_mbmi);
- const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
- const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
-
- if (above_has_second && left_has_second) { // comp/comp
- if (above0 == left0 && above1 == left1)
- pred_context =
- 3 * (CHECK_LAST_OR_LAST2(above0) || CHECK_LAST_OR_LAST2(above1) ||
- CHECK_LAST_OR_LAST2(left0) || CHECK_LAST_OR_LAST2(left1));
- else
- pred_context = 2;
- } else if (above_has_second || left_has_second) { // single/comp
- const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
- const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
- const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
-
- if (CHECK_LAST_OR_LAST2(rfs))
- pred_context =
- 3 + (CHECK_LAST_OR_LAST2(crf1) || CHECK_LAST_OR_LAST2(crf2));
- else if (CHECK_GOLDEN_OR_LAST3(rfs))
- pred_context =
- (CHECK_LAST_OR_LAST2(crf1) || CHECK_LAST_OR_LAST2(crf2));
- else
- pred_context =
- 1 + 2 * (CHECK_LAST_OR_LAST2(crf1) || CHECK_LAST_OR_LAST2(crf2));
- } else { // single/single
- if (CHECK_BACKWARD_REFS(above0) && CHECK_BACKWARD_REFS(left0)) {
- pred_context = 2 + (above0 == left0);
- } else if (CHECK_BACKWARD_REFS(above0) || CHECK_BACKWARD_REFS(left0)) {
- const MV_REFERENCE_FRAME edge0 =
- CHECK_BACKWARD_REFS(above0) ? left0 : above0;
- pred_context = 4 * CHECK_LAST_OR_LAST2(edge0);
- } else {
- pred_context =
- 2 * CHECK_LAST_OR_LAST2(above0) + 2 * CHECK_LAST_OR_LAST2(left0);
- }
- }
- }
- } else if (has_above || has_left) { // one edge available
- const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
-
- if (!is_inter_block(edge_mbmi) ||
- (CHECK_BACKWARD_REFS(edge_mbmi->ref_frame[0]) &&
- !has_second_ref(edge_mbmi)))
- pred_context = 2;
- else if (!has_second_ref(edge_mbmi)) // single
- pred_context = 4 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]));
- else // comp
- pred_context = 3 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) ||
- CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[1]));
- } else { // no edges available (2)
- pred_context = 2;
- }
-
- assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
- return pred_context;
+ return get_pred_context_ll2_or_l3gld(xd);
}
// For the bit to signal whether the single reference is LAST2_FRAME or
// LAST_FRAME, knowing that it shall be either of these 2 choices.
-//
-// NOTE(zoeliu): The probability that ref_frame[0] is LAST2_FRAME, conditioned
-// on it being either LAST2_FRAME or LAST_FRAME.
int av1_get_pred_context_single_ref_p4(const MACROBLOCKD *xd) {
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
-
- // Note:
- // The mode info data structure has a one element border above and to the
- // left of the entries corresponding to real macroblocks.
- // The prediction flags in these dummy entries are initialised to 0.
- if (has_above && has_left) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
-
- if (above_intra && left_intra) { // intra/intra
- pred_context = 2;
- } else if (above_intra || left_intra) { // intra/inter or inter/intra
- const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
- if (!has_second_ref(edge_mbmi)) { // single
- if (!CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]))
- pred_context = 3;
- else
- pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
- } else { // comp
- pred_context = 1 +
- 2 * (edge_mbmi->ref_frame[0] == LAST_FRAME ||
- edge_mbmi->ref_frame[1] == LAST_FRAME);
- }
- } else { // inter/inter
- const int above_has_second = has_second_ref(above_mbmi);
- const int left_has_second = has_second_ref(left_mbmi);
- const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
- const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
-
- if (above_has_second && left_has_second) { // comp/comp
- if (above0 == left0 && above1 == left1)
- pred_context = 3 * (above0 == LAST_FRAME || above1 == LAST_FRAME ||
- left0 == LAST_FRAME || left1 == LAST_FRAME);
- else
- pred_context = 2;
- } else if (above_has_second || left_has_second) { // single/comp
- const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
- const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
- const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
-
- if (rfs == LAST_FRAME)
- pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
- else if (rfs == LAST2_FRAME)
- pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
- else
- pred_context = 1 + 2 * (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
- } else { // single/single
- if (!CHECK_LAST_OR_LAST2(above0) && !CHECK_LAST_OR_LAST2(left0)) {
- pred_context = 2 + (above0 == left0);
- } else if (!CHECK_LAST_OR_LAST2(above0) ||
- !CHECK_LAST_OR_LAST2(left0)) {
- const MV_REFERENCE_FRAME edge0 =
- !CHECK_LAST_OR_LAST2(above0) ? left0 : above0;
- pred_context = 4 * (edge0 == LAST_FRAME);
- } else {
- pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME);
- }
- }
- }
- } else if (has_above || has_left) { // one edge available
- const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
-
- if (!is_inter_block(edge_mbmi) ||
- (!CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) &&
- !has_second_ref(edge_mbmi)))
- pred_context = 2;
- else if (!has_second_ref(edge_mbmi)) // single
- pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
- else // comp
- pred_context = 3 * (edge_mbmi->ref_frame[0] == LAST_FRAME ||
- edge_mbmi->ref_frame[1] == LAST_FRAME);
- } else { // no edges available (2)
- pred_context = 2;
- }
-
- assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
- return pred_context;
+ return get_pred_context_last_or_last2(xd);
}
// For the bit to signal whether the single reference is GOLDEN_FRAME or
// LAST3_FRAME, knowing that it shall be either of these 2 choices.
-//
-// NOTE(zoeliu): The probability that ref_frame[0] is GOLDEN_FRAME, conditioned
-// on it being either GOLDEN_FRAME or LAST3_FRAME.
int av1_get_pred_context_single_ref_p5(const MACROBLOCKD *xd) {
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
-
- // Note:
- // The mode info data structure has a one element border above and to the
- // left of the entries corresponding to real macroblocks.
- // The prediction flags in these dummy entries are initialised to 0.
- if (has_above && has_left) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
-
- if (above_intra && left_intra) { // intra/intra
- pred_context = 2;
- } else if (above_intra || left_intra) { // intra/inter or inter/intra
- const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
- if (!has_second_ref(edge_mbmi)) { // single
- if (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0]))
- pred_context = 3;
- else
- pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST3_FRAME);
- } else { // comp
- pred_context = 1 +
- 2 * (edge_mbmi->ref_frame[0] == LAST3_FRAME ||
- edge_mbmi->ref_frame[1] == LAST3_FRAME);
- }
- } else { // inter/inter
- const int above_has_second = has_second_ref(above_mbmi);
- const int left_has_second = has_second_ref(left_mbmi);
- const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
- const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
-
- if (above_has_second && left_has_second) { // comp/comp
- if (above0 == left0 && above1 == left1)
- pred_context = 3 * (above0 == LAST3_FRAME || above1 == LAST3_FRAME ||
- left0 == LAST3_FRAME || left1 == LAST3_FRAME);
- else
- pred_context = 2;
- } else if (above_has_second || left_has_second) { // single/comp
- const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
- const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
- const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
-
- if (rfs == LAST3_FRAME)
- pred_context = 3 + (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME);
- else if (rfs == GOLDEN_FRAME)
- pred_context = (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME);
- else
- pred_context = 1 + 2 * (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME);
- } else { // single/single
- if (!CHECK_GOLDEN_OR_LAST3(above0) && !CHECK_GOLDEN_OR_LAST3(left0)) {
- pred_context = 2 + (above0 == left0);
- } else if (!CHECK_GOLDEN_OR_LAST3(above0) ||
- !CHECK_GOLDEN_OR_LAST3(left0)) {
- const MV_REFERENCE_FRAME edge0 =
- !CHECK_GOLDEN_OR_LAST3(above0) ? left0 : above0;
- pred_context = 4 * (edge0 == LAST3_FRAME);
- } else {
- pred_context =
- 2 * (above0 == LAST3_FRAME) + 2 * (left0 == LAST3_FRAME);
- }
- }
- }
- } else if (has_above || has_left) { // one edge available
- const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
-
- if (!is_inter_block(edge_mbmi) ||
- (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0]) &&
- !has_second_ref(edge_mbmi)))
- pred_context = 2;
- else if (!has_second_ref(edge_mbmi)) // single
- pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST3_FRAME);
- else // comp
- pred_context = 3 * (edge_mbmi->ref_frame[0] == LAST3_FRAME ||
- edge_mbmi->ref_frame[1] == LAST3_FRAME);
- } else { // no edges available (2)
- pred_context = 2;
- }
-
- assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
- return pred_context;
+ return get_pred_context_last3_or_gld(xd);
}
// For the bit to signal whether the single reference is ALTREF2_FRAME or
// BWDREF_FRAME, knowing that it shall be either of these 2 choices.
int av1_get_pred_context_single_ref_p6(const MACROBLOCKD *xd) {
- return av1_get_pred_context_brf_or_arf2(xd);
-}
-
-#else // !CONFIG_EXT_REFS
-
-int av1_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
- // Note:
- // The mode info data structure has a one element border above and to the
- // left of the entries corresponding to real macroblocks.
- // The prediction flags in these dummy entries are initialized to 0.
- if (has_above && has_left) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
-
- if (above_intra && left_intra) { // intra/intra
- pred_context = 2;
- } else if (above_intra || left_intra) { // intra/inter or inter/intra
- const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
- if (!has_second_ref(edge_mbmi))
- pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
- else
- pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME ||
- edge_mbmi->ref_frame[1] == LAST_FRAME);
- } else { // inter/inter
- const int above_has_second = has_second_ref(above_mbmi);
- const int left_has_second = has_second_ref(left_mbmi);
- const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
- const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
-
- if (above_has_second && left_has_second) {
- pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME ||
- left0 == LAST_FRAME || left1 == LAST_FRAME);
- } else if (above_has_second || left_has_second) {
- const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
- const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
- const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
-
- if (rfs == LAST_FRAME)
- pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
- else
- pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
- } else {
- pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME);
- }
- }
- } else if (has_above || has_left) { // one edge available
- const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
- if (!is_inter_block(edge_mbmi)) { // intra
- pred_context = 2;
- } else { // inter
- if (!has_second_ref(edge_mbmi))
- pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
- else
- pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME ||
- edge_mbmi->ref_frame[1] == LAST_FRAME);
- }
- } else { // no edges available
- pred_context = 2;
- }
-
- assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
- return pred_context;
+ return get_pred_context_brf_or_arf2(xd);
}
-
-int av1_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
-
- // Note:
- // The mode info data structure has a one element border above and to the
- // left of the entries corresponding to real macroblocks.
- // The prediction flags in these dummy entries are initialized to 0.
- if (has_above && has_left) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
-
- if (above_intra && left_intra) { // intra/intra
- pred_context = 2;
- } else if (above_intra || left_intra) { // intra/inter or inter/intra
- const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
- if (!has_second_ref(edge_mbmi)) {
- if (edge_mbmi->ref_frame[0] == LAST_FRAME)
- pred_context = 3;
- else
- pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
- } else {
- pred_context = 1 +
- 2 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME ||
- edge_mbmi->ref_frame[1] == GOLDEN_FRAME);
- }
- } else { // inter/inter
- const int above_has_second = has_second_ref(above_mbmi);
- const int left_has_second = has_second_ref(left_mbmi);
- const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
- const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
-
- if (above_has_second && left_has_second) {
- if (above0 == left0 && above1 == left1)
- pred_context =
- 3 * (above0 == GOLDEN_FRAME || above1 == GOLDEN_FRAME ||
- left0 == GOLDEN_FRAME || left1 == GOLDEN_FRAME);
- else
- pred_context = 2;
- } else if (above_has_second || left_has_second) {
- const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
- const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
- const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
-
- if (rfs == GOLDEN_FRAME)
- pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
- else if (rfs != GOLDEN_FRAME && rfs != LAST_FRAME)
- pred_context = crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME;
- else
- pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
- } else {
- if (above0 == LAST_FRAME && left0 == LAST_FRAME) {
- pred_context = 3;
- } else if (above0 == LAST_FRAME || left0 == LAST_FRAME) {
- const MV_REFERENCE_FRAME edge0 =
- (above0 == LAST_FRAME) ? left0 : above0;
- pred_context = 4 * (edge0 == GOLDEN_FRAME);
- } else {
- pred_context =
- 2 * (above0 == GOLDEN_FRAME) + 2 * (left0 == GOLDEN_FRAME);
- }
- }
- }
- } else if (has_above || has_left) { // one edge available
- const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
-
- if (!is_inter_block(edge_mbmi) ||
- (edge_mbmi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mbmi)))
- pred_context = 2;
- else if (!has_second_ref(edge_mbmi))
- pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
- else
- pred_context = 3 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME ||
- edge_mbmi->ref_frame[1] == GOLDEN_FRAME);
- } else { // no edges available (2)
- pred_context = 2;
- }
- assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
- return pred_context;
-}
-
-#endif // CONFIG_EXT_REFS
diff --git a/third_party/aom/av1/common/pred_common.h b/third_party/aom/av1/common/pred_common.h
index db4618a59..6a835c467 100644
--- a/third_party/aom/av1/common/pred_common.h
+++ b/third_party/aom/av1/common/pred_common.h
@@ -13,6 +13,7 @@
#define AV1_COMMON_PRED_COMMON_H_
#include "av1/common/blockd.h"
+#include "av1/common/mvref_common.h"
#include "av1/common/onyxc_int.h"
#include "aom_dsp/aom_dsp_common.h"
@@ -39,115 +40,174 @@ static INLINE int get_segment_id(const AV1_COMMON *const cm,
return segment_id;
}
+static INLINE int av1_get_spatial_seg_pred(const AV1_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ int mi_row, int mi_col,
+ int *cdf_index) {
+ int prev_ul = -1; // top left segment_id
+ int prev_l = -1; // left segment_id
+ int prev_u = -1; // top segment_id
+ if ((xd->up_available) && (xd->left_available)) {
+ prev_ul = get_segment_id(cm, cm->current_frame_seg_map, BLOCK_4X4,
+ mi_row - 1, mi_col - 1);
+ }
+ if (xd->up_available) {
+ prev_u = get_segment_id(cm, cm->current_frame_seg_map, BLOCK_4X4,
+ mi_row - 1, mi_col - 0);
+ }
+ if (xd->left_available) {
+ prev_l = get_segment_id(cm, cm->current_frame_seg_map, BLOCK_4X4,
+ mi_row - 0, mi_col - 1);
+ }
+
+ // Pick CDF index based on number of matching/out-of-bounds segment IDs.
+ if (prev_ul < 0 || prev_u < 0 || prev_l < 0) /* Edge case */
+ *cdf_index = 0;
+ else if ((prev_ul == prev_u) && (prev_ul == prev_l))
+ *cdf_index = 2;
+ else if ((prev_ul == prev_u) || (prev_ul == prev_l) || (prev_u == prev_l))
+ *cdf_index = 1;
+ else
+ *cdf_index = 0;
+
+ // If two or more neighbors match, return that ID as the predictor; otherwise prev_l.
+ if (prev_u == -1) // edge case
+ return prev_l == -1 ? 0 : prev_l;
+ if (prev_l == -1) // edge case
+ return prev_u;
+ return (prev_ul == prev_u) ? prev_u : prev_l;
+}
+
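A quick trace of av1_get_spatial_seg_pred() above (illustrative values only):

    /* prev_ul = 2, prev_u = 1, prev_l = 1  ->  *cdf_index = 1, returns 1
       prev_ul = 3, prev_u = 3, prev_l = 3  ->  *cdf_index = 2, returns 3
       any neighbor out of bounds (-1)      ->  *cdf_index = 0            */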
static INLINE int av1_get_pred_context_seg_id(const MACROBLOCKD *xd) {
- const MODE_INFO *const above_mi = xd->above_mi;
- const MODE_INFO *const left_mi = xd->left_mi;
- const int above_sip =
- (above_mi != NULL) ? above_mi->mbmi.seg_id_predicted : 0;
- const int left_sip = (left_mi != NULL) ? left_mi->mbmi.seg_id_predicted : 0;
+ const MB_MODE_INFO *const above_mi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mi = xd->left_mbmi;
+ const int above_sip = (above_mi != NULL) ? above_mi->seg_id_predicted : 0;
+ const int left_sip = (left_mi != NULL) ? left_mi->seg_id_predicted : 0;
return above_sip + left_sip;
}
-static INLINE aom_prob av1_get_pred_prob_seg_id(
- const struct segmentation_probs *segp, const MACROBLOCKD *xd) {
- return segp->pred_probs[av1_get_pred_context_seg_id(xd)];
+static INLINE int get_comp_index_context(const AV1_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ MB_MODE_INFO *mbmi = xd->mi[0];
+ int bck_idx = cm->frame_refs[mbmi->ref_frame[0] - LAST_FRAME].idx;
+ int fwd_idx = cm->frame_refs[mbmi->ref_frame[1] - LAST_FRAME].idx;
+ int bck_frame_index = 0, fwd_frame_index = 0;
+ int cur_frame_index = cm->cur_frame->cur_frame_offset;
+
+ if (bck_idx >= 0)
+ bck_frame_index = cm->buffer_pool->frame_bufs[bck_idx].cur_frame_offset;
+
+ if (fwd_idx >= 0)
+ fwd_frame_index = cm->buffer_pool->frame_bufs[fwd_idx].cur_frame_offset;
+ int fwd = abs(get_relative_dist(cm, fwd_frame_index, cur_frame_index));
+ int bck = abs(get_relative_dist(cm, cur_frame_index, bck_frame_index));
+
+ const MB_MODE_INFO *const above_mi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mi = xd->left_mbmi;
+
+ int above_ctx = 0, left_ctx = 0;
+ const int offset = (fwd == bck);
+
+ if (above_mi) {
+ if (has_second_ref(above_mi))
+ above_ctx = above_mi->compound_idx;
+ else if (above_mi->ref_frame[0] == ALTREF_FRAME)
+ above_ctx = 1;
+ }
+
+ if (left_mi) {
+ if (has_second_ref(left_mi))
+ left_ctx = left_mi->compound_idx;
+ else if (left_mi->ref_frame[0] == ALTREF_FRAME)
+ left_ctx = 1;
+ }
+
+ return above_ctx + left_ctx + 3 * offset;
+}
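For reference (not part of the patch), two illustrative evaluations of get_comp_index_context(); the result is bounded by above_ctx + left_ctx + 3, i.e. it lies in [0, 5].

    /* fwd == bck, both neighbors compound with compound_idx = 1 -> 1 + 1 + 3 = 5
       fwd != bck, neither neighbor compound nor single ALTREF   -> 0 + 0 + 0 = 0 */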
+
+static INLINE int get_comp_group_idx_context(const MACROBLOCKD *xd) {
+ const MB_MODE_INFO *const above_mi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mi = xd->left_mbmi;
+ int above_ctx = 0, left_ctx = 0;
+
+ if (above_mi) {
+ if (has_second_ref(above_mi))
+ above_ctx = above_mi->comp_group_idx;
+ else if (above_mi->ref_frame[0] == ALTREF_FRAME)
+ above_ctx = 3;
+ }
+ if (left_mi) {
+ if (has_second_ref(left_mi))
+ left_ctx = left_mi->comp_group_idx;
+ else if (left_mi->ref_frame[0] == ALTREF_FRAME)
+ left_ctx = 3;
+ }
+
+ return AOMMIN(5, above_ctx + left_ctx);
}
-#if CONFIG_NEW_MULTISYMBOL
static INLINE aom_cdf_prob *av1_get_pred_cdf_seg_id(
struct segmentation_probs *segp, const MACROBLOCKD *xd) {
return segp->pred_cdf[av1_get_pred_context_seg_id(xd)];
}
-#endif
-static INLINE int av1_get_skip_context(const MACROBLOCKD *xd) {
- const MODE_INFO *const above_mi = xd->above_mi;
- const MODE_INFO *const left_mi = xd->left_mi;
- const int above_skip = (above_mi != NULL) ? above_mi->mbmi.skip : 0;
- const int left_skip = (left_mi != NULL) ? left_mi->mbmi.skip : 0;
- return above_skip + left_skip;
+static INLINE int av1_get_skip_mode_context(const MACROBLOCKD *xd) {
+ const MB_MODE_INFO *const above_mi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mi = xd->left_mbmi;
+ const int above_skip_mode = above_mi ? above_mi->skip_mode : 0;
+ const int left_skip_mode = left_mi ? left_mi->skip_mode : 0;
+ return above_skip_mode + left_skip_mode;
}
-static INLINE aom_prob av1_get_skip_prob(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- return cm->fc->skip_probs[av1_get_skip_context(xd)];
+static INLINE int av1_get_skip_context(const MACROBLOCKD *xd) {
+ const MB_MODE_INFO *const above_mi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mi = xd->left_mbmi;
+ const int above_skip = above_mi ? above_mi->skip : 0;
+ const int left_skip = left_mi ? left_mi->skip : 0;
+ return above_skip + left_skip;
}
-#if CONFIG_DUAL_FILTER
int av1_get_pred_context_switchable_interp(const MACROBLOCKD *xd, int dir);
-#else
-int av1_get_pred_context_switchable_interp(const MACROBLOCKD *xd);
-#endif
-
-#if CONFIG_EXT_INTRA
-#if CONFIG_INTRA_INTERP
-int av1_get_pred_context_intra_interp(const MACROBLOCKD *xd);
-#endif // CONFIG_INTRA_INTERP
-#endif // CONFIG_EXT_INTRA
-#if CONFIG_PALETTE_DELTA_ENCODING
// Get a list of palette base colors that are used in the above and left blocks,
// referred to as "color cache". The return value is the number of colors in the
// cache (<= 2 * PALETTE_MAX_SIZE). The color values are stored in "cache"
// in ascending order.
int av1_get_palette_cache(const MACROBLOCKD *const xd, int plane,
uint16_t *cache);
-#endif // CONFIG_PALETTE_DELTA_ENCODING
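A minimal usage sketch for the declaration above (hypothetical caller, assuming only the documented contract that the return value counts the valid entries and that they are sorted ascending):

    uint16_t cache[2 * PALETTE_MAX_SIZE];
    const int n_cache = av1_get_palette_cache(xd, /*plane=*/0, cache);
    for (int i = 0; i < n_cache; ++i) {
      /* cache[i] is the i-th smallest palette color used by the neighbors. */
    }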
-int av1_get_intra_inter_context(const MACROBLOCKD *xd);
+static INLINE int av1_get_palette_bsize_ctx(BLOCK_SIZE bsize) {
+ return num_pels_log2_lookup[bsize] - num_pels_log2_lookup[BLOCK_8X8];
+}
-static INLINE aom_prob av1_get_intra_inter_prob(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- return cm->fc->intra_inter_prob[av1_get_intra_inter_context(xd)];
+static INLINE int av1_get_palette_mode_ctx(const MACROBLOCKD *xd) {
+ const MB_MODE_INFO *const above_mi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mi = xd->left_mbmi;
+ int ctx = 0;
+ if (above_mi) ctx += (above_mi->palette_mode_info.palette_size[0] > 0);
+ if (left_mi) ctx += (left_mi->palette_mode_info.palette_size[0] > 0);
+ return ctx;
}
-int av1_get_reference_mode_context(const AV1_COMMON *cm, const MACROBLOCKD *xd);
+int av1_get_intra_inter_context(const MACROBLOCKD *xd);
-static INLINE aom_prob av1_get_reference_mode_prob(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- return cm->fc->comp_inter_prob[av1_get_reference_mode_context(cm, xd)];
-}
-#if CONFIG_NEW_MULTISYMBOL
-static INLINE aom_cdf_prob *av1_get_reference_mode_cdf(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- return xd->tile_ctx->comp_inter_cdf[av1_get_reference_mode_context(cm, xd)];
+int av1_get_reference_mode_context(const MACROBLOCKD *xd);
+
+static INLINE aom_cdf_prob *av1_get_reference_mode_cdf(const MACROBLOCKD *xd) {
+ return xd->tile_ctx->comp_inter_cdf[av1_get_reference_mode_context(xd)];
}
-#endif
-#if CONFIG_EXT_COMP_REFS
int av1_get_comp_reference_type_context(const MACROBLOCKD *xd);
-static INLINE aom_prob av1_get_comp_reference_type_prob(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- return cm->fc->comp_ref_type_prob[av1_get_comp_reference_type_context(xd)];
-}
+// == Uni-directional contexts ==
int av1_get_pred_context_uni_comp_ref_p(const MACROBLOCKD *xd);
-static INLINE aom_prob av1_get_pred_prob_uni_comp_ref_p(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_uni_comp_ref_p(xd);
- return cm->fc->uni_comp_ref_prob[pred_context][0];
-}
-
int av1_get_pred_context_uni_comp_ref_p1(const MACROBLOCKD *xd);
-static INLINE aom_prob
-av1_get_pred_prob_uni_comp_ref_p1(const AV1_COMMON *cm, const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_uni_comp_ref_p1(xd);
- return cm->fc->uni_comp_ref_prob[pred_context][1];
-}
-
int av1_get_pred_context_uni_comp_ref_p2(const MACROBLOCKD *xd);
-static INLINE aom_prob
-av1_get_pred_prob_uni_comp_ref_p2(const AV1_COMMON *cm, const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_uni_comp_ref_p2(xd);
- return cm->fc->uni_comp_ref_prob[pred_context][2];
-}
-
-#if CONFIG_NEW_MULTISYMBOL
static INLINE aom_cdf_prob *av1_get_comp_reference_type_cdf(
const MACROBLOCKD *xd) {
const int pred_context = av1_get_comp_reference_type_context(xd);
@@ -171,211 +231,126 @@ static INLINE aom_cdf_prob *av1_get_pred_cdf_uni_comp_ref_p2(
const int pred_context = av1_get_pred_context_uni_comp_ref_p2(xd);
return xd->tile_ctx->uni_comp_ref_cdf[pred_context][2];
}
-#endif // CONFIG_NEW_MULTISYMBOL
-#endif // CONFIG_EXT_COMP_REFS
-int av1_get_pred_context_comp_ref_p(const AV1_COMMON *cm,
- const MACROBLOCKD *xd);
+// == Bi-directional contexts ==
-#if CONFIG_NEW_MULTISYMBOL
-static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_ref_p(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_comp_ref_p(cm, xd);
- return xd->tile_ctx->comp_ref_cdf[pred_context][0];
-}
-#endif
+int av1_get_pred_context_comp_ref_p(const MACROBLOCKD *xd);
-static INLINE aom_prob av1_get_pred_prob_comp_ref_p(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_comp_ref_p(cm, xd);
- return cm->fc->comp_ref_prob[pred_context][0];
-}
+int av1_get_pred_context_comp_ref_p1(const MACROBLOCKD *xd);
-#if CONFIG_EXT_REFS
-int av1_get_pred_context_comp_ref_p1(const AV1_COMMON *cm,
- const MACROBLOCKD *xd);
+int av1_get_pred_context_comp_ref_p2(const MACROBLOCKD *xd);
-#if CONFIG_NEW_MULTISYMBOL
-static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_ref_p1(
- const AV1_COMMON *cm, const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_comp_ref_p1(cm, xd);
- return xd->tile_ctx->comp_ref_cdf[pred_context][1];
-}
-#endif // CONFIG_NEW_MULTISYMBOL
+int av1_get_pred_context_comp_bwdref_p(const MACROBLOCKD *xd);
-static INLINE aom_prob av1_get_pred_prob_comp_ref_p1(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_comp_ref_p1(cm, xd);
- return cm->fc->comp_ref_prob[pred_context][1];
+int av1_get_pred_context_comp_bwdref_p1(const MACROBLOCKD *xd);
+
+static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_ref_p(const MACROBLOCKD *xd) {
+ const int pred_context = av1_get_pred_context_comp_ref_p(xd);
+ return xd->tile_ctx->comp_ref_cdf[pred_context][0];
}
-int av1_get_pred_context_comp_ref_p2(const AV1_COMMON *cm,
- const MACROBLOCKD *xd);
+static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_ref_p1(
+ const MACROBLOCKD *xd) {
+ const int pred_context = av1_get_pred_context_comp_ref_p1(xd);
+ return xd->tile_ctx->comp_ref_cdf[pred_context][1];
+}
-#if CONFIG_NEW_MULTISYMBOL
static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_ref_p2(
- const AV1_COMMON *cm, const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_comp_ref_p2(cm, xd);
+ const MACROBLOCKD *xd) {
+ const int pred_context = av1_get_pred_context_comp_ref_p2(xd);
return xd->tile_ctx->comp_ref_cdf[pred_context][2];
}
-#endif // CONFIG_NEW_MULTISYMBOL
-static INLINE aom_prob av1_get_pred_prob_comp_ref_p2(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_comp_ref_p2(cm, xd);
- return cm->fc->comp_ref_prob[pred_context][2];
-}
-
-int av1_get_pred_context_comp_bwdref_p(const AV1_COMMON *cm,
- const MACROBLOCKD *xd);
-
-#if CONFIG_NEW_MULTISYMBOL
static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_bwdref_p(
- const AV1_COMMON *cm, const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_comp_bwdref_p(cm, xd);
+ const MACROBLOCKD *xd) {
+ const int pred_context = av1_get_pred_context_comp_bwdref_p(xd);
return xd->tile_ctx->comp_bwdref_cdf[pred_context][0];
}
-#endif // CONFIG_NEW_MULTISYMBOL
-static INLINE aom_prob av1_get_pred_prob_comp_bwdref_p(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_comp_bwdref_p(cm, xd);
- return cm->fc->comp_bwdref_prob[pred_context][0];
-}
-
-int av1_get_pred_context_comp_bwdref_p1(const AV1_COMMON *cm,
- const MACROBLOCKD *xd);
-
-#if CONFIG_NEW_MULTISYMBOL
static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_bwdref_p1(
- const AV1_COMMON *cm, const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_comp_bwdref_p1(cm, xd);
+ const MACROBLOCKD *xd) {
+ const int pred_context = av1_get_pred_context_comp_bwdref_p1(xd);
return xd->tile_ctx->comp_bwdref_cdf[pred_context][1];
}
-#endif // CONFIG_NEW_MULTISYMBOL
-static INLINE aom_prob av1_get_pred_prob_comp_bwdref_p1(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_comp_bwdref_p1(cm, xd);
- return cm->fc->comp_bwdref_prob[pred_context][1];
-}
-#endif // CONFIG_EXT_REFS
+// == Single contexts ==
int av1_get_pred_context_single_ref_p1(const MACROBLOCKD *xd);
-static INLINE aom_prob av1_get_pred_prob_single_ref_p1(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- return cm->fc->single_ref_prob[av1_get_pred_context_single_ref_p1(xd)][0];
-}
-
int av1_get_pred_context_single_ref_p2(const MACROBLOCKD *xd);
-static INLINE aom_prob av1_get_pred_prob_single_ref_p2(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- return cm->fc->single_ref_prob[av1_get_pred_context_single_ref_p2(xd)][1];
-}
-
-#if CONFIG_EXT_REFS
int av1_get_pred_context_single_ref_p3(const MACROBLOCKD *xd);
-static INLINE aom_prob av1_get_pred_prob_single_ref_p3(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- return cm->fc->single_ref_prob[av1_get_pred_context_single_ref_p3(xd)][2];
-}
-
int av1_get_pred_context_single_ref_p4(const MACROBLOCKD *xd);
-static INLINE aom_prob av1_get_pred_prob_single_ref_p4(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- return cm->fc->single_ref_prob[av1_get_pred_context_single_ref_p4(xd)][3];
-}
-
int av1_get_pred_context_single_ref_p5(const MACROBLOCKD *xd);
-static INLINE aom_prob av1_get_pred_prob_single_ref_p5(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- return cm->fc->single_ref_prob[av1_get_pred_context_single_ref_p5(xd)][4];
-}
-
int av1_get_pred_context_single_ref_p6(const MACROBLOCKD *xd);
-static INLINE aom_prob av1_get_pred_prob_single_ref_p6(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- return cm->fc->single_ref_prob[av1_get_pred_context_single_ref_p6(xd)][5];
-}
-#endif // CONFIG_EXT_REFS
-
-#if CONFIG_NEW_MULTISYMBOL
static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p1(
- const AV1_COMMON *cm, const MACROBLOCKD *xd) {
- (void)cm;
+ const MACROBLOCKD *xd) {
return xd->tile_ctx
->single_ref_cdf[av1_get_pred_context_single_ref_p1(xd)][0];
}
static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p2(
- const AV1_COMMON *cm, const MACROBLOCKD *xd) {
- (void)cm;
+ const MACROBLOCKD *xd) {
return xd->tile_ctx
->single_ref_cdf[av1_get_pred_context_single_ref_p2(xd)][1];
}
-#if CONFIG_EXT_REFS
static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p3(
- const AV1_COMMON *cm, const MACROBLOCKD *xd) {
- (void)cm;
+ const MACROBLOCKD *xd) {
return xd->tile_ctx
->single_ref_cdf[av1_get_pred_context_single_ref_p3(xd)][2];
}
static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p4(
- const AV1_COMMON *cm, const MACROBLOCKD *xd) {
- (void)cm;
+ const MACROBLOCKD *xd) {
return xd->tile_ctx
->single_ref_cdf[av1_get_pred_context_single_ref_p4(xd)][3];
}
static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p5(
- const AV1_COMMON *cm, const MACROBLOCKD *xd) {
- (void)cm;
+ const MACROBLOCKD *xd) {
return xd->tile_ctx
->single_ref_cdf[av1_get_pred_context_single_ref_p5(xd)][4];
}
static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p6(
- const AV1_COMMON *cm, const MACROBLOCKD *xd) {
- (void)cm;
+ const MACROBLOCKD *xd) {
return xd->tile_ctx
->single_ref_cdf[av1_get_pred_context_single_ref_p6(xd)][5];
}
-#endif // CONFIG_EXT_REFS
-#endif // CONFIG_NEW_MULTISYMBOL
-
-#if CONFIG_COMPOUND_SINGLEREF
-int av1_get_inter_mode_context(const MACROBLOCKD *xd);
-
-static INLINE aom_prob av1_get_inter_mode_prob(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- return cm->fc->comp_inter_mode_prob[av1_get_inter_mode_context(xd)];
-}
-#endif // CONFIG_COMPOUND_SINGLEREF
// Returns a context number for the given MB prediction signal
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real blocks.
// The prediction flags in these dummy entries are initialized to 0.
static INLINE int get_tx_size_context(const MACROBLOCKD *xd) {
- const int max_tx_size = max_txsize_lookup[xd->mi[0]->mbmi.sb_type];
+ const MB_MODE_INFO *mbmi = xd->mi[0];
const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const TX_SIZE max_tx_size = max_txsize_rect_lookup[mbmi->sb_type];
+ const int max_tx_wide = tx_size_wide[max_tx_size];
+ const int max_tx_high = tx_size_high[max_tx_size];
const int has_above = xd->up_available;
const int has_left = xd->left_available;
- int above_ctx = (has_above && !above_mbmi->skip)
- ? (int)txsize_sqr_map[above_mbmi->tx_size]
- : max_tx_size;
- int left_ctx = (has_left && !left_mbmi->skip)
- ? (int)txsize_sqr_map[left_mbmi->tx_size]
- : max_tx_size;
-
- if (!has_left) left_ctx = above_ctx;
- if (!has_above) above_ctx = left_ctx;
- return (above_ctx + left_ctx) > max_tx_size + TX_SIZE_LUMA_MIN;
+ int above = xd->above_txfm_context[0] >= max_tx_wide;
+ int left = xd->left_txfm_context[0] >= max_tx_high;
+
+ if (has_above)
+ if (is_inter_block(above_mbmi))
+ above = block_size_wide[above_mbmi->sb_type] >= max_tx_wide;
+
+ if (has_left)
+ if (is_inter_block(left_mbmi))
+ left = block_size_high[left_mbmi->sb_type] >= max_tx_high;
+
+ if (has_above && has_left)
+ return (above + left);
+ else if (has_above)
+ return above;
+ else if (has_left)
+ return left;
+ else
+ return 0;
}
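An equivalent one-line summary of the final branches above (an observation, not part of the patch), since an unavailable neighbor's term is simply masked out by its availability flag:

    return has_above * above + has_left * left;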
#ifdef __cplusplus
diff --git a/third_party/aom/av1/common/pvq.c b/third_party/aom/av1/common/pvq.c
deleted file mode 100644
index 221c90c04..000000000
--- a/third_party/aom/av1/common/pvq.c
+++ /dev/null
@@ -1,1007 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/* clang-format off */
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include "odintrin.h"
-#include "partition.h"
-#include "pvq.h"
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-/* Imported from encode.c in daala */
-/* These are the PVQ equivalent of quantization matrices, except that
- the values are per-band. */
-#define OD_MASKING_DISABLED 0
-#define OD_MASKING_ENABLED 1
-
-const unsigned char OD_LUMA_QM_Q4[2][OD_QM_SIZE] = {
-/* Flat quantization for PSNR. The DC component isn't 16 because the DC
- magnitude compensation is done here for inter (Haar DC doesn't need it).
- Masking disabled: */
- {
- 16, 16,
- 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16
- },
-/* The non-flat AC coefficients compensate for the non-linear scaling caused
- by activity masking. The values are currently hand-tuned so that the rate
- of each band remains roughly constant when enabling activity masking
- on intra.
- Masking enabled: */
- {
- 16, 16,
- 16, 18, 28, 32,
- 16, 14, 20, 20, 28, 32,
- 16, 11, 14, 14, 17, 17, 22, 28
- }
-};
-
-const unsigned char OD_CHROMA_QM_Q4[2][OD_QM_SIZE] = {
-/* Chroma quantization is different because of the reduced lapping.
- FIXME: Use the same matrix as luma for 4:4:4.
- Masking disabled: */
- {
- 16, 16,
- 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16
- },
-/* The AC part is flat for chroma because it has no activity masking.
- Masking enabled: */
- {
- 16, 16,
- 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16
- }
-};
-
-/* No interpolation, always use od_flat_qm_q4, but use a different scale for
- each plane.
- FIXME: Add interpolation and properly tune chroma. */
-const od_qm_entry OD_DEFAULT_QMS[2][2][OD_NPLANES_MAX] = {
- /* Masking disabled */
- { { { 4, 256, OD_LUMA_QM_Q4[OD_MASKING_DISABLED] },
- { 4, 256, OD_CHROMA_QM_Q4[OD_MASKING_DISABLED] },
- { 4, 256, OD_CHROMA_QM_Q4[OD_MASKING_DISABLED] } },
- { { 0, 0, NULL},
- { 0, 0, NULL},
- { 0, 0, NULL} } },
- /* Masking enabled */
- { { { 4, 256, OD_LUMA_QM_Q4[OD_MASKING_ENABLED] },
- { 4, 256, OD_CHROMA_QM_Q4[OD_MASKING_ENABLED] },
- { 4, 256, OD_CHROMA_QM_Q4[OD_MASKING_ENABLED] } },
- { { 0, 0, NULL},
- { 0, 0, NULL},
- { 0, 0, NULL} } }
-};
-
-/* Constants for the beta parameter, which controls how activity masking is
- used.
- beta = 1 / (1 - alpha), so when beta is 1, alpha is 0 and activity
- masking is disabled. When beta is 1.5, activity masking is used. Note that
- activity masking is neither used for 4x4 blocks nor for chroma. */
-#define OD_BETA(b) OD_QCONST32(b, OD_BETA_SHIFT)
-static const od_val16 OD_PVQ_BETA4_LUMA[1] = {OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA8_LUMA[4] = {OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA16_LUMA[7] = {OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA32_LUMA[10] = {OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.)};
-
-static const od_val16 OD_PVQ_BETA4_LUMA_MASKING[1] = {OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA8_LUMA_MASKING[4] = {OD_BETA(1.5),
- OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5)};
-static const od_val16 OD_PVQ_BETA16_LUMA_MASKING[7] = {OD_BETA(1.5),
- OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5),
- OD_BETA(1.5)};
-static const od_val16 OD_PVQ_BETA32_LUMA_MASKING[10] = {OD_BETA(1.5),
- OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5),
- OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5)};
-
-static const od_val16 OD_PVQ_BETA4_CHROMA[1] = {OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA8_CHROMA[4] = {OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA16_CHROMA[7] = {OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)};
-static const od_val16 OD_PVQ_BETA32_CHROMA[10] = {OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
- OD_BETA(1.), OD_BETA(1.)};
-
-const od_val16 *const OD_PVQ_BETA[2][OD_NPLANES_MAX][OD_TXSIZES + 1] = {
- {{OD_PVQ_BETA4_LUMA, OD_PVQ_BETA8_LUMA,
- OD_PVQ_BETA16_LUMA, OD_PVQ_BETA32_LUMA},
- {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
- OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA},
- {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
- OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA}},
- {{OD_PVQ_BETA4_LUMA_MASKING, OD_PVQ_BETA8_LUMA_MASKING,
- OD_PVQ_BETA16_LUMA_MASKING, OD_PVQ_BETA32_LUMA_MASKING},
- {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
- OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA},
- {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
- OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA}}
-};
-
-
-void od_interp_qm(unsigned char *out, int q, const od_qm_entry *entry1,
- const od_qm_entry *entry2) {
- int i;
- if (entry2 == NULL || entry2->qm_q4 == NULL
- || q < entry1->interp_q << OD_COEFF_SHIFT) {
- /* Use entry1. */
- for (i = 0; i < OD_QM_SIZE; i++) {
- out[i] = OD_MINI(255, entry1->qm_q4[i]*entry1->scale_q8 >> 8);
- }
- }
- else if (entry1 == NULL || entry1->qm_q4 == NULL
- || q > entry2->interp_q << OD_COEFF_SHIFT) {
- /* Use entry2. */
- for (i = 0; i < OD_QM_SIZE; i++) {
- out[i] = OD_MINI(255, entry2->qm_q4[i]*entry2->scale_q8 >> 8);
- }
- }
- else {
- /* Interpolate between entry1 and entry2. The interpolation is linear
- in terms of log(q) vs log(m*scale). Considering that we're ultimately
- multiplying the result it makes sense, but we haven't tried other
- interpolation methods. */
- double x;
- const unsigned char *m1;
- const unsigned char *m2;
- int q1;
- int q2;
- m1 = entry1->qm_q4;
- m2 = entry2->qm_q4;
- q1 = entry1->interp_q << OD_COEFF_SHIFT;
- q2 = entry2->interp_q << OD_COEFF_SHIFT;
- x = (log(q)-log(q1))/(log(q2)-log(q1));
- for (i = 0; i < OD_QM_SIZE; i++) {
- out[i] = OD_MINI(255, (int)floor(.5 + (1./256)*exp(
- x*log(m2[i]*entry2->scale_q8) + (1 - x)*log(m1[i]*entry1->scale_q8))));
- }
- }
-}
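Restated as a formula (derived from the interpolation branch above), with q1/q2 the two entries' interpolation points after the OD_COEFF_SHIFT scaling and scale1/scale2 their scale_q8 values:

    x      = (log(q) - log(q1)) / (log(q2) - log(q1))
    out[i] = min(255, round((1/256) * exp(x*log(m2[i]*scale2) + (1 - x)*log(m1[i]*scale1))))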
-
-void od_adapt_pvq_ctx_reset(od_pvq_adapt_ctx *state, int is_keyframe) {
- od_pvq_codeword_ctx *ctx;
- int i;
- int pli;
- int bs;
- ctx = &state->pvq_codeword_ctx;
- OD_CDFS_INIT_DYNAMIC(state->pvq_param_model[0].cdf);
- OD_CDFS_INIT_DYNAMIC(state->pvq_param_model[1].cdf);
- OD_CDFS_INIT_DYNAMIC(state->pvq_param_model[2].cdf);
- for (i = 0; i < 2*OD_TXSIZES; i++) {
- ctx->pvq_adapt[4*i + OD_ADAPT_K_Q8] = 384;
- ctx->pvq_adapt[4*i + OD_ADAPT_SUM_EX_Q8] = 256;
- ctx->pvq_adapt[4*i + OD_ADAPT_COUNT_Q8] = 104;
- ctx->pvq_adapt[4*i + OD_ADAPT_COUNT_EX_Q8] = 128;
- }
- OD_CDFS_INIT_DYNAMIC(ctx->pvq_k1_cdf);
- for (pli = 0; pli < OD_NPLANES_MAX; pli++) {
- for (bs = 0; bs < OD_TXSIZES; bs++)
- for (i = 0; i < PVQ_MAX_PARTITIONS; i++) {
- state->pvq_exg[pli][bs][i] = 2 << 16;
- }
- }
- for (i = 0; i < OD_TXSIZES*PVQ_MAX_PARTITIONS; i++) {
- state->pvq_ext[i] = is_keyframe ? 24576 : 2 << 16;
- }
- OD_CDFS_INIT_DYNAMIC(state->pvq_gaintheta_cdf);
- OD_CDFS_INIT_Q15(state->pvq_skip_dir_cdf);
- OD_CDFS_INIT_DYNAMIC(ctx->pvq_split_cdf);
-}
-
-/* QMs are arranged from smallest to largest blocksizes, first for
- blocks with decimation=0, followed by blocks with decimation=1.*/
-int od_qm_offset(int bs, int xydec)
-{
- return xydec*OD_QM_STRIDE + OD_QM_OFFSET(bs);
-}
-
-#if defined(OD_FLOAT_PVQ)
-#define OD_DEFAULT_MAG 1.0
-#else
-#define OD_DEFAULT_MAG OD_QM_SCALE
-#endif
-
-/* Initialize the quantization matrix. */
-// Note: When hybrid transform and corresponding scan order is used by PVQ,
-// we don't need separate qm and qm_inv for each transform type,
-// because AOM does not do magnitude compensation (i.e. simply x16 for all coeffs).
-void od_init_qm(int16_t *x, int16_t *x_inv, const int *qm) {
- int i;
- int j;
- int16_t y[OD_TXSIZE_MAX*OD_TXSIZE_MAX];
- int16_t y_inv[OD_TXSIZE_MAX*OD_TXSIZE_MAX];
- int16_t *x1;
- int16_t *x1_inv;
- int off;
- int bs;
- int xydec;
- for (bs = 0; bs < OD_TXSIZES; bs++) {
- for (xydec = 0; xydec < 2; xydec++) {
- off = od_qm_offset(bs, xydec);
- x1 = x + off;
- x1_inv = x_inv + off;
- for (i = 0; i < 4 << bs; i++) {
- for (j = 0; j < 4 << bs; j++) {
- /*This will ultimately be clamped to fit in 16 bits.*/
- od_val32 mag;
- int16_t ytmp;
- mag = OD_DEFAULT_MAG;
- if (i != 0 || j != 0) {
-#if defined(OD_FLOAT_PVQ)
- mag /= 0.0625*qm[(i << 1 >> bs)*8 + (j << 1 >> bs)];
-#else
- int qmv;
- qmv = qm[(i << 1 >> bs)*8 + (j << 1 >> bs)];
- mag *= 16;
- mag = (mag + (qmv >> 1))/qmv;
-#endif
- OD_ASSERT(mag > 0.0);
- }
- /*Convert to fit in 16 bits.*/
-#if defined(OD_FLOAT_PVQ)
- y[i*(4 << bs) + j] = (int16_t)OD_MINI(OD_QM_SCALE_MAX,
- (int32_t)floor(.5 + mag*OD_QM_SCALE));
- y_inv[i*(4 << bs) + j] = (int16_t)floor(.5
- + OD_QM_SCALE*OD_QM_INV_SCALE/(double)y[i*(4 << bs) + j]);
-#else
- y[i*(4 << bs) + j] = (int16_t)OD_MINI(OD_QM_SCALE_MAX, mag);
- ytmp = y[i*(4 << bs) + j];
- y_inv[i*(4 << bs) + j] = (int16_t)((OD_QM_SCALE*OD_QM_INV_SCALE
- + (ytmp >> 1))/ytmp);
-#endif
- }
- }
- od_raster_to_coding_order_16(x1, 4 << bs, y, 4 << bs);
- od_raster_to_coding_order_16(x1_inv, 4 << bs, y_inv, 4 << bs);
- }
- }
-}
-
-/* Maps each possible size (n) in the split k-tokenizer to a different value.
- Possible values of n are:
- 2, 3, 4, 7, 8, 14, 15, 16, 31, 32, 63, 64, 127, 128
- Since we don't care about the order (even in the bit-stream) the simplest
- ordering (implemented here) is:
- 14, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 */
-int od_pvq_size_ctx(int n) {
- int logn;
- int odd;
- logn = OD_ILOG(n - 1);
- odd = n & 1;
- return 2*logn - 1 - odd - 7*(n == 14);
-}
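Evaluating the expression above on a few sizes confirms the ordering given in the comment (with OD_ILOG(x) being floor(log2(x)) + 1):

    n = 14  -> 2*4 - 1 - 0 - 7 = 0      n = 2   -> 2*1 - 1 - 0 = 1
    n = 3   -> 2*2 - 1 - 1     = 2      n = 16  -> 2*4 - 1 - 0 = 7
    n = 128 -> 2*7 - 1 - 0     = 13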
-
-/* Maps a length n to a context for the (k=1, n<=16) coder, with a special
- case when n is the original length (orig_length=1) of the vector (i.e. we
- haven't split it yet). For orig_length=0, we use the same mapping as
- od_pvq_size_ctx() up to n=16. When orig_length=1, we map lengths
- 7, 8, 14, 15 to contexts 8 to 11. */
-int od_pvq_k1_ctx(int n, int orig_length) {
- if (orig_length) return 8 + 2*(n > 8) + (n & 1);
- else return od_pvq_size_ctx(n);
-}
-
-/* Indexing for the packed quantization matrices. */
-int od_qm_get_index(int bs, int band) {
- /* The -band/3 term is due to the fact that we force corresponding horizontal
- and vertical bands to have the same quantization. */
- OD_ASSERT(bs >= 0 && bs < OD_TXSIZES);
- return bs*(bs + 1) + band - band/3;
-}
-
-#if !defined(OD_FLOAT_PVQ)
-/*See celt/mathops.c in Opus and tools/cos_search.c.*/
-static int16_t od_pvq_cos_pi_2(int16_t x)
-{
- int16_t x2;
- x2 = OD_MULT16_16_Q15(x, x);
- return OD_MINI(32767, (1073758164 - x*x + x2*(-7654 + OD_MULT16_16_Q16(x2,
- 16573 + OD_MULT16_16_Q16(-2529, x2)))) >> 15);
-}
-#endif
-
-/*Approximates cos(x) for -pi < x < pi.
- Input is in OD_THETA_SCALE.*/
-od_val16 od_pvq_cos(od_val32 x) {
-#if defined(OD_FLOAT_PVQ)
- return cos(x);
-#else
- /*Wrap x around by masking, since cos is periodic.*/
- x = x & 0x0001ffff;
- if (x > (1 << 16)) {
- x = (1 << 17) - x;
- }
- if (x & 0x00007fff) {
- if (x < (1 << 15)) {
- return od_pvq_cos_pi_2((int16_t)x);
- }
- else {
- return -od_pvq_cos_pi_2((int16_t)(65536 - x));
- }
- }
- else {
- if (x & 0x0000ffff) {
- return 0;
- }
- else if (x & 0x0001ffff) {
- return -32767;
- }
- else {
- return 32767;
- }
- }
-#endif
-}
-
-/*Approximates sin(x) for 0 <= x < pi.
- Input is in OD_THETA_SCALE.*/
-od_val16 od_pvq_sin(od_val32 x) {
-#if defined(OD_FLOAT_PVQ)
- return sin(x);
-#else
- return od_pvq_cos(32768 - x);
-#endif
-}
-
-#if !defined(OD_FLOAT_PVQ)
-/* Computes an upper-bound on the number of bits required to store the L2 norm
- of a vector (excluding sign). */
-int od_vector_log_mag(const od_coeff *x, int n) {
- int i;
- int32_t sum;
- sum = 0;
- for (i = 0; i < n; i++) {
- int16_t tmp;
- tmp = x[i] >> 8;
- sum += tmp*(int32_t)tmp;
- }
- /* We add one full bit (instead of rounding OD_ILOG() up) for safety because
- the >> 8 above causes the sum to be slightly underestimated. */
- return 8 + 1 + OD_ILOG(n + sum)/2;
-}
-#endif
-
-/** Computes Householder reflection that aligns the reference r to the
- * dimension in r with the greatest absolute value. The reflection
- * vector is returned in r.
- *
- * @param [in,out] r reference vector to be reflected, reflection
- * also returned in r
- * @param [in] n number of dimensions in r
- * @param [in] gr gain of reference vector
- * @param [out] sign sign of reflection
- * @return dimension number to which reflection aligns
- **/
-int od_compute_householder(od_val16 *r, int n, od_val32 gr, int *sign,
- int shift) {
- int m;
- int i;
- int s;
- od_val16 maxr;
- OD_UNUSED(shift);
- /* Pick component with largest magnitude. Not strictly
- * necessary, but it helps numerical stability */
- m = 0;
- maxr = 0;
- for (i = 0; i < n; i++) {
- if (OD_ABS(r[i]) > maxr) {
- maxr = OD_ABS(r[i]);
- m = i;
- }
- }
- s = r[m] > 0 ? 1 : -1;
- /* This turns r into a Householder reflection vector that would reflect
- * the original r[] to e_m */
- r[m] += OD_SHR_ROUND(gr*s, shift);
- *sign = s;
- return m;
-}
-
-#if !defined(OD_FLOAT_PVQ)
-#define OD_RCP_INSHIFT 15
-#define OD_RCP_OUTSHIFT 14
-static od_val16 od_rcp(od_val16 x)
-{
- int i;
- od_val16 n;
- od_val16 r;
- i = OD_ILOG(x) - 1;
- /*n is Q15 with range [0,1).*/
- n = OD_VSHR_ROUND(x, i - OD_RCP_INSHIFT) - (1 << OD_RCP_INSHIFT);
- /*Start with a linear approximation:
- r = 1.8823529411764706-0.9411764705882353*n.
- The coefficients and the result are Q14 in the range [15420,30840].*/
- r = 30840 + OD_MULT16_16_Q15(-15420, n);
- /*Perform two Newton iterations:
- r -= r*((r*n)-1.Q15)
- = r*((r*n)+(r-1.Q15)).*/
- r = r - OD_MULT16_16_Q15(r, (OD_MULT16_16_Q15(r, n) + r - 32768));
- /*We subtract an extra 1 in the second iteration to avoid overflow; it also
- neatly compensates for truncation error in the rest of the process.*/
- r = r - (1 + OD_MULT16_16_Q15(r, OD_MULT16_16_Q15(r, n) + r - 32768));
- /*r is now the Q15 solution to 2/(n+1), with a maximum relative error
- of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
- error of 1.24665/32768.*/
- return OD_VSHR_ROUND(r, i - OD_RCP_OUTSHIFT);
-}
-#endif
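For reference, the two iterations above are the standard Newton-Raphson update for a reciprocal: with a = 1 + n (the normalized input in [1, 2)),

    r_{k+1} = r_k * (2 - a*r_k) = r_k - r_k * (a*r_k - 1)

which roughly doubles the number of correct bits per step; the extra -1 in the second iteration only guards against overflow, as the comment notes.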
-
-/** Applies Householder reflection from compute_householder(). The
- * reflection is its own inverse.
- *
- * @param [out] out reflected vector
- * @param [in] x vector to be reflected
- * @param [in] r reflection
- * @param [in] n number of dimensions in x,r
- */
-void od_apply_householder(od_val16 *out, const od_val16 *x, const od_val16 *r,
- int n) {
- int i;
- od_val32 proj;
- od_val16 proj_1;
- od_val32 l2r;
-#if !defined(OD_FLOAT_PVQ)
- od_val16 proj_norm;
- od_val16 l2r_norm;
- od_val16 rcp;
- int proj_shift;
- int l2r_shift;
- int outshift;
-#endif
- /*FIXME: Can we get l2r and/or l2r_shift from an earlier computation?*/
- l2r = 0;
- for (i = 0; i < n; i++) {
- l2r += OD_MULT16_16(r[i], r[i]);
- }
- /* Apply Householder reflection */
- proj = 0;
- for (i = 0; i < n; i++) {
- proj += OD_MULT16_16(r[i], x[i]);
- }
-#if defined(OD_FLOAT_PVQ)
- proj_1 = proj*2./(1e-100 + l2r);
- for (i = 0; i < n; i++) {
- out[i] = x[i] - r[i]*proj_1;
- }
-#else
- /*l2r_norm is [0.5, 1.0[ in Q15.*/
- l2r_shift = (OD_ILOG(l2r) - 1) - 14;
- l2r_norm = OD_VSHR_ROUND(l2r, l2r_shift);
- rcp = od_rcp(l2r_norm);
- proj_shift = (OD_ILOG(abs(proj)) - 1) - 14;
- /*proj_norm is [0.5, 1.0[ in Q15.*/
- proj_norm = OD_VSHR_ROUND(proj, proj_shift);
- proj_1 = OD_MULT16_16_Q15(proj_norm, rcp);
- /*The proj*2. in the float code becomes -1 in the final outshift.
- The sign of l2r_shift is positive since we're taking the reciprocal of
- l2r_norm and this is a right shift.*/
- outshift = OD_MINI(30, OD_RCP_OUTSHIFT - proj_shift - 1 + l2r_shift);
- if (outshift >= 0) {
- for (i = 0; i < n; i++) {
- int32_t tmp;
- tmp = OD_MULT16_16(r[i], proj_1);
- tmp = OD_SHR_ROUND(tmp, outshift);
- out[i] = x[i] - tmp;
- }
- }
- else {
- /*FIXME: Can we make this case impossible?
- Right now, if r[] is all zeros except for 1, 2, or 3 ones, and
- if x[] is all zeros except for large values at the same position as the
- ones in r[], then we can end up with a shift of -1.*/
- for (i = 0; i < n; i++) {
- int32_t tmp;
- tmp = OD_MULT16_16(r[i], proj_1);
- tmp = OD_SHL(tmp, -outshift);
- out[i] = x[i] - tmp;
- }
- }
-#endif
-}
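In exact arithmetic the floating-point path above is the familiar Householder update (the fixed-point path approximates the same quantity via the Q15 reciprocal):

    out = x - (2 * dot(r, x) / dot(r, r)) * r

so the original reference vector is mapped onto the m-th coordinate axis (up to sign) chosen by od_compute_householder().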
-
-#if !defined(OD_FLOAT_PVQ)
-static od_val16 od_beta_rcp(od_val16 beta){
- if (beta == OD_BETA(1.))
- return OD_BETA(1.);
- else if (beta == OD_BETA(1.5))
- return OD_BETA(1./1.5);
- else {
- od_val16 rcp_beta;
- /*Shift by 1 less, transposing beta to range [.5, .75] and thus < 32768.*/
- rcp_beta = od_rcp(beta << (OD_RCP_INSHIFT - 1 - OD_BETA_SHIFT));
- return OD_SHR_ROUND(rcp_beta, OD_RCP_OUTSHIFT + 1 - OD_BETA_SHIFT);
- }
-}
-
-#define OD_EXP2_INSHIFT 15
-#define OD_EXP2_FRACSHIFT 15
-#define OD_EXP2_OUTSHIFT 15
-static const int32_t OD_EXP2_C[5] = {32768, 22709, 7913, 1704, 443};
-/*Output is [1.0, 2.0) in Q(OD_EXP2_FRACSHIFT).
- It does not include the integer offset, which is added in od_exp2 after the
-  final shift.*/
-static int32_t od_exp2_frac(int32_t x)
-{
- return OD_MULT16_16_Q15(x, (OD_EXP2_C[1] + OD_MULT16_16_Q15(x,
- (OD_EXP2_C[2] + OD_MULT16_16_Q15(x, (OD_EXP2_C[3]
- + OD_MULT16_16_Q15(x, OD_EXP2_C[4])))))));
-}
-
-/** Base-2 exponential approximation (2^x) with Q15 input and output.*/
-static int32_t od_exp2(int32_t x)
-{
- int integer;
- int32_t frac;
- integer = x >> OD_EXP2_INSHIFT;
- if (integer > 14)
- return 0x7f000000;
- else if (integer < -15)
- return 0;
- frac = od_exp2_frac(x - OD_SHL(integer, OD_EXP2_INSHIFT));
- return OD_VSHR_ROUND(OD_EXP2_C[0] + frac, -integer) + 1;
-}
-
-#define OD_LOG2_INSHIFT 15
-#define OD_LOG2_OUTSHIFT 15
-#define OD_LOG2_INSCALE_1 (1./(1 << OD_LOG2_INSHIFT))
-#define OD_LOG2_OUTSCALE (1 << OD_LOG2_OUTSHIFT)
-static int16_t od_log2(int16_t x)
-{
- return x + OD_MULT16_16_Q15(x, (14482 + OD_MULT16_16_Q15(x, (-23234
- + OD_MULT16_16_Q15(x, (13643 + OD_MULT16_16_Q15(x, (-6403
- + OD_MULT16_16_Q15(x, 1515)))))))));
-}
-
-static int32_t od_pow(int32_t x, od_val16 beta)
-{
- int16_t t;
- int xshift;
- int log2_x;
- od_val32 logr;
- /*FIXME: this conditional is to avoid doing log2(0).*/
- if (x == 0)
- return 0;
- log2_x = (OD_ILOG(x) - 1);
- xshift = log2_x - OD_LOG2_INSHIFT;
- /*t should be in range [0.0, 1.0[ in Q(OD_LOG2_INSHIFT).*/
- t = OD_VSHR(x, xshift) - (1 << OD_LOG2_INSHIFT);
- /*log2(g/OD_COMPAND_SCALE) = log2(x) - OD_COMPAND_SHIFT in
- Q(OD_LOG2_OUTSHIFT).*/
- logr = od_log2(t) + (log2_x - OD_COMPAND_SHIFT)*OD_LOG2_OUTSCALE;
- logr = (od_val32)OD_MULT16_32_QBETA(beta, logr);
- return od_exp2(logr);
-}
-#endif
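
A floating-point sketch of the decomposition used by od_pow() above (the
helper name and the use of frexp()/log2()/exp2() are illustrative
assumptions; the OD_COMPAND_SHIFT offset folded into the real code is
omitted): x^beta is evaluated as 2^(beta*log2(x)), with x split into a
mantissa in [1, 2) and an integer exponent just as the Q15 code does.

#include <math.h>
#include <stdio.h>

static double pow_sketch(double x, double beta) {
  int e;
  double m;
  if (x == 0) return 0;
  m = frexp(x, &e);  /* x = m*2^e with m in [0.5, 1) */
  m *= 2;
  e -= 1;            /* renormalize so m is in [1, 2) */
  return exp2(beta*(log2(m) + e));
}

int main(void) {
  printf("%f %f\n", pow_sketch(10, 1.5), pow(10, 1.5));
  return 0;
}
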
-
-/** Gain companding: raises gain to the power 1/beta for activity masking.
- *
- * @param [in] g real (uncompanded) gain
- * @param [in] q0 uncompanded quality parameter
- * @param [in] beta activity masking beta param (exponent)
- * @return g^(1/beta)
- */
-static od_val32 od_gain_compand(od_val32 g, int q0, od_val16 beta) {
-#if defined(OD_FLOAT_PVQ)
- if (beta == 1) return OD_CGAIN_SCALE*g/(double)q0;
- else {
- return OD_CGAIN_SCALE*OD_COMPAND_SCALE*pow(g*OD_COMPAND_SCALE_1,
- 1./beta)/(double)q0;
- }
-#else
- if (beta == OD_BETA(1)) return (OD_CGAIN_SCALE*g + (q0 >> 1))/q0;
- else {
- int32_t expr;
- expr = od_pow(g, od_beta_rcp(beta));
- expr <<= OD_CGAIN_SHIFT + OD_COMPAND_SHIFT - OD_EXP2_OUTSHIFT;
- return (expr + (q0 >> 1))/q0;
- }
-#endif
-}
-
-#if !defined(OD_FLOAT_PVQ)
-#define OD_SQRT_INSHIFT 16
-#define OD_SQRT_OUTSHIFT 15
-static int16_t od_rsqrt_norm(int16_t x);
-
-static int16_t od_sqrt_norm(int32_t x)
-{
- OD_ASSERT(x < 65536);
- return OD_MINI(OD_SHR_ROUND(x*od_rsqrt_norm(x), OD_SQRT_OUTSHIFT), 32767);
-}
-
-static int16_t od_sqrt(int32_t x, int *sqrt_shift)
-{
- int k;
- int s;
- int32_t t;
- if (x == 0) {
- *sqrt_shift = 0;
- return 0;
- }
- OD_ASSERT(x < (1 << 30));
- k = ((OD_ILOG(x) - 1) >> 1);
- /*t is x in the range [0.25, 1) in QINSHIFT, or x*2^(-s).
- Shift by log2(x) - log2(0.25*(1 << INSHIFT)) to ensure 0.25 lower bound.*/
- s = 2*k - (OD_SQRT_INSHIFT - 2);
- t = OD_VSHR(x, s);
- /*We want to express od_sqrt() in terms of od_sqrt_norm(), which is
- defined as (2^OUTSHIFT)*sqrt(t*(2^-INSHIFT)) with t=x*(2^-s).
- This simplifies to 2^(OUTSHIFT-(INSHIFT/2)-(s/2))*sqrt(x), so the caller
- needs to shift right by OUTSHIFT - INSHIFT/2 - s/2.*/
- *sqrt_shift = OD_SQRT_OUTSHIFT - ((s + OD_SQRT_INSHIFT) >> 1);
- return od_sqrt_norm(t);
-}
-#endif
-
-/** Gain expanding: raises gain to the power beta for activity masking.
- *
- * @param [in] cg companded gain
- * @param [in] q0 uncompanded quality parameter
- * @param [in] beta activity masking beta param (exponent)
- * @return g^beta
- */
-od_val32 od_gain_expand(od_val32 cg0, int q0, od_val16 beta) {
- if (beta == OD_BETA(1)) {
- /*The multiply fits into 28 bits because the expanded gain has a range from
- 0 to 2^20.*/
- return OD_SHR_ROUND(cg0*q0, OD_CGAIN_SHIFT);
- }
- else if (beta == OD_BETA(1.5)) {
-#if defined(OD_FLOAT_PVQ)
- double cg;
- cg = cg0*OD_CGAIN_SCALE_1;
- cg *= q0*OD_COMPAND_SCALE_1;
- return OD_COMPAND_SCALE*cg*sqrt(cg);
-#else
- int32_t irt;
- int64_t tmp;
- int sqrt_inshift;
- int sqrt_outshift;
- /*cg0 is in Q(OD_CGAIN_SHIFT) and we need to divide it by
- 2^OD_COMPAND_SHIFT.*/
- irt = od_sqrt(cg0*q0, &sqrt_outshift);
- sqrt_inshift = (OD_CGAIN_SHIFT + OD_COMPAND_SHIFT) >> 1;
- /*tmp is in Q(OD_CGAIN_SHIFT + OD_COMPAND_SHIFT).*/
- tmp = cg0*q0*(int64_t)irt;
- /*Expanded gain must be in Q(OD_COMPAND_SHIFT), thus OD_COMPAND_SHIFT is
- not included here.*/
- return OD_MAXI(1,
- OD_VSHR_ROUND(tmp, OD_CGAIN_SHIFT + sqrt_outshift + sqrt_inshift));
-#endif
- }
- else {
-#if defined(OD_FLOAT_PVQ)
- /*Expanded gain must be in Q(OD_COMPAND_SHIFT), hence the multiply by
- OD_COMPAND_SCALE.*/
- double cg;
- cg = cg0*OD_CGAIN_SCALE_1;
- return OD_COMPAND_SCALE*pow(cg*q0*OD_COMPAND_SCALE_1, beta);
-#else
- int32_t expr;
- int32_t cg;
- cg = OD_SHR_ROUND(cg0*q0, OD_CGAIN_SHIFT);
- expr = od_pow(cg, beta);
- /*Expanded gain must be in Q(OD_COMPAND_SHIFT), hence the subtraction by
- OD_COMPAND_SHIFT.*/
- return OD_MAXI(1, OD_SHR_ROUND(expr, OD_EXP2_OUTSHIFT - OD_COMPAND_SHIFT));
-#endif
- }
-}
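
The two functions are inverses of each other. A floating-point sketch (the
constant and helper names are illustrative assumptions): companding maps g to
(g/C)^(1/beta)*C/q0 and expanding applies the reverse mapping, so the pair
round-trips up to rounding.

#include <math.h>
#include <stdio.h>

#define COMPAND 2048.0  /* stand-in for OD_COMPAND_SCALE */

static double compand_sketch(double g, int q0, double beta) {
  return COMPAND*pow(g/COMPAND, 1/beta)/q0;
}

static double expand_sketch(double cg, int q0, double beta) {
  return COMPAND*pow(cg*q0/COMPAND, beta);
}

int main(void) {
  double g = 12345.0;
  double cg = compand_sketch(g, 20, 1.5);
  printf("companded %f round trip %f\n", cg, expand_sketch(cg, 20, 1.5));
  return 0;
}
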
-
-/** Computes the raw and quantized/companded gain of a given input
- * vector
- *
- * @param [in] x vector of input data
- * @param [in] n number of elements in vector x
- * @param [in] q0 quantizer
- * @param [out] g raw gain
- * @param [in] beta activity masking beta param
- * @param [in] bshift shift to be applied to raw gain
- * @return quantized/companded gain
- */
-od_val32 od_pvq_compute_gain(const od_val16 *x, int n, int q0, od_val32 *g,
- od_val16 beta, int bshift) {
- int i;
- od_val32 acc;
-#if !defined(OD_FLOAT_PVQ)
- od_val32 irt;
- int sqrt_shift;
-#else
- OD_UNUSED(bshift);
-#endif
- acc = 0;
- for (i = 0; i < n; i++) {
- acc += x[i]*(od_val32)x[i];
- }
-#if defined(OD_FLOAT_PVQ)
- *g = sqrt(acc);
-#else
- irt = od_sqrt(acc, &sqrt_shift);
- *g = OD_VSHR_ROUND(irt, sqrt_shift - bshift);
-#endif
- /* Normalize gain by quantization step size and apply companding
-     (if beta != 1). */
- return od_gain_compand(*g, q0, beta);
-}
-
-/** Compute theta quantization range from quantized/companded gain
- *
- * @param [in] qcg quantized companded gain value
- * @param [in] beta activity masking beta param
- * @return max theta value
- */
-int od_pvq_compute_max_theta(od_val32 qcg, od_val16 beta){
- /* Set angular resolution (in ra) to match the encoded gain */
-#if defined(OD_FLOAT_PVQ)
- int ts = (int)floor(.5 + qcg*OD_CGAIN_SCALE_1*M_PI/(2*beta));
-#else
- int ts = OD_SHR_ROUND(qcg*OD_MULT16_16_QBETA(OD_QCONST32(M_PI/2,
- OD_CGAIN_SHIFT), od_beta_rcp(beta)), OD_CGAIN_SHIFT*2);
-#endif
- /* Special case for low gains -- will need to be tuned anyway */
- if (qcg < OD_QCONST32(1.4, OD_CGAIN_SHIFT)) ts = 1;
- return ts;
-}
-
-/** Decode quantized theta value from coded value
- *
- * @param [in] t coded theta index
- * @param [in] max_theta maximum theta value
- * @return decoded theta value
- */
-od_val32 od_pvq_compute_theta(int t, int max_theta) {
- if (max_theta != 0) {
-#if defined(OD_FLOAT_PVQ)
- return OD_MINI(t, max_theta - 1)*.5*M_PI/max_theta;
-#else
- return (OD_MAX_THETA_SCALE*OD_MINI(t, max_theta - 1)
- + (max_theta >> 1))/max_theta;
-#endif
- }
- else return 0;
-}
-
-#define OD_SQRT_TBL_SHIFT (10)
-
-#define OD_ITHETA_SHIFT 15
-/** Compute the number of pulses used for PVQ encoding a vector from
- * available metrics (encode and decode side)
- *
- * @param [in] qcg quantized companded gain value
- * @param [in] itheta quantized PVQ error angle theta
- * @param [in] noref indicates presence or lack of reference
- * (prediction)
- * @param [in] n number of elements to be coded
- * @param [in] beta activity masking beta param
- * @return number of pulses to use for coding
- */
-int od_pvq_compute_k(od_val32 qcg, int itheta, int noref, int n,
- od_val16 beta) {
-#if !defined(OD_FLOAT_PVQ)
-  /*Lookup table for sqrt((n + 3)/2) and sqrt((n + 2)/2) in Q10.
- Real max values are 32792 and 32784, but clamped to stay within 16 bits.
- Update with tools/gen_sqrt_tbl if needed.*/
- static const od_val16 od_sqrt_table[2][13] = {
- {0, 0, 0, 0, 2290, 2985, 4222, 0, 8256, 0, 16416, 0, 32767},
- {0, 0, 0, 0, 2401, 3072, 4284, 0, 8287, 0, 16432, 0, 32767}};
-#endif
- if (noref) {
- if (qcg == 0) return 0;
- if (n == 15 && qcg == OD_CGAIN_SCALE && beta > OD_BETA(1.25)) {
- return 1;
- }
- else {
-#if defined(OD_FLOAT_PVQ)
- return OD_MAXI(1, (int)floor(.5 + (qcg*OD_CGAIN_SCALE_1 - .2)*
- sqrt((n + 3)/2)/beta));
-#else
- od_val16 rt;
- OD_ASSERT(OD_ILOG(n + 1) < 13);
- rt = od_sqrt_table[1][OD_ILOG(n + 1)];
- /*FIXME: get rid of 64-bit mul.*/
- return OD_MAXI(1, OD_SHR_ROUND((int64_t)((qcg
- - (int64_t)OD_QCONST32(.2, OD_CGAIN_SHIFT))*
- OD_MULT16_16_QBETA(od_beta_rcp(beta), rt)), OD_CGAIN_SHIFT
- + OD_SQRT_TBL_SHIFT));
-#endif
- }
- }
- else {
- if (itheta == 0) return 0;
- /* Sets K according to gain and theta, based on the high-rate
- PVQ distortion curves (see PVQ document). Low-rate will have to be
- perceptually tuned anyway. We subtract 0.2 from the radius as an
- approximation for the fact that the coefficients aren't identically
- distributed within a band so at low gain the number of dimensions that
- are likely to have a pulse is less than n. */
-#if defined(OD_FLOAT_PVQ)
- return OD_MAXI(1, (int)floor(.5 + (itheta - .2)*sqrt((n + 2)/2)));
-#else
- od_val16 rt;
- OD_ASSERT(OD_ILOG(n + 1) < 13);
- rt = od_sqrt_table[0][OD_ILOG(n + 1)];
- /*FIXME: get rid of 64-bit mul.*/
- return OD_MAXI(1, OD_VSHR_ROUND(((OD_SHL(itheta, OD_ITHETA_SHIFT)
- - OD_QCONST32(.2, OD_ITHETA_SHIFT)))*(int64_t)rt,
- OD_SQRT_TBL_SHIFT + OD_ITHETA_SHIFT));
-#endif
- }
-}
-
-#if !defined(OD_FLOAT_PVQ)
-#define OD_RSQRT_INSHIFT 16
-#define OD_RSQRT_OUTSHIFT 14
-/** Reciprocal sqrt approximation where the input is in the range [0.25,1) in
-    Q16 and the output is in the range (1.0, 2.0] in Q14.
-    Error is always within +/-1 of round(1/sqrt(t)).*/
-static int16_t od_rsqrt_norm(int16_t t)
-{
- int16_t n;
- int32_t r;
- int32_t r2;
- int32_t ry;
- int32_t y;
- int32_t ret;
- /* Range of n is [-16384,32767] ([-0.5,1) in Q15).*/
- n = t - 32768;
- OD_ASSERT(n >= -16384);
- /*Get a rough initial guess for the root.
- The optimal minimax quadratic approximation (using relative error) is
- r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
- Coefficients here, and the final result r, are Q14.*/
- r = (23565 + OD_MULT16_16_Q15(n, (-13481 + OD_MULT16_16_Q15(n, 6711))));
- /*We want y = t*r*r-1 in Q15, but t is 32-bit Q16 and r is Q14.
- We can compute the result from n and r using Q15 multiplies with some
- adjustment, carefully done to avoid overflow.*/
- r2 = r*r;
- y = (((r2 >> 15)*n + r2) >> 12) - 131077;
- ry = r*y;
- /*Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
- This yields the Q14 reciprocal square root of the Q16 t, with a maximum
- relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a peak
- absolute error of 2.26591/16384.*/
- ret = r + ((((ry >> 16)*(3*y) >> 3) - ry) >> 18);
- OD_ASSERT(ret >= 16384 && ret < 32768);
- return (int16_t)ret;
-}
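
A floating-point sketch of the same refinement (the helper name is an
illustrative assumption): with n = 2*t - 1 the quadratic seed matches the
coefficients quoted above, and one second-order step using y = t*r*r - 1
brings r within roughly 1e-4 of 1/sqrt(t).

#include <math.h>
#include <stdio.h>

static double rsqrt_sketch(double t) {
  /* t is assumed to lie in [0.25, 1). */
  double n = 2*t - 1;
  double r = 1.437799046117536
      + n*(-0.823394375837328 + n*0.4096419668459485);
  double y = t*r*r - 1;
  r += r*y*(0.375*y - 0.5);  /* 2nd-order Householder step */
  return r;
}

int main(void) {
  double t = 0.7;
  printf("approx %.9f exact %.9f\n", rsqrt_sketch(t), 1/sqrt(t));
  return 0;
}
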
-
-static int16_t od_rsqrt(int32_t x, int *rsqrt_shift)
-{
- int k;
- int s;
- int16_t t;
- k = (OD_ILOG(x) - 1) >> 1;
- /*t is x in the range [0.25, 1) in QINSHIFT, or x*2^(-s).
- Shift by log2(x) - log2(0.25*(1 << INSHIFT)) to ensure 0.25 lower bound.*/
- s = 2*k - (OD_RSQRT_INSHIFT - 2);
- t = OD_VSHR(x, s);
- /*We want to express od_rsqrt() in terms of od_rsqrt_norm(), which is
- defined as (2^OUTSHIFT)/sqrt(t*(2^-INSHIFT)) with t=x*(2^-s).
- This simplifies to 2^(OUTSHIFT+(INSHIFT/2)+(s/2))/sqrt(x), so the caller
- needs to shift right by OUTSHIFT + INSHIFT/2 + s/2.*/
- *rsqrt_shift = OD_RSQRT_OUTSHIFT + ((s + OD_RSQRT_INSHIFT) >> 1);
- return od_rsqrt_norm(t);
-}
-#endif
-
-/** Synthesizes one partition of coefficient values from a PVQ-encoded
- * vector. This 'partial' version is called by the encode loop where
- * the Householder reflection has already been computed and there's no
- * need to recompute it.
- *
- * @param [out] xcoeff output coefficient partition (x in math doc)
- * @param [in] ypulse PVQ-encoded values (y in the math doc); in
- * the noref case, this vector has n entries,
- * in the reference case it contains n-1 entries
- * (the m-th entry is not included)
- * @param [in] r reference vector (prediction)
- * @param [in] n number of elements in this partition
- * @param [in] noref indicates presence or lack of prediction
- * @param [in] g decoded quantized vector gain
- * @param [in] theta decoded theta (prediction error)
- * @param [in] m alignment dimension of Householder reflection
- * @param [in] s sign of Householder reflection
- * @param [in] qm_inv inverse of the QM with magnitude compensation
- */
-void od_pvq_synthesis_partial(od_coeff *xcoeff, const od_coeff *ypulse,
- const od_val16 *r16, int n, int noref, od_val32 g, od_val32 theta, int m, int s,
- const int16_t *qm_inv) {
- int i;
- int yy;
- od_val32 scale;
- int nn;
-#if !defined(OD_FLOAT_PVQ)
- int gshift;
- int qshift;
-#endif
- OD_ASSERT(g != 0);
-  nn = n-(!noref); /* when noref==0, the input vector has n-1 entries */
- yy = 0;
- for (i = 0; i < nn; i++)
- yy += ypulse[i]*(int32_t)ypulse[i];
-#if !defined(OD_FLOAT_PVQ)
- /* Shift required for the magnitude of the pre-qm synthesis to be guaranteed
- to fit in 16 bits. In practice, the range will be 8192-16384 after scaling
- most of the time. */
- gshift = OD_MAXI(0, OD_ILOG(g) - 14);
-#endif
- /*scale is g/sqrt(yy) in Q(16-gshift) so that x[]*scale has a norm that fits
- in 16 bits.*/
- if (yy == 0) scale = 0;
-#if defined(OD_FLOAT_PVQ)
- else {
- scale = g/sqrt(yy);
- }
-#else
- else {
- int rsqrt_shift;
- int16_t rsqrt;
- /*FIXME: should be < int64_t*/
- int64_t tmp;
- rsqrt = od_rsqrt(yy, &rsqrt_shift);
- tmp = rsqrt*(int64_t)g;
- scale = OD_VSHR_ROUND(tmp, rsqrt_shift + gshift - 16);
- }
- /* Shift to apply after multiplying by the inverse QM, taking into account
- gshift. */
- qshift = OD_QM_INV_SHIFT - gshift;
-#endif
- if (noref) {
- for (i = 0; i < n; i++) {
- od_val32 x;
- /* This multiply doesn't round, so it introduces some bias.
- It would be nice (but not critical) to fix this. */
- x = (od_val32)OD_MULT16_32_Q16(ypulse[i], scale);
-#if defined(OD_FLOAT_PVQ)
- xcoeff[i] = (od_coeff)floor(.5
- + x*(qm_inv[i]*OD_QM_INV_SCALE_1));
-#else
- xcoeff[i] = OD_SHR_ROUND(x*qm_inv[i], qshift);
-#endif
- }
- }
- else{
- od_val16 x[MAXN];
- scale = OD_ROUND32(scale*OD_TRIG_SCALE_1*od_pvq_sin(theta));
- /* The following multiply doesn't round, but it's probably OK since
- the Householder reflection is likely to undo most of the resulting
- bias. */
- for (i = 0; i < m; i++)
- x[i] = OD_MULT16_32_Q16(ypulse[i], scale);
- x[m] = OD_ROUND16(-s*(OD_SHR_ROUND(g, gshift))*OD_TRIG_SCALE_1*
- od_pvq_cos(theta));
- for (i = m; i < nn; i++)
- x[i+1] = OD_MULT16_32_Q16(ypulse[i], scale);
- od_apply_householder(x, x, r16, n);
- for (i = 0; i < n; i++) {
-#if defined(OD_FLOAT_PVQ)
- xcoeff[i] = (od_coeff)floor(.5 + (x[i]*(qm_inv[i]*OD_QM_INV_SCALE_1)));
-#else
- xcoeff[i] = OD_SHR_ROUND(x[i]*qm_inv[i], qshift);
-#endif
- }
- }
-}
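
A minimal floating-point sketch of the noref path above (the helper name and
test vector are illustrative assumptions): the integer pulse vector y is
scaled by g/||y|| so the synthesized partition has norm g; the reference path
additionally places the -s*g*cos(theta) component at position m and applies
the Householder reflection.

#include <math.h>
#include <stdio.h>

static void pvq_synth_noref_sketch(double *x, const int *y, int n, double g) {
  double yy = 0;
  int i;
  for (i = 0; i < n; i++) yy += (double)y[i]*y[i];
  for (i = 0; i < n; i++) x[i] = yy > 0 ? g*y[i]/sqrt(yy) : 0;
}

int main(void) {
  int y[4] = { 2, -1, 0, 1 };
  double x[4];
  double norm = 0;
  int i;
  pvq_synth_noref_sketch(x, y, 4, 10.0);
  for (i = 0; i < 4; i++) norm += x[i]*x[i];
  printf("output norm %f\n", sqrt(norm));  /* ~10 */
  return 0;
}
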
diff --git a/third_party/aom/av1/common/pvq.h b/third_party/aom/av1/common/pvq.h
deleted file mode 100644
index 4adf22f02..000000000
--- a/third_party/aom/av1/common/pvq.h
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/* clang-format off */
-
-#if !defined(_pvq_H)
-# define _pvq_H (1)
-# include "generic_code.h"
-# include "odintrin.h"
-
-extern const uint16_t EXP_CDF_TABLE[][16];
-extern const uint16_t LAPLACE_OFFSET[];
-
-#define AV1_PVQ_ENABLE_ACTIVITY_MASKING (0)
-
-# define PVQ_MAX_PARTITIONS (1 + 3*(OD_TXSIZES-1))
-
-# define OD_NOREF_ADAPT_SPEED (4)
-/* Normalized lambda for PVQ quantizer. Since we normalize the gain by q, the
- distortion is normalized by q^2 and lambda does not need the q^2 factor.
- At high rate, this would be log(2)/6, but we're using a slightly more
- aggressive value, closer to:
- Li, Xiang, et al. "Laplace distribution based Lagrangian rate distortion
- optimization for hybrid video coding." Circuits and Systems for Video
- Technology, IEEE Transactions on 19.2 (2009): 193-205.
- */
-# define OD_PVQ_LAMBDA (.1146)
-
-#define OD_PVQ_SKIP_ZERO 1
-#define OD_PVQ_SKIP_COPY 2
-
-/* Maximum size for coding a PVQ band. */
-#define OD_MAX_PVQ_SIZE (1024)
-
-#if defined(OD_FLOAT_PVQ)
-#define OD_QM_SHIFT (15)
-#else
-#define OD_QM_SHIFT (11)
-#endif
-#define OD_QM_SCALE (1 << OD_QM_SHIFT)
-#if defined(OD_FLOAT_PVQ)
-#define OD_QM_SCALE_1 (1./OD_QM_SCALE)
-#endif
-#define OD_QM_SCALE_MAX 32767
-#define OD_QM_INV_SHIFT (12)
-#define OD_QM_INV_SCALE (1 << OD_QM_INV_SHIFT)
-#if defined(OD_FLOAT_PVQ)
-#define OD_QM_INV_SCALE_1 (1./OD_QM_INV_SCALE)
-#endif
-#define OD_QM_OFFSET(bs) ((((1 << 2*bs) - 1) << 2*OD_LOG_BSIZE0)/3)
-#define OD_QM_STRIDE (OD_QM_OFFSET(OD_TXSIZES))
-#define OD_QM_BUFFER_SIZE (2*OD_QM_STRIDE)
-
-#if !defined(OD_FLOAT_PVQ)
-#define OD_THETA_SHIFT (15)
-#define OD_THETA_SCALE ((1 << OD_THETA_SHIFT)*2./M_PI)
-#define OD_MAX_THETA_SCALE (1 << OD_THETA_SHIFT)
-#define OD_TRIG_SCALE (32768)
-#define OD_BETA_SHIFT (12)
-#define OD_BETA_SCALE_1 (1./(1 << OD_BETA_SHIFT))
-/*Multiplies 16-bit a by 32-bit b and keeps bits [16:64-OD_BETA_SHIFT-1].*/
-#define OD_MULT16_32_QBETA(a, b) \
- ((int16_t)(a)*(int64_t)(int32_t)(b) >> OD_BETA_SHIFT)
-# define OD_MULT16_16_QBETA(a, b) \
- ((((int16_t)(a))*((int32_t)(int16_t)(b))) >> OD_BETA_SHIFT)
-#define OD_CGAIN_SHIFT (8)
-#define OD_CGAIN_SCALE (1 << OD_CGAIN_SHIFT)
-#else
-#define OD_BETA_SCALE_1 (1.)
-#define OD_THETA_SCALE (1)
-#define OD_TRIG_SCALE (1)
-#define OD_CGAIN_SCALE (1)
-#endif
-#define OD_THETA_SCALE_1 (1./OD_THETA_SCALE)
-#define OD_TRIG_SCALE_1 (1./OD_TRIG_SCALE)
-#define OD_CGAIN_SCALE_1 (1./OD_CGAIN_SCALE)
-#define OD_CGAIN_SCALE_2 (OD_CGAIN_SCALE_1*OD_CGAIN_SCALE_1)
-
-/* Largest PVQ partition is half the coefficients of largest block size. */
-#define MAXN (OD_TXSIZE_MAX*OD_TXSIZE_MAX/2)
-
-#define OD_COMPAND_SHIFT (8 + OD_COEFF_SHIFT)
-#define OD_COMPAND_SCALE (1 << OD_COMPAND_SHIFT)
-#define OD_COMPAND_SCALE_1 (1./OD_COMPAND_SCALE)
-
-#define OD_QM_SIZE (OD_TXSIZES*(OD_TXSIZES + 1))
-
-#define OD_FLAT_QM 0
-#define OD_HVS_QM 1
-
-# define OD_NSB_ADAPT_CTXS (4)
-
-# define OD_ADAPT_K_Q8 0
-# define OD_ADAPT_SUM_EX_Q8 1
-# define OD_ADAPT_COUNT_Q8 2
-# define OD_ADAPT_COUNT_EX_Q8 3
-
-# define OD_ADAPT_NO_VALUE (-2147483647-1)
-
-typedef enum {
- PVQ_SKIP = 0x0,
- DC_CODED = 0x1,
- AC_CODED = 0x2,
- AC_DC_CODED = 0x3,
-} PVQ_SKIP_TYPE;
-
-typedef struct od_pvq_adapt_ctx od_pvq_adapt_ctx;
-typedef struct od_pvq_codeword_ctx od_pvq_codeword_ctx;
-
-struct od_pvq_codeword_ctx {
- int pvq_adapt[2*OD_TXSIZES*OD_NSB_ADAPT_CTXS];
- /* CDFs are size 16 despite the fact that we're using less than that. */
- uint16_t pvq_k1_cdf[12][CDF_SIZE(16)];
- uint16_t pvq_split_cdf[22*7][CDF_SIZE(8)];
-};
-
-struct od_pvq_adapt_ctx {
- od_pvq_codeword_ctx pvq_codeword_ctx;
- generic_encoder pvq_param_model[3];
- int pvq_ext[OD_TXSIZES*PVQ_MAX_PARTITIONS];
- int pvq_exg[OD_NPLANES_MAX][OD_TXSIZES][PVQ_MAX_PARTITIONS];
- uint16_t pvq_gaintheta_cdf[2*OD_TXSIZES*PVQ_MAX_PARTITIONS][CDF_SIZE(16)];
- uint16_t pvq_skip_dir_cdf[2*(OD_TXSIZES-1)][CDF_SIZE(7)];
-};
-
-typedef struct od_qm_entry {
- int interp_q;
- int scale_q8;
- const unsigned char *qm_q4;
-} od_qm_entry;
-
-extern const od_qm_entry OD_DEFAULT_QMS[2][2][OD_NPLANES_MAX];
-
-void od_adapt_pvq_ctx_reset(od_pvq_adapt_ctx *state, int is_keyframe);
-int od_pvq_size_ctx(int n);
-int od_pvq_k1_ctx(int n, int orig_size);
-
-od_val16 od_pvq_sin(od_val32 x);
-od_val16 od_pvq_cos(od_val32 x);
-#if !defined(OD_FLOAT_PVQ)
-int od_vector_log_mag(const od_coeff *x, int n);
-#endif
-
-void od_interp_qm(unsigned char *out, int q, const od_qm_entry *entry1,
- const od_qm_entry *entry2);
-
-int od_qm_get_index(int bs, int band);
-
-extern const od_val16 *const OD_PVQ_BETA[2][OD_NPLANES_MAX][OD_TXSIZES + 1];
-
-void od_init_qm(int16_t *x, int16_t *x_inv, const int *qm);
-int od_compute_householder(od_val16 *r, int n, od_val32 gr, int *sign,
- int shift);
-void od_apply_householder(od_val16 *out, const od_val16 *x, const od_val16 *r,
- int n);
-void od_pvq_synthesis_partial(od_coeff *xcoeff, const od_coeff *ypulse,
- const od_val16 *r, int n,
- int noref, od_val32 g,
- od_val32 theta, int m, int s,
- const int16_t *qm_inv);
-od_val32 od_gain_expand(od_val32 cg, int q0, od_val16 beta);
-od_val32 od_pvq_compute_gain(const od_val16 *x, int n, int q0, od_val32 *g,
- od_val16 beta, int bshift);
-int od_pvq_compute_max_theta(od_val32 qcg, od_val16 beta);
-od_val32 od_pvq_compute_theta(int t, int max_theta);
-int od_pvq_compute_k(od_val32 qcg, int itheta, int noref, int n, od_val16 beta);
-
-int od_vector_is_null(const od_coeff *x, int len);
-int od_qm_offset(int bs, int xydec);
-
-#endif
diff --git a/third_party/aom/av1/common/pvq_state.c b/third_party/aom/av1/common/pvq_state.c
deleted file mode 100644
index 197b9b3a8..000000000
--- a/third_party/aom/av1/common/pvq_state.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/common/pvq_state.h"
-#include "av1/common/odintrin.h"
-
-void od_adapt_ctx_reset(od_adapt_ctx *adapt, int is_keyframe) {
- int pli;
- od_adapt_pvq_ctx_reset(&adapt->pvq, is_keyframe);
- OD_CDFS_INIT_Q15(adapt->skip_cdf);
- for (pli = 0; pli < OD_NPLANES_MAX; pli++) {
- int i;
- OD_CDFS_INIT_DYNAMIC(adapt->model_dc[pli].cdf);
- for (i = 0; i < OD_TXSIZES; i++) {
- int j;
- adapt->ex_g[pli][i] = 8;
- for (j = 0; j < 3; j++) {
- adapt->ex_dc[pli][i][j] = pli > 0 ? 8 : 32768;
- }
- }
- }
-}
-
-void od_init_skipped_coeffs(int16_t *d, int16_t *pred, int is_keyframe, int bo,
- int n, int w) {
- int i;
- int j;
- if (is_keyframe) {
- for (i = 0; i < n; i++) {
- for (j = 0; j < n; j++) {
- /* skip DC */
- if (i || j) d[bo + i * w + j] = 0;
- }
- }
- } else {
- for (i = 0; i < n; i++) {
- for (j = 0; j < n; j++) {
- d[bo + i * w + j] = pred[i * n + j];
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/pvq_state.h b/third_party/aom/av1/common/pvq_state.h
deleted file mode 100644
index 84d454e70..000000000
--- a/third_party/aom/av1/common/pvq_state.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/* clang-format off */
-
-#if !defined(_state_H)
-# define _state_H (1)
-
-typedef struct od_state od_state;
-typedef struct od_adapt_ctx od_adapt_ctx;
-
-# include "generic_code.h"
-# include "odintrin.h"
-# include "pvq.h"
-
-/*Adaptation speed of scalar Laplace encoding.*/
-# define OD_SCALAR_ADAPT_SPEED (4)
-
-struct od_adapt_ctx {
- /* Support for PVQ encode/decode */
- od_pvq_adapt_ctx pvq;
-
- generic_encoder model_dc[OD_NPLANES_MAX];
-
- int ex_dc[OD_NPLANES_MAX][OD_TXSIZES][3];
- int ex_g[OD_NPLANES_MAX][OD_TXSIZES];
-
- /* Joint skip flag for DC and AC */
- uint16_t skip_cdf[OD_TXSIZES*2][CDF_SIZE(4)];
-};
-
-struct od_state {
- od_adapt_ctx *adapt;
- unsigned char pvq_qm_q4[OD_NPLANES_MAX][OD_QM_SIZE];
- /* Quantization matrices and their inverses. */
- int16_t qm[OD_QM_BUFFER_SIZE];
- int16_t qm_inv[OD_QM_BUFFER_SIZE];
-};
-
-void od_adapt_ctx_reset(od_adapt_ctx *state, int is_keyframe);
-void od_init_skipped_coeffs(int16_t *d, int16_t *pred, int is_keyframe,
- int bo, int n, int w);
-
-#endif
diff --git a/third_party/aom/av1/common/quant_common.c b/third_party/aom/av1/common/quant_common.c
index ea7140cdc..84575d74b 100644
--- a/third_party/aom/av1/common/quant_common.c
+++ b/third_party/aom/av1/common/quant_common.c
@@ -16,111 +16,7 @@
#include "av1/common/seg_common.h"
#include "av1/common/blockd.h"
-#if CONFIG_NEW_QUANT
-// Bin widths expressed as a fraction over 128 of the quant stepsize,
-// for the quantization bins 0-4.
-// So a value x indicates the bin is actually factor x/128 of the
-// nominal quantization step. For the zero bin, the width is only
-// for one side of zero, so the actual width is twice that.
-//
-// Functions with nuq correspond to "non uniform quantization"
-// TODO(sarahparker, debargha): Optimize these tables
-
-typedef struct {
- uint8_t knots[NUQ_KNOTS]; // offsets
- uint8_t doff; // dequantization
-} qprofile_type;
-
-static const qprofile_type nuq[QUANT_PROFILES][COEF_BANDS] = {
- {
- // lossless
- { { 64, 128, 128 }, 0 }, // dc, band 0
- { { 64, 128, 128 }, 0 }, // band 1
- { { 64, 128, 128 }, 0 }, // band 2
- { { 64, 128, 128 }, 0 }, // band 3
- { { 64, 128, 128 }, 0 }, // band 4
- { { 64, 128, 128 }, 0 }, // band 5
- },
- {
- { { 64, 128, 128 }, 4 }, // dc, band 0
- { { 64, 128, 128 }, 6 }, // band 1
- { { 64, 128, 128 }, 8 }, // band 2
- { { 64, 128, 128 }, 10 }, // band 3
- { { 72, 128, 128 }, 12 }, // band 4
- { { 80, 128, 128 }, 14 } // band 5
- },
- {
- { { 64, 128, 128 }, 6 }, // dc, band 0
- { { 64, 128, 128 }, 8 }, // band 1
- { { 64, 128, 128 }, 10 }, // band 2
- { { 64, 128, 128 }, 12 }, // band 3
- { { 72, 128, 128 }, 14 }, // band 4
- { { 80, 128, 128 }, 16 } // band 5
- },
- {
- { { 64, 128, 128 }, 8 }, // dc, band 0
- { { 64, 128, 128 }, 10 }, // band 1
- { { 64, 128, 128 }, 12 }, // band 2
- { { 72, 128, 128 }, 14 }, // band 3
- { { 76, 128, 128 }, 16 }, // band 4
- { { 80, 128, 128 }, 18 } // band 5
- }
-};
-
-static const uint8_t *get_nuq_knots(int band, int q_profile) {
- return nuq[q_profile][band].knots;
-}
-
-static INLINE int16_t quant_to_doff_fixed(int band, int q_profile) {
- return nuq[q_profile][band].doff;
-}
-
-// get cumulative bins
-static INLINE void get_cuml_bins_nuq(int q, int band, tran_low_t *cuml_bins,
- int q_profile) {
- const uint8_t *knots = get_nuq_knots(band, q_profile);
- int16_t cuml_knots[NUQ_KNOTS];
- int i;
- cuml_knots[0] = knots[0];
- for (i = 1; i < NUQ_KNOTS; ++i) cuml_knots[i] = cuml_knots[i - 1] + knots[i];
- for (i = 0; i < NUQ_KNOTS; ++i)
- cuml_bins[i] = ROUND_POWER_OF_TWO(cuml_knots[i] * q, 7);
-}
-
-void av1_get_dequant_val_nuq(int q, int band, tran_low_t *dq,
- tran_low_t *cuml_bins, int q_profile) {
- const uint8_t *knots = get_nuq_knots(band, q_profile);
- tran_low_t cuml_bins_[NUQ_KNOTS], *cuml_bins_ptr;
- tran_low_t doff;
- int i;
- cuml_bins_ptr = (cuml_bins ? cuml_bins : cuml_bins_);
- get_cuml_bins_nuq(q, band, cuml_bins_ptr, q_profile);
- dq[0] = 0;
- for (i = 1; i < NUQ_KNOTS; ++i) {
- doff = quant_to_doff_fixed(band, q_profile);
- doff = ROUND_POWER_OF_TWO(doff * knots[i], 7);
- dq[i] =
- cuml_bins_ptr[i - 1] + ROUND_POWER_OF_TWO((knots[i] - doff * 2) * q, 8);
- }
- doff = quant_to_doff_fixed(band, q_profile);
- dq[NUQ_KNOTS] =
- cuml_bins_ptr[NUQ_KNOTS - 1] + ROUND_POWER_OF_TWO((64 - doff) * q, 7);
-}
-
-tran_low_t av1_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq) {
- if (v <= NUQ_KNOTS)
- return dq[v];
- else
- return dq[NUQ_KNOTS] + (v - NUQ_KNOTS) * q;
-}
-
-tran_low_t av1_dequant_coeff_nuq(int v, int q, const tran_low_t *dq) {
- tran_low_t dqmag = av1_dequant_abscoeff_nuq(abs(v), q, dq);
- return (v < 0 ? -dqmag : dqmag);
-}
-#endif // CONFIG_NEW_QUANT
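
A worked example of the bin-width convention described at the top of this
removed block (the values are illustrative): each knot is a width in 1/128ths
of the step size q, so the lossless-profile knots {64, 128, 128} give
cumulative thresholds of 0.5*q, 1.5*q and 2.5*q -- a +/-0.5*q zero bin
followed by uniform bins.

#include <stdio.h>

int main(void) {
  const int knots[3] = { 64, 128, 128 };
  const int q = 256;
  int i, cuml = 0;
  for (i = 0; i < 3; i++) {
    cuml += knots[i];
    /* matches get_cuml_bins_nuq(): ROUND_POWER_OF_TWO(cuml*q, 7) */
    printf("threshold %d: %.1f\n", i, cuml*q/128.0);
  }
  return 0;
}
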
-
-static const int16_t dc_qlookup[QINDEX_RANGE] = {
+static const int16_t dc_qlookup_Q3[QINDEX_RANGE] = {
4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18,
19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30,
31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42,
@@ -142,8 +38,7 @@ static const int16_t dc_qlookup[QINDEX_RANGE] = {
1184, 1232, 1282, 1336,
};
-#if CONFIG_HIGHBITDEPTH
-static const int16_t dc_qlookup_10[QINDEX_RANGE] = {
+static const int16_t dc_qlookup_10_Q3[QINDEX_RANGE] = {
4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37,
40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82,
86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132,
@@ -166,7 +61,7 @@ static const int16_t dc_qlookup_10[QINDEX_RANGE] = {
3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347,
};
-static const int16_t dc_qlookup_12[QINDEX_RANGE] = {
+static const int16_t dc_qlookup_12_Q3[QINDEX_RANGE] = {
4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91,
103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237,
251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405,
@@ -192,9 +87,8 @@ static const int16_t dc_qlookup_12[QINDEX_RANGE] = {
13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949,
19718, 20521, 21387,
};
-#endif
-static const int16_t ac_qlookup[QINDEX_RANGE] = {
+static const int16_t ac_qlookup_Q3[QINDEX_RANGE] = {
4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
@@ -217,8 +111,7 @@ static const int16_t ac_qlookup[QINDEX_RANGE] = {
1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
};
-#if CONFIG_HIGHBITDEPTH
-static const int16_t ac_qlookup_10[QINDEX_RANGE] = {
+static const int16_t ac_qlookup_10_Q3[QINDEX_RANGE] = {
4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40,
44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92,
96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149,
@@ -241,7 +134,7 @@ static const int16_t ac_qlookup_10[QINDEX_RANGE] = {
6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312,
};
-static const int16_t ac_qlookup_12[QINDEX_RANGE] = {
+static const int16_t ac_qlookup_12_Q3[QINDEX_RANGE] = {
4, 13, 19, 27, 35, 44, 54, 64, 75, 87, 99,
112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263,
280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456,
@@ -267,64 +160,88 @@ static const int16_t ac_qlookup_12[QINDEX_RANGE] = {
22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599,
28143, 28687, 29247,
};
-#endif
-int16_t av1_dc_quant(int qindex, int delta, aom_bit_depth_t bit_depth) {
-#if CONFIG_HIGHBITDEPTH
+// Coefficient scaling and quantization with AV1 TX are tailored to
+// the AV1 TX transforms. Regardless of the bit-depth of the input,
+// the transform stages scale the coefficient values up by a factor of
+// 8 (3 bits) over the scale of the pixel values. Thus, for 8-bit
+// input, the coefficients have effectively 11 bits of scale depth
+// (8+3), 10-bit input pixels result in 13-bit coefficient depth
+// (10+3) and 12-bit pixels yield 15-bit (12+3) coefficient depth.
+// All quantizers are built using this invariant of x8, 3-bit scaling,
+// thus the Q3 suffix.
+
+// A partial exception to this rule is large transforms; to avoid
+// overflow, TX blocks with > 256 pels (>16x16) are scaled only
+// 4-times unity (2 bits) over the pixel depth, and TX blocks with
+// over 1024 pixels (>32x32) are scaled up only 2x unity (1 bit).
+// This descaling is found via av1_tx_get_scale(). Thus, 16x32, 32x16
+// and 32x32 transforms actually return Q2 coefficients, and 32x64,
+// 64x32 and 64x64 transforms return Q1 coefficients. However, the
+// quantizers are de-scaled down on-the-fly by the same amount
+// (av1_tx_get_scale()) during quantization, and as such the
+// dequantized/decoded coefficients, even for large TX blocks, are always
+// effectively Q3. Meanwhile, quantized/coded coefficients are Q0
+// because Qn quantizers are applied to Qn tx coefficients.
+
+// Note that encoder decision making (which uses the quantizer to
+// generate several bespoke lambdas for RDO and other heuristics)
+// expects quantizers to be larger for higher-bitdepth input. In
+// addition, the minimum allowable quantizer is 4; smaller values will
+// underflow to 0 in the actual quantization routines.
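
A small sketch of that bookkeeping (the helper name and the inline pel-count
thresholds are assumptions for illustration; the library derives the descale
via av1_tx_get_scale()):

#include <stdio.h>

/* Effective coefficient scale depth: pixel depth plus 3 bits, minus a
   descale of 1 bit for TX blocks over 256 pels and 2 bits over 1024 pels. */
static int coeff_depth_sketch(int bit_depth, int tx_pels) {
  int descale = tx_pels > 1024 ? 2 : tx_pels > 256 ? 1 : 0;
  return bit_depth + 3 - descale;
}

int main(void) {
  /* 8-bit pixels, 16x16 TX -> 11-bit (Q3) coefficients;
     10-bit pixels, 32x32 TX -> 12-bit (Q2) coefficients before re-scaling. */
  printf("%d %d\n", coeff_depth_sketch(8, 256), coeff_depth_sketch(10, 1024));
  return 0;
}
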
+
+int16_t av1_dc_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth) {
switch (bit_depth) {
- case AOM_BITS_8: return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
- case AOM_BITS_10: return dc_qlookup_10[clamp(qindex + delta, 0, MAXQ)];
- case AOM_BITS_12: return dc_qlookup_12[clamp(qindex + delta, 0, MAXQ)];
+ case AOM_BITS_8: return dc_qlookup_Q3[clamp(qindex + delta, 0, MAXQ)];
+ case AOM_BITS_10: return dc_qlookup_10_Q3[clamp(qindex + delta, 0, MAXQ)];
+ case AOM_BITS_12: return dc_qlookup_12_Q3[clamp(qindex + delta, 0, MAXQ)];
default:
assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
return -1;
}
-#else
- (void)bit_depth;
- return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
-#endif
}
-int16_t av1_ac_quant(int qindex, int delta, aom_bit_depth_t bit_depth) {
-#if CONFIG_HIGHBITDEPTH
+int16_t av1_ac_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth) {
switch (bit_depth) {
- case AOM_BITS_8: return ac_qlookup[clamp(qindex + delta, 0, MAXQ)];
- case AOM_BITS_10: return ac_qlookup_10[clamp(qindex + delta, 0, MAXQ)];
- case AOM_BITS_12: return ac_qlookup_12[clamp(qindex + delta, 0, MAXQ)];
+ case AOM_BITS_8: return ac_qlookup_Q3[clamp(qindex + delta, 0, MAXQ)];
+ case AOM_BITS_10: return ac_qlookup_10_Q3[clamp(qindex + delta, 0, MAXQ)];
+ case AOM_BITS_12: return ac_qlookup_12_Q3[clamp(qindex + delta, 0, MAXQ)];
default:
assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
return -1;
}
-#else
- (void)bit_depth;
- return ac_qlookup[clamp(qindex + delta, 0, MAXQ)];
-#endif
}
-int16_t av1_qindex_from_ac(int ac, aom_bit_depth_t bit_depth) {
+// In AV1 TX, the coefficients are always scaled up by a factor of 8 (3
+// bits), so QTX == Q3.
+
+int16_t av1_dc_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth) {
+ return av1_dc_quant_Q3(qindex, delta, bit_depth);
+}
+
+int16_t av1_ac_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth) {
+ return av1_ac_quant_Q3(qindex, delta, bit_depth);
+}
+
+int16_t av1_qindex_from_ac_Q3(int ac_Q3, aom_bit_depth_t bit_depth) {
int i;
- const int16_t *tab = ac_qlookup;
- ac *= 4;
-#if CONFIG_HIGHBITDEPTH
+ const int16_t *tab = ac_qlookup_Q3;
switch (bit_depth) {
case AOM_BITS_10: {
- tab = ac_qlookup_10;
- ac *= 4;
+ tab = ac_qlookup_10_Q3;
break;
}
case AOM_BITS_12: {
- tab = ac_qlookup_12;
- ac *= 16;
+ tab = ac_qlookup_12_Q3;
break;
}
default:
assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
return -1;
}
-#endif
(void)bit_depth;
for (i = 0; i < QINDEX_RANGE; i++) {
- if (ac <= tab[i]) return i;
+ if (ac_Q3 <= tab[i]) return i;
}
return QINDEX_RANGE - 1;
}
@@ -333,55 +250,47 @@ int av1_get_qindex(const struct segmentation *seg, int segment_id,
int base_qindex) {
if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) {
const int data = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
- const int seg_qindex =
- seg->abs_delta == SEGMENT_ABSDATA ? data : base_qindex + data;
+ const int seg_qindex = base_qindex + data;
return clamp(seg_qindex, 0, MAXQ);
} else {
return base_qindex;
}
}
-#if CONFIG_AOM_QM
-qm_val_t *aom_iqmatrix(AV1_COMMON *cm, int qmlevel, int is_chroma,
- TX_SIZE tx_size, int is_intra) {
- return &cm->giqmatrix[qmlevel][!!is_chroma][!!is_intra][tx_size][0];
+const qm_val_t *av1_iqmatrix(AV1_COMMON *cm, int qmlevel, int plane,
+ TX_SIZE tx_size) {
+ return &cm->giqmatrix[qmlevel][plane][tx_size][0];
}
-qm_val_t *aom_qmatrix(AV1_COMMON *cm, int qmlevel, int is_chroma,
- TX_SIZE tx_size, int is_intra) {
- return &cm->gqmatrix[qmlevel][!!is_chroma][!!is_intra][tx_size][0];
+const qm_val_t *av1_qmatrix(AV1_COMMON *cm, int qmlevel, int plane,
+ TX_SIZE tx_size) {
+ return &cm->gqmatrix[qmlevel][plane][tx_size][0];
}
-#if CONFIG_CHROMA_2X2
-#define QM_TOTAL_SIZE 3348
-#else
#define QM_TOTAL_SIZE 3344
-#endif
-static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE];
-static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE];
+static const qm_val_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE];
+static const qm_val_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE];
-void aom_qm_init(AV1_COMMON *cm) {
- int q, c, f, t;
+void av1_qm_init(AV1_COMMON *cm) {
+ const int num_planes = av1_num_planes(cm);
+ int q, c, t;
int current;
for (q = 0; q < NUM_QM_LEVELS; ++q) {
- for (c = 0; c < 2; ++c) {
- for (f = 0; f < 2; ++f) {
- current = 0;
- for (t = 0; t < TX_SIZES_ALL; ++t) {
- const int size = tx_size_2d[t];
- // Don't use QM for sizes > 32x32
- if (q == NUM_QM_LEVELS - 1 || size > 1024) {
- cm->gqmatrix[q][c][f][t] = NULL;
- cm->giqmatrix[q][c][f][t] = NULL;
- } else {
- assert(current + size <= QM_TOTAL_SIZE);
- cm->gqmatrix[q][c][f][t] = &wt_matrix_ref[AOMMIN(
- NUM_QM_LEVELS - 1, f == 0 ? q + DEFAULT_QM_INTER_OFFSET : q)][c]
- [current];
- cm->giqmatrix[q][c][f][t] = &iwt_matrix_ref[AOMMIN(
- NUM_QM_LEVELS - 1, f == 0 ? q + DEFAULT_QM_INTER_OFFSET : q)][c]
- [current];
- current += size;
- }
+ for (c = 0; c < num_planes; ++c) {
+ current = 0;
+ for (t = 0; t < TX_SIZES_ALL; ++t) {
+ const int size = tx_size_2d[t];
+ const int qm_tx_size = av1_get_adjusted_tx_size(t);
+ if (q == NUM_QM_LEVELS - 1) {
+ cm->gqmatrix[q][c][t] = NULL;
+ cm->giqmatrix[q][c][t] = NULL;
+ } else if (t != qm_tx_size) { // Reuse matrices for 'qm_tx_size'
+ cm->gqmatrix[q][c][t] = cm->gqmatrix[q][c][qm_tx_size];
+ cm->giqmatrix[q][c][t] = cm->giqmatrix[q][c][qm_tx_size];
+ } else {
+ assert(current + size <= QM_TOTAL_SIZE);
+ cm->gqmatrix[q][c][t] = &wt_matrix_ref[q][c >= 1][current];
+ cm->giqmatrix[q][c][t] = &iwt_matrix_ref[q][c >= 1][current];
+ current += size;
}
}
}
@@ -399,13 +308,9 @@ void aom_qm_init(AV1_COMMON *cm) {
frequency domain according to different nominal viewing
distances.
*/
-static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
+static const qm_val_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 43, 86, 86, 166,
-#endif
/* Size 4x4 */
32, 43, 73, 97, 43, 67, 94, 110, 73, 94, 137, 150, 97, 110, 150, 200,
/* Size 8x8 */
@@ -632,10 +537,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
89, 88, 87, 90, 93, 97, 99, 105, 107, 115, 116, 124, 127, 135, 139, 146,
152, 159, 166, 171, 182, 186, 191, 193, 201, 203, 204 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 50, 62, 62, 100,
-#endif
/* Size 4x4 */
35, 46, 57, 66, 46, 60, 69, 71, 57, 69, 90, 90, 66, 71, 90, 109,
/* Size 8x8 */
@@ -848,10 +749,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 39, 82, 82, 155,
-#endif
/* Size 4x4 */
32, 41, 69, 92, 41, 63, 88, 103, 69, 88, 127, 140, 92, 103, 140, 184,
/* Size 8x8 */
@@ -1076,10 +973,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
94, 96, 101, 102, 110, 111, 118, 121, 129, 132, 138, 144, 150, 156, 161,
171, 174, 179, 181, 188, 188, 190 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 48, 60, 60, 97,
-#endif
/* Size 4x4 */
33, 45, 56, 64, 45, 58, 66, 69, 56, 66, 86, 87, 64, 69, 87, 105,
/* Size 8x8 */
@@ -1291,10 +1184,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 39, 76, 76, 140,
-#endif
/* Size 4x4 */
32, 38, 63, 86, 38, 56, 78, 97, 63, 78, 113, 130, 86, 97, 130, 169,
/* Size 8x8 */
@@ -1515,10 +1404,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
97, 98, 105, 106, 113, 115, 122, 125, 131, 136, 141, 147, 151, 160, 163,
168, 169, 175, 175, 176 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 48, 58, 58, 91,
-#endif
/* Size 4x4 */
32, 45, 53, 63, 45, 55, 62, 67, 53, 62, 80, 84, 63, 67, 84, 101,
/* Size 8x8 */
@@ -1730,10 +1615,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 36, 71, 71, 134,
-#endif
/* Size 4x4 */
32, 37, 58, 81, 37, 54, 72, 91, 58, 72, 102, 121, 81, 91, 121, 156,
/* Size 8x8 */
@@ -1953,10 +1834,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
108, 110, 116, 119, 124, 129, 134, 139, 142, 150, 153, 157, 157, 163,
163, 163 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 47, 55, 55, 89,
-#endif
/* Size 4x4 */
32, 45, 51, 61, 45, 54, 59, 65, 51, 59, 75, 81, 61, 65, 81, 97,
/* Size 8x8 */
@@ -2168,10 +2045,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 35, 63, 63, 117,
-#endif
/* Size 4x4 */
32, 34, 53, 75, 34, 49, 64, 81, 53, 64, 91, 112, 75, 81, 112, 140,
/* Size 8x8 */
@@ -2387,10 +2260,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
105, 111, 113, 118, 122, 126, 131, 134, 141, 143, 147, 147, 152, 151,
152 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 47, 52, 52, 82,
-#endif
/* Size 4x4 */
32, 46, 49, 58, 46, 53, 55, 62, 49, 55, 70, 78, 58, 62, 78, 91,
/* Size 8x8 */
@@ -2601,10 +2470,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 35, 58, 58, 105,
-#endif
/* Size 4x4 */
32, 34, 49, 72, 34, 48, 60, 79, 49, 60, 82, 104, 72, 79, 104, 134,
/* Size 8x8 */
@@ -2817,10 +2682,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
69, 69, 73, 73, 78, 78, 84, 84, 90, 90, 96, 96, 103, 103, 110, 110, 118,
118, 125, 125, 133, 133, 136, 136, 141 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 47, 50, 50, 76,
-#endif
/* Size 4x4 */
32, 46, 47, 57, 46, 53, 54, 60, 47, 54, 66, 75, 57, 60, 75, 89,
/* Size 8x8 */
@@ -3031,10 +2892,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 34, 52, 52, 89,
-#endif
/* Size 4x4 */
32, 33, 45, 62, 33, 39, 51, 64, 45, 51, 71, 87, 62, 64, 87, 108,
/* Size 8x8 */
@@ -3246,10 +3103,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
65, 64, 64, 69, 69, 73, 74, 77, 79, 81, 85, 86, 91, 91, 98, 99, 103,
105, 108, 112, 114, 119, 119, 127, 127 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 41, 48, 48, 69,
-#endif
/* Size 4x4 */
31, 42, 47, 53, 42, 48, 50, 54, 47, 50, 61, 67, 53, 54, 67, 78,
/* Size 8x8 */
@@ -3460,10 +3313,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 33, 47, 47, 75,
-#endif
/* Size 4x4 */
32, 33, 42, 55, 33, 38, 46, 57, 42, 46, 63, 75, 55, 57, 75, 92,
/* Size 8x8 */
@@ -3673,10 +3522,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
58, 62, 63, 65, 68, 68, 72, 73, 76, 79, 79, 84, 85, 88, 92, 92, 97, 98,
100, 105, 105, 109 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 39, 47, 47, 63,
-#endif
/* Size 4x4 */
31, 41, 46, 51, 41, 48, 48, 51, 46, 48, 58, 62, 51, 51, 62, 71,
/* Size 8x8 */
@@ -3887,10 +3732,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 33, 42, 42, 64,
-#endif
/* Size 4x4 */
32, 32, 38, 51, 32, 35, 40, 49, 38, 40, 54, 64, 51, 49, 64, 81,
/* Size 8x8 */
@@ -4099,10 +3940,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
49, 49, 51, 54, 54, 57, 60, 60, 63, 65, 65, 69, 71, 72, 75, 76, 77, 81,
82, 83, 87, 87 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 38, 45, 45, 59,
-#endif
/* Size 4x4 */
31, 38, 47, 49, 38, 47, 46, 46, 47, 46, 54, 57, 49, 46, 57, 66,
/* Size 8x8 */
@@ -4313,10 +4150,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 38, 38, 54,
-#endif
/* Size 4x4 */
32, 32, 35, 43, 32, 34, 37, 43, 35, 37, 48, 54, 43, 43, 54, 65,
/* Size 8x8 */
@@ -4525,10 +4358,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
45, 45, 45, 45, 47, 50, 50, 51, 55, 56, 56, 58, 60, 60, 62, 66, 66, 67,
69, 70, 70, 73 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 33, 45, 45, 54,
-#endif
/* Size 4x4 */
31, 37, 47, 47, 37, 44, 47, 45, 47, 47, 53, 53, 47, 45, 53, 59,
/* Size 8x8 */
@@ -4739,10 +4568,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 34, 34, 48,
-#endif
/* Size 4x4 */
32, 32, 34, 38, 32, 33, 35, 39, 34, 35, 39, 45, 38, 39, 45, 54,
/* Size 8x8 */
@@ -4951,10 +4776,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
42, 42, 42, 42, 42, 42, 42, 45, 48, 48, 48, 50, 54, 54, 54, 56, 58, 58,
58, 60, 63, 63 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 46, 46, 53,
-#endif
/* Size 4x4 */
31, 34, 42, 47, 34, 39, 45, 46, 42, 45, 48, 49, 47, 46, 49, 54,
/* Size 8x8 */
@@ -5165,10 +4986,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 33, 33, 39,
-#endif
/* Size 4x4 */
32, 32, 32, 35, 32, 32, 33, 35, 32, 33, 35, 38, 35, 35, 38, 46,
/* Size 8x8 */
@@ -5377,10 +5194,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
34, 35, 36, 36, 36, 36, 37, 38, 38, 38, 38, 40, 41, 42, 42, 42, 44, 47,
48, 48, 48, 49 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 31, 42, 42, 48,
-#endif
/* Size 4x4 */
31, 32, 38, 46, 32, 34, 41, 46, 38, 41, 47, 47, 46, 46, 47, 52,
/* Size 8x8 */
@@ -5591,10 +5404,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 32, 32, 35,
-#endif
/* Size 4x4 */
31, 32, 32, 32, 32, 32, 32, 33, 32, 32, 33, 34, 32, 33, 34, 35,
/* Size 8x8 */
@@ -5803,10 +5612,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 36, 36, 36, 36, 36,
36, 37, 38, 38 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 31, 38, 38, 46,
-#endif
/* Size 4x4 */
31, 31, 34, 38, 31, 32, 35, 40, 34, 35, 39, 43, 38, 40, 43, 47,
/* Size 8x8 */
@@ -6017,10 +5822,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 32, 32, 33,
-#endif
/* Size 4x4 */
31, 31, 31, 32, 31, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 33,
/* Size 8x8 */
@@ -6229,10 +6030,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34,
34, 34, 34, 34 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 31, 33, 33, 36,
-#endif
/* Size 4x4 */
31, 31, 31, 34, 31, 31, 31, 35, 31, 31, 32, 35, 34, 35, 35, 39,
/* Size 8x8 */
@@ -6443,10 +6240,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 31, 31, 31, 32,
-#endif
/* Size 4x4 */
31, 31, 31, 31, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32,
/* Size 8x8 */
@@ -6655,10 +6448,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 31, 31, 31, 31,
-#endif
/* Size 4x4 */
31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
/* Size 8x8 */
@@ -6869,10 +6658,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 32, 32, 32,
-#endif
/* Size 4x4 */
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
/* Size 8x8 */
@@ -7081,10 +6866,6 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 32, 32, 32,
-#endif
/* Size 4x4 */
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
/* Size 8x8 */
@@ -7295,13 +7076,9 @@ static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
};
-static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
+static const qm_val_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 24, 12, 12, 6,
-#endif
/* Size 4x4 */
32, 24, 14, 11, 24, 15, 11, 9, 14, 11, 7, 7, 11, 9, 7, 5,
/* Size 8x8 */
@@ -7494,10 +7271,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
11, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5,
5, 5 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 20, 17, 17, 10,
-#endif
/* Size 4x4 */
29, 22, 18, 16, 22, 17, 15, 14, 18, 15, 11, 11, 16, 14, 11, 9,
/* Size 8x8 */
@@ -7708,10 +7481,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 26, 12, 12, 7,
-#endif
/* Size 4x4 */
32, 25, 15, 11, 25, 16, 12, 10, 15, 12, 8, 7, 11, 10, 7, 6,
/* Size 8x8 */
@@ -7907,10 +7676,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
12, 12, 12, 12, 12, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6,
6, 6, 6, 6, 5, 5, 5 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 21, 17, 17, 11,
-#endif
/* Size 4x4 */
31, 23, 18, 16, 23, 18, 16, 15, 18, 16, 12, 12, 16, 15, 12, 10,
/* Size 8x8 */
@@ -8121,10 +7886,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 26, 13, 13, 7,
-#endif
/* Size 4x4 */
32, 27, 16, 12, 27, 18, 13, 11, 16, 13, 9, 8, 12, 11, 8, 6,
/* Size 8x8 */
@@ -8321,10 +8082,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
12, 12, 12, 12, 13, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 7,
7, 7, 6, 6, 6, 6, 6, 6, 6 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 21, 18, 18, 11,
-#endif
/* Size 4x4 */
32, 23, 19, 16, 23, 19, 17, 15, 19, 17, 13, 12, 16, 15, 12, 10,
/* Size 8x8 */
@@ -8535,10 +8292,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 28, 14, 14, 8,
-#endif
/* Size 4x4 */
32, 28, 18, 13, 28, 19, 14, 11, 18, 14, 10, 8, 13, 11, 8, 7,
/* Size 8x8 */
@@ -8735,10 +8488,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
11, 12, 12, 12, 13, 13, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, 9, 9, 9,
9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 22, 19, 19, 12,
-#endif
/* Size 4x4 */
32, 23, 20, 17, 23, 19, 17, 16, 20, 17, 14, 13, 17, 16, 13, 11,
/* Size 8x8 */
@@ -8949,10 +8698,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 29, 16, 16, 9,
-#endif
/* Size 4x4 */
32, 30, 19, 14, 30, 21, 16, 13, 19, 16, 11, 9, 14, 13, 9, 7,
/* Size 8x8 */
@@ -9152,10 +8897,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
7, 7, 7, 8, 12, 12, 12, 13, 13, 13, 13, 14, 13, 13, 12, 12, 12, 11, 11,
11, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 22, 20, 20, 12,
-#endif
/* Size 4x4 */
32, 22, 21, 18, 22, 19, 19, 17, 21, 19, 15, 13, 18, 17, 13, 11,
/* Size 8x8 */
@@ -9366,10 +9107,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 29, 18, 18, 10,
-#endif
/* Size 4x4 */
32, 30, 21, 14, 30, 21, 17, 13, 21, 17, 12, 10, 14, 13, 10, 8,
/* Size 8x8 */
@@ -9571,10 +9308,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
10, 9, 9, 9, 9, 8, 8, 8, 13, 14, 14, 14, 14, 14, 14, 15, 15, 14, 14, 13,
13, 12, 12, 11, 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 7 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 22, 20, 20, 13,
-#endif
/* Size 4x4 */
32, 22, 22, 18, 22, 19, 19, 17, 22, 19, 16, 14, 18, 17, 14, 12,
/* Size 8x8 */
@@ -9785,10 +9518,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 30, 20, 20, 12,
-#endif
/* Size 4x4 */
32, 31, 23, 17, 31, 26, 20, 16, 23, 20, 14, 12, 17, 16, 12, 9,
/* Size 8x8 */
@@ -9997,10 +9726,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
15, 14, 14, 13, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9,
8, 8 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 25, 21, 21, 15,
-#endif
/* Size 4x4 */
33, 24, 22, 19, 24, 21, 20, 19, 22, 20, 17, 15, 19, 19, 15, 13,
/* Size 8x8 */
@@ -10211,10 +9936,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 31, 22, 22, 14,
-#endif
/* Size 4x4 */
32, 31, 24, 19, 31, 27, 22, 18, 24, 22, 16, 14, 19, 18, 14, 11,
/* Size 8x8 */
@@ -10423,10 +10144,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10,
10, 10, 10, 9 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 26, 22, 22, 16,
-#endif
/* Size 4x4 */
33, 25, 22, 20, 25, 21, 21, 20, 22, 21, 18, 17, 20, 20, 17, 14,
/* Size 8x8 */
@@ -10637,10 +10354,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 31, 24, 24, 16,
-#endif
/* Size 4x4 */
32, 32, 27, 20, 32, 29, 26, 21, 27, 26, 19, 16, 20, 21, 16, 13,
/* Size 8x8 */
@@ -10849,10 +10562,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
21, 21, 20, 19, 19, 18, 17, 17, 16, 16, 16, 15, 14, 14, 14, 13, 13, 13,
12, 12, 12, 12 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 27, 23, 23, 17,
-#endif
/* Size 4x4 */
33, 27, 22, 21, 27, 22, 22, 22, 22, 22, 19, 18, 21, 22, 18, 16,
/* Size 8x8 */
@@ -11063,10 +10772,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 27, 27, 19,
-#endif
/* Size 4x4 */
32, 32, 29, 24, 32, 30, 28, 24, 29, 28, 21, 19, 24, 24, 19, 16,
/* Size 8x8 */
@@ -11275,10 +10980,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
23, 23, 23, 23, 22, 20, 20, 20, 19, 18, 18, 18, 17, 17, 17, 16, 16, 15,
15, 15, 15, 14 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 31, 23, 23, 19,
-#endif
/* Size 4x4 */
33, 28, 22, 22, 28, 23, 22, 23, 22, 22, 19, 19, 22, 23, 19, 17,
/* Size 8x8 */
@@ -11489,10 +11190,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 30, 30, 21,
-#endif
/* Size 4x4 */
32, 32, 30, 27, 32, 31, 29, 26, 30, 29, 26, 23, 27, 26, 23, 19,
/* Size 8x8 */
@@ -11701,10 +11398,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
24, 24, 24, 24, 24, 24, 24, 23, 21, 21, 21, 20, 19, 19, 19, 18, 18, 18,
18, 17, 16, 16 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 22, 22, 19,
-#endif
/* Size 4x4 */
33, 30, 24, 22, 30, 26, 23, 22, 24, 23, 21, 21, 22, 22, 21, 19,
/* Size 8x8 */
@@ -11915,10 +11608,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 31, 31, 26,
-#endif
/* Size 4x4 */
32, 32, 32, 29, 32, 32, 31, 29, 32, 31, 29, 27, 29, 29, 27, 22,
/* Size 8x8 */
@@ -12127,10 +11816,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
30, 29, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 25, 24, 24, 24, 23, 22,
21, 21, 21, 21 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 33, 24, 24, 21,
-#endif
/* Size 4x4 */
33, 32, 27, 22, 32, 30, 25, 22, 27, 25, 22, 22, 22, 22, 22, 20,
/* Size 8x8 */
@@ -12341,10 +12026,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 32, 32, 29,
-#endif
/* Size 4x4 */
33, 32, 32, 32, 32, 32, 32, 31, 32, 32, 31, 30, 32, 31, 30, 29,
/* Size 8x8 */
@@ -12553,10 +12234,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 28, 28, 28, 28, 28,
28, 28, 27, 27 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 33, 27, 27, 22,
-#endif
/* Size 4x4 */
33, 33, 30, 27, 33, 32, 29, 26, 30, 29, 26, 24, 27, 26, 24, 22,
/* Size 8x8 */
@@ -12767,10 +12444,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 32, 32, 31,
-#endif
/* Size 4x4 */
33, 33, 33, 32, 33, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 31,
/* Size 8x8 */
@@ -12979,10 +12652,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30,
30, 30, 30, 30 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 33, 31, 31, 28,
-#endif
/* Size 4x4 */
33, 33, 33, 30, 33, 33, 33, 29, 33, 33, 32, 29, 30, 29, 29, 26,
/* Size 8x8 */
@@ -13193,10 +12862,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 33, 33, 33, 32,
-#endif
/* Size 4x4 */
33, 33, 33, 33, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32,
/* Size 8x8 */
@@ -13405,10 +13070,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 33, 33, 33, 33,
-#endif
/* Size 4x4 */
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
/* Size 8x8 */
@@ -13619,10 +13280,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
},
{
{ /* Luma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 32, 32, 32,
-#endif
/* Size 4x4 */
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
/* Size 8x8 */
@@ -13831,10 +13488,6 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32 },
{ /* Chroma */
-#if CONFIG_CHROMA_2X2
- /* Size 2x2 */
- 32, 32, 32, 32,
-#endif
/* Size 4x4 */
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
/* Size 8x8 */
@@ -14044,63 +13697,3 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
32, 32, 32, 32 },
},
};
-#endif
-
-#if CONFIG_PVQ
-/* Quantization matrices for 8x8. For other block sizes, we currently just do
- resampling. */
-/* Flat quantization, i.e. optimize for PSNR. */
-const int OD_QM8_Q4_FLAT[] = { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16 };
-#if 0
-/* M1: MPEG2 matrix for inter (which has a dead zone). */
-const int OD_QM8_Q4[] = {
- 16, 17, 18, 19, 20, 21, 22, 23,
- 17, 18, 19, 20, 21, 22, 23, 24,
- 18, 19, 20, 21, 22, 23, 24, 25,
- 19, 20, 21, 22, 23, 24, 26, 27,
- 20, 21, 22, 23, 25, 26, 27, 28,
- 21, 22, 23, 24, 26, 27, 28, 30,
- 22, 23, 24, 26, 27, 28, 30, 31,
- 23, 24, 25, 27, 28, 30, 31, 33};
-#endif
-#if 0
-/* M2: MPEG2 matrix for intra (no dead zone). */
-const int OD_QM8_Q4[] = {
- 16, 16, 19, 22, 22, 26, 26, 27,
- 16, 16, 22, 22, 26, 27, 27, 29,
- 19, 22, 26, 26, 27, 29, 29, 35,
- 22, 24, 27, 27, 29, 32, 34, 38,
- 26, 27, 29, 29, 32, 35, 38, 46,
- 27, 29, 34, 34, 35, 40, 46, 56,
- 29, 34, 34, 37, 40, 48, 56, 69,
- 34, 37, 38, 40, 48, 58, 69, 83
-};
-#endif
-#if 0
-/* M3: Taken from dump_psnrhvs. */
-const int OD_QM8_Q4[] = {
- 16, 16, 17, 20, 24, 29, 36, 42,
- 16, 17, 17, 19, 22, 26, 31, 37,
- 17, 17, 21, 23, 26, 30, 34, 40,
- 20, 19, 23, 28, 31, 35, 39, 45,
- 24, 22, 26, 31, 36, 41, 46, 51,
- 29, 26, 30, 35, 41, 47, 52, 58,
- 36, 31, 34, 39, 46, 52, 59, 66,
- 42, 37, 40, 45, 51, 58, 66, 73
-};
-#endif
-#if 1
-/* M4: a compromise equal to .5*(M3 + .5*(M2+transpose(M2))) */
-const int OD_QM8_Q4_HVS[] = { 16, 16, 18, 21, 24, 28, 32, 36, 16, 17, 20,
- 21, 24, 27, 31, 35, 18, 20, 24, 25, 27, 31,
- 33, 38, 21, 21, 25, 28, 30, 34, 37, 42, 24,
- 24, 27, 30, 34, 38, 43, 49, 28, 27, 31, 34,
- 38, 44, 50, 58, 32, 31, 33, 37, 43, 50, 58,
- 68, 36, 35, 38, 42, 49, 58, 68, 78 };
-#endif
-#endif
diff --git a/third_party/aom/av1/common/quant_common.h b/third_party/aom/av1/common/quant_common.h
index 92843fe4d..f9681036d 100644
--- a/third_party/aom/av1/common/quant_common.h
+++ b/third_party/aom/av1/common/quant_common.h
@@ -25,82 +25,37 @@ extern "C" {
#define MAXQ 255
#define QINDEX_RANGE (MAXQ - MINQ + 1)
#define QINDEX_BITS 8
-#if CONFIG_AOM_QM
// Total number of QM sets stored
#define QM_LEVEL_BITS 4
#define NUM_QM_LEVELS (1 << QM_LEVEL_BITS)
/* Range of QMs is between the first and last value, with an offset applied to
 * inter blocks */
+#define DEFAULT_QM_Y 10
+#define DEFAULT_QM_U 11
+#define DEFAULT_QM_V 12
#define DEFAULT_QM_FIRST 5
#define DEFAULT_QM_LAST 9
-#define DEFAULT_QM_INTER_OFFSET 0
-#endif
struct AV1Common;
-int16_t av1_dc_quant(int qindex, int delta, aom_bit_depth_t bit_depth);
-int16_t av1_ac_quant(int qindex, int delta, aom_bit_depth_t bit_depth);
-int16_t av1_qindex_from_ac(int ac, aom_bit_depth_t bit_depth);
+int16_t av1_dc_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth);
+int16_t av1_ac_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth);
+int16_t av1_dc_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth);
+int16_t av1_ac_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth);
+int16_t av1_qindex_from_ac_Q3(int ac_Q3, aom_bit_depth_t bit_depth);
int av1_get_qindex(const struct segmentation *seg, int segment_id,
int base_qindex);
-#if CONFIG_AOM_QM
// Reduce the large number of quantizers to a smaller number of levels for which
// different matrices may be defined
static INLINE int aom_get_qmlevel(int qindex, int first, int last) {
return first + (qindex * (last + 1 - first)) / QINDEX_RANGE;
}
-void aom_qm_init(struct AV1Common *cm);
-qm_val_t *aom_iqmatrix(struct AV1Common *cm, int qindex, int comp,
- TX_SIZE tx_size, int is_intra);
-qm_val_t *aom_qmatrix(struct AV1Common *cm, int qindex, int comp,
- TX_SIZE tx_size, int is_intra);
-#endif
-
-#if CONFIG_NEW_QUANT
-
-#define QUANT_PROFILES 4
-#define QUANT_RANGES 2
-#define NUQ_KNOTS 3
-
-typedef tran_low_t dequant_val_type_nuq[NUQ_KNOTS + 1];
-typedef tran_low_t cuml_bins_type_nuq[NUQ_KNOTS];
-void av1_get_dequant_val_nuq(int q, int band, tran_low_t *dq,
- tran_low_t *cuml_bins, int dq_off_index);
-tran_low_t av1_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq);
-tran_low_t av1_dequant_coeff_nuq(int v, int q, const tran_low_t *dq);
-
-static INLINE int qindex_to_qrange(int qindex) {
- return (qindex < 140 ? 1 : 0);
-}
-
-static INLINE int get_dq_profile_from_ctx(int qindex, int q_ctx, int is_inter,
- PLANE_TYPE plane_type) {
- // intra/inter, Y/UV, ctx, qrange
- static const int
- def_dq_profile_lookup[REF_TYPES][PLANE_TYPES][COEFF_CONTEXTS0]
- [QUANT_RANGES] = {
- {
- // intra
- { { 2, 1 }, { 2, 1 }, { 2, 1 } }, // Y
- { { 3, 1 }, { 3, 1 }, { 3, 1 } }, // UV
- },
- {
- // inter
- { { 3, 1 }, { 2, 1 }, { 2, 1 } }, // Y
- { { 3, 1 }, { 3, 1 }, { 3, 1 } }, // UV
- },
- };
- if (!qindex) return 0; // lossless
- return def_dq_profile_lookup[is_inter][plane_type][q_ctx]
- [qindex_to_qrange(qindex)];
-}
-#endif // CONFIG_NEW_QUANT
-
-#if CONFIG_PVQ
-extern const int OD_QM8_Q4_FLAT[];
-extern const int OD_QM8_Q4_HVS[];
-#endif
+void av1_qm_init(struct AV1Common *cm);
+const qm_val_t *av1_iqmatrix(struct AV1Common *cm, int qindex, int comp,
+ TX_SIZE tx_size);
+const qm_val_t *av1_qmatrix(struct AV1Common *cm, int qindex, int comp,
+ TX_SIZE tx_size);
#ifdef __cplusplus
} // extern "C"
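As a side note on the quantization-matrix changes above: aom_get_qmlevel() linearly maps the 256 quantizer indices onto the level range [first, last]. A minimal standalone sketch of that mapping (not part of the diff; MINQ is assumed to be 0 as in this header, and the defaults DEFAULT_QM_FIRST = 5, DEFAULT_QM_LAST = 9 are used):

#include <stdio.h>

#define MINQ 0
#define MAXQ 255
#define QINDEX_RANGE (MAXQ - MINQ + 1)

/* Mirrors aom_get_qmlevel(): linear map of qindex onto [first, last]. */
static int qmlevel(int qindex, int first, int last) {
  return first + (qindex * (last + 1 - first)) / QINDEX_RANGE;
}

int main(void) {
  /* With the default range [5, 9]: qindex 0 -> 5, 128 -> 7, 255 -> 9. */
  printf("%d %d %d\n", qmlevel(0, 5, 9), qmlevel(128, 5, 9), qmlevel(255, 5, 9));
  return 0;
}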
diff --git a/third_party/aom/av1/common/reconinter.c b/third_party/aom/av1/common/reconinter.c
index a1a22a0af..b6ac436fb 100644
--- a/third_party/aom/av1/common/reconinter.c
+++ b/third_party/aom/av1/common/reconinter.c
@@ -13,208 +13,157 @@
#include <stdio.h>
#include <limits.h>
-#include "./aom_scale_rtcd.h"
-#include "./aom_dsp_rtcd.h"
-#include "./aom_config.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/aom_scale_rtcd.h"
#include "aom/aom_integer.h"
#include "aom_dsp/blend.h"
#include "av1/common/blockd.h"
+#include "av1/common/mvref_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
-#if CONFIG_MOTION_VAR
#include "av1/common/onyxc_int.h"
#include "av1/common/obmc.h"
-#endif // CONFIG_MOTION_VAR
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+#define USE_PRECOMPUTED_WEDGE_MASK 1
+#define USE_PRECOMPUTED_WEDGE_SIGN 1
+
// This function will determine whether or not to create a warped
-// prediction and return the appropriate motion model depending
-// on the configuration. Behavior will change with different
-// combinations of GLOBAL_MOTION, WARPED_MOTION and MOTION_VAR.
-static INLINE int allow_warp(const MODE_INFO *const mi,
- const WarpTypesAllowed *const warp_types,
-#if CONFIG_GLOBAL_MOTION
- const WarpedMotionParams *const gm_params,
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_MOTION_VAR
- int build_for_obmc,
-#endif // CONFIG_MOTION_VAR
- WarpedMotionParams *final_warp_params) {
- const MB_MODE_INFO *const mbmi = &mi->mbmi;
- *final_warp_params = default_warp_params;
-
-// Only global motion configured
-#if CONFIG_GLOBAL_MOTION && !CONFIG_WARPED_MOTION && !CONFIG_MOTION_VAR
- (void)mbmi;
- if (warp_types->global_warp_allowed) {
- memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
- return 1;
- }
-#endif // CONFIG_GLOBAL_MOTION && !CONFIG_WARPED_MOTION && !CONFIG_MOTION_VAR
+// prediction.
+int av1_allow_warp(const MB_MODE_INFO *const mbmi,
+ const WarpTypesAllowed *const warp_types,
+ const WarpedMotionParams *const gm_params,
+ int build_for_obmc, int x_scale, int y_scale,
+ WarpedMotionParams *final_warp_params) {
+ if (x_scale != SCALE_SUBPEL_SHIFTS || y_scale != SCALE_SUBPEL_SHIFTS)
+ return 0;
-// Only warped motion configured
-#if CONFIG_WARPED_MOTION && !CONFIG_GLOBAL_MOTION && !CONFIG_MOTION_VAR
- if (warp_types->local_warp_allowed) {
- memcpy(final_warp_params, &mbmi->wm_params[0], sizeof(*final_warp_params));
- return 1;
- }
-#endif // CONFIG_WARPED_MOTION && !CONFIG_GLOBAL_MOTION && !CONFIG_MOTION_VAR
-
-// Warped and global motion configured
-#if CONFIG_GLOBAL_MOTION && CONFIG_WARPED_MOTION && !CONFIG_MOTION_VAR
- // When both are enabled, warped will take priority. The global parameters
- // will only be used to compute projection samples to find the warped model.
- // Note that when a block chooses global, it will not be possible to
- // select WARPED_CAUSAL.
- if (warp_types->local_warp_allowed) {
- memcpy(final_warp_params, &mbmi->wm_params[0], sizeof(*final_warp_params));
- return 1;
- } else if (warp_types->global_warp_allowed) {
- memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
- return 1;
- }
-#endif // CONFIG_GLOBAL_MOTION && CONFIG_WARPED_MOTION && !CONFIG_MOTION_VAR
-
-// Motion var and global motion configured
-#if CONFIG_GLOBAL_MOTION && CONFIG_MOTION_VAR && !CONFIG_WARPED_MOTION
- // We warp if either case is true:
- // 1.) We are predicting a block which uses global motion
- // 2.) We are predicting a neighboring block of a block using OBMC,
- // the neighboring block uses global motion, and we have enabled
- // WARP_GM_NEIGHBORS_WITH_OBMC
- (void)mbmi;
- if (warp_types->global_warp_allowed &&
- (WARP_GM_NEIGHBORS_WITH_OBMC || !build_for_obmc)) {
- memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
- return 1;
- }
-#endif // CONFIG_GLOBAL_MOTION && CONFIG_MOTION_VAR && !CONFIG_WARPED_MOTION
-
-// Motion var and warped motion configured
-#if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR && !CONFIG_GLOBAL_MOTION
- // We warp if either case is true:
- // 1.) We are predicting a block with motion mode WARPED_CAUSAL
- // 2.) We are predicting a neighboring block of a block using OBMC,
- // the neighboring block has mode WARPED_CAUSAL, and we have enabled
- // WARP_WM_NEIGHBORS_WITH_OBMC
- if (warp_types->local_warp_allowed) {
- if ((build_for_obmc && WARP_WM_NEIGHBORS_WITH_OBMC) || (!build_for_obmc)) {
- memcpy(final_warp_params, &mbmi->wm_params[0],
- sizeof(*final_warp_params));
- return 1;
- }
- }
-#endif // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR && !CONFIG_GLOBAL_MOTION
+ if (final_warp_params != NULL) *final_warp_params = default_warp_params;
+
+ if (build_for_obmc) return 0;
-// Motion var, warped motion and global motion all configured
-#if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR && CONFIG_GLOBAL_MOTION
- if (warp_types->local_warp_allowed) {
- if ((build_for_obmc && WARP_WM_NEIGHBORS_WITH_OBMC) || (!build_for_obmc)) {
+ if (warp_types->local_warp_allowed && !mbmi->wm_params[0].invalid) {
+ if (final_warp_params != NULL)
memcpy(final_warp_params, &mbmi->wm_params[0],
sizeof(*final_warp_params));
- return 1;
- }
- } else if (warp_types->global_warp_allowed &&
- (WARP_GM_NEIGHBORS_WITH_OBMC || !build_for_obmc)) {
- memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
+ return 1;
+ } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
+ if (final_warp_params != NULL)
+ memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
return 1;
}
-#endif // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR && CONFIG_GLOBAL_MOTION
return 0;
}
-#endif // CONFIG_GLOBAL_MOTION ||CONFIG_WARPED_MOTION
-static INLINE void av1_make_inter_predictor(
- const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
- const int subpel_x, const int subpel_y, const struct scale_factors *sf,
- int w, int h, ConvolveParams *conv_params, InterpFilters interp_filters,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- const WarpTypesAllowed *warp_types, int p_col, int p_row, int plane,
- int ref,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-#if CONFIG_MOTION_VAR
- const MODE_INFO *mi, int build_for_obmc,
-#endif
- int xs, int ys, const MACROBLOCKD *xd) {
- (void)xd;
-
-#if !CONFIG_MOTION_VAR
- const MODE_INFO *mi = xd->mi[0];
- (void)mi;
-#endif // CONFIG_MOTION_VAR
-
-// Make sure the selected motion mode is valid for this configuration
-#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- assert_motion_mode_valid(mi->mbmi.motion_mode,
-#if CONFIG_GLOBAL_MOTION
- 0, xd->global_motion,
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_WARPED_MOTION
- xd,
-#endif
- mi);
-#endif // CONFIG MOTION_VAR || CONFIG_WARPED_MOTION
+void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, const SubpelParams *subpel_params,
+ const struct scale_factors *sf, int w, int h,
+ ConvolveParams *conv_params,
+ InterpFilters interp_filters,
+ const WarpTypesAllowed *warp_types, int p_col,
+ int p_row, int plane, int ref,
+ const MB_MODE_INFO *mi, int build_for_obmc,
+ const MACROBLOCKD *xd, int can_use_previous) {
+ // Make sure the selected motion mode is valid for this configuration
+ assert_motion_mode_valid(mi->motion_mode, xd->global_motion, xd, mi,
+ can_use_previous);
+ assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
-#if CONFIG_WARPED_MOTION || CONFIG_GLOBAL_MOTION
WarpedMotionParams final_warp_params;
- const int do_warp = allow_warp(
- mi, warp_types,
-#if CONFIG_GLOBAL_MOTION
-#if CONFIG_COMPOUND_SINGLEREF
- // TODO(zoeliu): To further check the single
- // ref comp mode to work together with
- // global motion.
- has_second_ref(&mi->mbmi) ? &xd->global_motion[mi->mbmi.ref_frame[ref]]
- : &xd->global_motion[mi->mbmi.ref_frame[0]],
-#else // !(CONFIG_COMPOUND_SINGLEREF)
- &xd->global_motion[mi->mbmi.ref_frame[ref]],
-#endif // CONFIG_COMPOUND_SINGLEREF
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_MOTION_VAR
- build_for_obmc,
-#endif // CONFIG_MOTION_VAR
- &final_warp_params);
- if (do_warp
-#if CONFIG_AMVR
- && xd->cur_frame_mv_precision_level == 0
-#endif
- ) {
+ const int do_warp =
+ (w >= 8 && h >= 8 &&
+ av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]],
+ build_for_obmc, subpel_params->xs, subpel_params->ys,
+ &final_warp_params));
+ if (do_warp && xd->cur_frame_force_integer_mv == 0) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const struct buf_2d *const pre_buf = &pd->pre[ref];
av1_warp_plane(&final_warp_params,
-#if CONFIG_HIGHBITDEPTH
xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
-#endif // CONFIG_HIGHBITDEPTH
pre_buf->buf0, pre_buf->width, pre_buf->height,
pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
- pd->subsampling_x, pd->subsampling_y, xs, ys, conv_params);
- return;
- }
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
- sf, w, h, conv_params, interp_filters, xs, ys,
- xd->bd);
- return;
+ pd->subsampling_x, pd->subsampling_y, conv_params);
+ } else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf,
+ w, h, conv_params, interp_filters, xd->bd);
+ } else {
+ inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf, w, h,
+ conv_params, interp_filters);
}
-#endif // CONFIG_HIGHBITDEPTH
- inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w,
- h, conv_params, interp_filters, xs, ys);
}
-#define NSMOOTHERS 1
+#if USE_PRECOMPUTED_WEDGE_MASK
+static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 6, 18,
+ 37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+};
+static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 11, 27,
+ 46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+};
+static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 7, 21,
+ 43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+};
-// [smoother][negative][direction]
-DECLARE_ALIGNED(16, static uint8_t,
- wedge_mask_obl[NSMOOTHERS][2][WEDGE_DIRECTIONS]
- [MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
+static void shift_copy(const uint8_t *src, uint8_t *dst, int shift, int width) {
+ if (shift >= 0) {
+ memcpy(dst + shift, src, width - shift);
+ memset(dst, src[0], shift);
+ } else {
+ shift = -shift;
+ memcpy(dst, src + shift, width - shift);
+ memset(dst + width - shift, src[width - 1], shift);
+ }
+}
+#endif // USE_PRECOMPUTED_WEDGE_MASK
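The shift_copy() helper above slides a one-dimensional master row left or right and replicates the edge sample into the positions it vacates. A standalone sketch (not part of the diff) on a small 8-entry row:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Same edge-replicating shift as the shift_copy() helper in the diff above. */
static void shift_copy(const uint8_t *src, uint8_t *dst, int shift, int width) {
  if (shift >= 0) {
    memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);                         /* replicate left edge */
  } else {
    shift = -shift;
    memcpy(dst, src + shift, width - shift);
    memset(dst + width - shift, src[width - 1], shift); /* replicate right edge */
  }
}

int main(void) {
  const uint8_t row[8] = { 0, 0, 1, 18, 53, 64, 64, 64 };
  uint8_t out[8];
  shift_copy(row, out, 2, 8); /* prints: 0 0 0 0 1 18 53 64 */
  for (int i = 0; i < 8; ++i) printf("%d ", out[i]);
  printf("\n");
  return 0;
}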
+#if USE_PRECOMPUTED_WEDGE_SIGN
+/* clang-format off */
+DECLARE_ALIGNED(16, static uint8_t,
+ wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
+ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
+ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
+ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
+ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
+ { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
+ { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
+};
+/* clang-format on */
+#else
DECLARE_ALIGNED(16, static uint8_t,
wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]);
+#endif // USE_PRECOMPUTED_WEDGE_SIGN
+
+// [negative][direction]
+DECLARE_ALIGNED(
+ 16, static uint8_t,
+ wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
// 4 * MAX_WEDGE_SQUARE is an easy-to-compute and fairly tight upper bound
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
@@ -223,88 +172,6 @@ DECLARE_ALIGNED(16, static uint8_t,
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
-// Some unused wedge codebooks left temporarily to facilitate experiments.
-// To be removed when settled.
-/*
-static wedge_code_type wedge_codebook_8_hgtw[8] = {
- { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
- { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
- { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
- { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
-};
-
-static wedge_code_type wedge_codebook_8_hltw[8] = {
- { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
- { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
- { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
- { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
-};
-
-static wedge_code_type wedge_codebook_8_heqw[8] = {
- { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
- { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
- { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
- { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 },
-};
-
-static const wedge_code_type wedge_codebook_32_hgtw[32] = {
- { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
- { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
- { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
- { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
- { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 },
- { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 },
- { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 },
- { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
- { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
- { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
- { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 },
- { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 },
- { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 },
- { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
- { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
- { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
-};
-
-static const wedge_code_type wedge_codebook_32_hltw[32] = {
- { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
- { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
- { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 },
- { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 },
- { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 },
- { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 },
- { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 },
- { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
- { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
- { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
- { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 },
- { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 },
- { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 },
- { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
- { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
- { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
-};
-
-static const wedge_code_type wedge_codebook_32_heqw[32] = {
- { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
- { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
- { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
- { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 },
- { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 },
- { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 },
- { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 },
- { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
- { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
- { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
- { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 },
- { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 },
- { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 },
- { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
- { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
- { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
-};
-*/
-
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
{ WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
{ WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
@@ -339,78 +206,37 @@ static const wedge_code_type wedge_codebook_16_heqw[16] = {
};
const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { 0, NULL, NULL, 0, NULL },
- { 0, NULL, NULL, 0, NULL },
- { 0, NULL, NULL, 0, NULL },
-#endif // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { 0, NULL, NULL, 0, NULL },
- { 0, NULL, NULL, 0, NULL },
- { 0, NULL, NULL, 0, NULL },
-#if CONFIG_WEDGE
- { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], 0,
+ { 0, NULL, NULL, NULL },
+ { 0, NULL, NULL, NULL },
+ { 0, NULL, NULL, NULL },
+ { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
wedge_masks[BLOCK_8X8] },
- { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16], 0,
+ { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
wedge_masks[BLOCK_8X16] },
- { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], 0,
+ { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
wedge_masks[BLOCK_16X8] },
- { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], 0,
+ { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
wedge_masks[BLOCK_16X16] },
- { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], 0,
+ { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
wedge_masks[BLOCK_16X32] },
- { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], 0,
+ { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
wedge_masks[BLOCK_32X16] },
- { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 0,
+ { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
wedge_masks[BLOCK_32X32] },
-#else
- { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], 0,
- wedge_masks[BLOCK_8X8] },
- { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16], 0,
- wedge_masks[BLOCK_8X16] },
- { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], 0,
- wedge_masks[BLOCK_16X8] },
- { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], 0,
- wedge_masks[BLOCK_16X16] },
- { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], 0,
- wedge_masks[BLOCK_16X32] },
- { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], 0,
- wedge_masks[BLOCK_32X16] },
- { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 0,
- wedge_masks[BLOCK_32X32] },
-#endif // CONFIG_WEDGE
- { 0, NULL, NULL, 0, NULL },
- { 0, NULL, NULL, 0, NULL },
- { 0, NULL, NULL, 0, NULL },
-#if CONFIG_EXT_PARTITION
- { 0, NULL, NULL, 0, NULL },
- { 0, NULL, NULL, 0, NULL },
- { 0, NULL, NULL, 0, NULL },
-#endif // CONFIG_EXT_PARTITION
-#if CONFIG_WEDGE
- { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_4X16], 0,
- wedge_masks[BLOCK_4X16] },
- { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X4], 0,
- wedge_masks[BLOCK_16X4] },
- { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32], 0,
- wedge_masks[BLOCK_8X32] },
- { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8], 0,
- wedge_masks[BLOCK_32X8] },
-#else
- { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_4X16], 0,
- wedge_masks[BLOCK_4X16] },
- { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X4], 0,
- wedge_masks[BLOCK_16X4] },
- { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32], 0,
+ { 0, NULL, NULL, NULL },
+ { 0, NULL, NULL, NULL },
+ { 0, NULL, NULL, NULL },
+ { 0, NULL, NULL, NULL },
+ { 0, NULL, NULL, NULL },
+ { 0, NULL, NULL, NULL },
+ { 0, NULL, NULL, NULL },
+ { 0, NULL, NULL, NULL },
+ { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
wedge_masks[BLOCK_8X32] },
- { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8], 0,
+ { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
wedge_masks[BLOCK_32X8] },
-#endif // CONFIG_WEDGE
- { 0, NULL, NULL, 0, NULL },
- { 0, NULL, NULL, 0, NULL },
-#if CONFIG_EXT_PARTITION
- { 0, NULL, NULL, 0, NULL },
- { 0, NULL, NULL, 0, NULL },
-#endif // CONFIG_EXT_PARTITION
+ { 0, NULL, NULL, NULL },
+ { 0, NULL, NULL, NULL },
};
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
@@ -420,7 +246,6 @@ static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
const int bw = block_size_wide[sb_type];
const wedge_code_type *a =
wedge_params_lookup[sb_type].codebook + wedge_index;
- const int smoother = wedge_params_lookup[sb_type].smoother;
int woff, hoff;
const uint8_t wsignflip = wedge_params_lookup[sb_type].signflip[wedge_index];
@@ -428,339 +253,231 @@ static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
wedge_index < (1 << get_wedge_bits_lookup(sb_type)));
woff = (a->x_offset * bw) >> 3;
hoff = (a->y_offset * bh) >> 3;
- master = wedge_mask_obl[smoother][neg ^ wsignflip][a->direction] +
+ master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
MASK_MASTER_SIZE / 2 - woff;
return master;
}
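The offset arithmetic above converts the codebook's x_offset/y_offset, given in eighths of the block, into samples and then re-centres the 64x64 master so its midpoint lands on that anchor. A standalone sketch of the index computation (not part of the diff; MASK_MASTER_SIZE and MASK_MASTER_STRIDE are assumed to be 64 here):

#include <stdio.h>

#define MASK_MASTER_SIZE 64
#define MASK_MASTER_STRIDE 64

int main(void) {
  const int bw = 32, bh = 16;            /* e.g. a 32x16 block */
  const int x_offset = 2, y_offset = 4;  /* wedge anchored at (2/8, 4/8) of it */
  const int woff = (x_offset * bw) >> 3; /* 8 samples */
  const int hoff = (y_offset * bh) >> 3; /* 8 samples */
  /* Offset of the returned pointer into the 64x64 master mask. */
  const int start = MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
                    MASK_MASTER_SIZE / 2 - woff;
  printf("woff=%d hoff=%d start=%d\n", woff, hoff, start); /* 8 8 1560 */
  return 0;
}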
-const uint8_t *av1_get_soft_mask(int wedge_index, int wedge_sign,
- BLOCK_SIZE sb_type, int offset_x,
- int offset_y) {
- const uint8_t *mask =
- get_wedge_mask_inplace(wedge_index, wedge_sign, sb_type);
- if (mask) mask -= (offset_x + offset_y * MASK_MASTER_STRIDE);
- return mask;
-}
-
-#if CONFIG_COMPOUND_SEGMENT
-static uint8_t *invert_mask(uint8_t *mask_inv_buffer, const uint8_t *const mask,
- int h, int w, int stride) {
- int i, j;
-
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) {
- mask_inv_buffer[i * stride + j] =
- AOM_BLEND_A64_MAX_ALPHA - mask[i * stride + j];
- }
- return mask_inv_buffer;
-}
-#endif // CONFIG_COMPOUND_SEGMENT
-
-const uint8_t *av1_get_compound_type_mask_inverse(
- const INTERINTER_COMPOUND_DATA *const comp_data,
-#if CONFIG_COMPOUND_SEGMENT
- uint8_t *mask_buffer, int h, int w, int stride,
-#endif
- BLOCK_SIZE sb_type) {
- assert(is_masked_compound_type(comp_data->interinter_compound_type));
- (void)sb_type;
- switch (comp_data->interinter_compound_type) {
-#if CONFIG_WEDGE
- case COMPOUND_WEDGE:
- return av1_get_contiguous_soft_mask(comp_data->wedge_index,
- !comp_data->wedge_sign, sb_type);
-#endif // CONFIG_WEDGE
-#if CONFIG_COMPOUND_SEGMENT
- case COMPOUND_SEG:
- return invert_mask(mask_buffer, comp_data->seg_mask, h, w, stride);
-#endif // CONFIG_COMPOUND_SEGMENT
- default: assert(0); return NULL;
- }
-}
-
const uint8_t *av1_get_compound_type_mask(
const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
- assert(is_masked_compound_type(comp_data->interinter_compound_type));
+ assert(is_masked_compound_type(comp_data->type));
(void)sb_type;
- switch (comp_data->interinter_compound_type) {
-#if CONFIG_WEDGE
+ switch (comp_data->type) {
case COMPOUND_WEDGE:
return av1_get_contiguous_soft_mask(comp_data->wedge_index,
comp_data->wedge_sign, sb_type);
-#endif // CONFIG_WEDGE
-#if CONFIG_COMPOUND_SEGMENT
- case COMPOUND_SEG: return comp_data->seg_mask;
-#endif // CONFIG_COMPOUND_SEGMENT
+ case COMPOUND_DIFFWTD: return comp_data->seg_mask;
default: assert(0); return NULL;
}
}
-#if CONFIG_COMPOUND_SEGMENT
-#if COMPOUND_SEGMENT_TYPE == 0
-static void uniform_mask(uint8_t *mask, int which_inverse, BLOCK_SIZE sb_type,
- int h, int w, int mask_val) {
- int i, j;
- int block_stride = block_size_wide[sb_type];
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) {
- mask[i * block_stride + j] =
- which_inverse ? AOM_BLEND_A64_MAX_ALPHA - mask_val : mask_val;
- }
-}
-
-void build_compound_seg_mask(uint8_t *mask, SEG_MASK_TYPE mask_type,
- const uint8_t *src0, int src0_stride,
- const uint8_t *src1, int src1_stride,
- BLOCK_SIZE sb_type, int h, int w) {
- (void)src0;
- (void)src1;
- (void)src0_stride;
- (void)src1_stride;
- switch (mask_type) {
- case UNIFORM_45: uniform_mask(mask, 0, sb_type, h, w, 45); break;
- case UNIFORM_45_INV: uniform_mask(mask, 1, sb_type, h, w, 45); break;
- default: assert(0);
- }
-}
-
-#if CONFIG_HIGHBITDEPTH
-void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
- const uint8_t *src0, int src0_stride,
- const uint8_t *src1, int src1_stride,
- BLOCK_SIZE sb_type, int h, int w, int bd) {
- (void)src0;
- (void)src1;
- (void)src0_stride;
- (void)src1_stride;
- (void)bd;
- switch (mask_type) {
- case UNIFORM_45: uniform_mask(mask, 0, sb_type, h, w, 45); break;
- case UNIFORM_45_INV: uniform_mask(mask, 1, sb_type, h, w, 45); break;
- default: assert(0);
- }
-}
-#endif // CONFIG_HIGHBITDEPTH
-
-#elif COMPOUND_SEGMENT_TYPE == 1
-#define DIFF_FACTOR 16
-
-#if CONFIG_CONVOLVE_ROUND
-static void diffwtd_mask_d32(uint8_t *mask, int which_inverse, int mask_base,
- const int32_t *src0, int src0_stride,
- const int32_t *src1, int src1_stride,
- BLOCK_SIZE sb_type, int h, int w,
- ConvolveParams *conv_params, int bd) {
+static void diffwtd_mask_d16(uint8_t *mask, int which_inverse, int mask_base,
+ const CONV_BUF_TYPE *src0, int src0_stride,
+ const CONV_BUF_TYPE *src1, int src1_stride, int h,
+ int w, ConvolveParams *conv_params, int bd) {
int round =
2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
int i, j, m, diff;
- int block_stride = block_size_wide[sb_type];
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
diff = ROUND_POWER_OF_TWO(diff, round);
m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
- mask[i * block_stride + j] =
- which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
+ mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
}
}
}
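The per-pixel weight above is mask_base plus the pixel difference scaled down by DIFF_FACTOR, clamped to the blend range; the d16 variant additionally right-shifts the difference by the convolve rounding term first. A standalone sketch of just the weight (not part of the diff; DIFF_FACTOR == 16 and AOM_BLEND_A64_MAX_ALPHA == 64 are assumed, matching the values used elsewhere in this tree):

#include <stdio.h>
#include <stdlib.h>

static int clampi(int v, int lo, int hi) {
  return v < lo ? lo : (v > hi ? hi : v);
}

/* Weight for the first predictor, in 1/64ths of full alpha. */
static int diffwtd_weight(int p0, int p1, int mask_base) {
  const int diff = abs(p0 - p1);
  return clampi(mask_base + diff / 16, 0, 64);
}

int main(void) {
  /* Identical pixels keep the base weight 38/64; a difference of 416 or more
   * saturates the weight at 64/64. */
  printf("%d %d\n", diffwtd_weight(100, 100, 38), diffwtd_weight(500, 80, 38));
  return 0;
}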
-static void build_compound_seg_mask_d32(uint8_t *mask, SEG_MASK_TYPE mask_type,
- const int32_t *src0, int src0_stride,
- const int32_t *src1, int src1_stride,
- BLOCK_SIZE sb_type, int h, int w,
- ConvolveParams *conv_params, int bd) {
+void av1_build_compound_diffwtd_mask_d16_c(
+ uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
+ int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
+ ConvolveParams *conv_params, int bd) {
switch (mask_type) {
case DIFFWTD_38:
- diffwtd_mask_d32(mask, 0, 38, src0, src0_stride, src1, src1_stride,
- sb_type, h, w, conv_params, bd);
+ diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
+ conv_params, bd);
break;
case DIFFWTD_38_INV:
- diffwtd_mask_d32(mask, 1, 38, src0, src0_stride, src1, src1_stride,
- sb_type, h, w, conv_params, bd);
+ diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
+ conv_params, bd);
break;
default: assert(0);
}
}
-#endif
static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
const uint8_t *src0, int src0_stride,
- const uint8_t *src1, int src1_stride,
- BLOCK_SIZE sb_type, int h, int w) {
+ const uint8_t *src1, int src1_stride, int h, int w) {
int i, j, m, diff;
- int block_stride = block_size_wide[sb_type];
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
diff =
abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
- mask[i * block_stride + j] =
- which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
+ mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
}
}
}
-void build_compound_seg_mask(uint8_t *mask, SEG_MASK_TYPE mask_type,
- const uint8_t *src0, int src0_stride,
- const uint8_t *src1, int src1_stride,
- BLOCK_SIZE sb_type, int h, int w) {
+void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
+ DIFFWTD_MASK_TYPE mask_type,
+ const uint8_t *src0, int src0_stride,
+ const uint8_t *src1, int src1_stride,
+ int h, int w) {
switch (mask_type) {
case DIFFWTD_38:
- diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, sb_type,
- h, w);
+ diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
break;
case DIFFWTD_38_INV:
- diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, sb_type,
- h, w);
+ diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
break;
default: assert(0);
}
}
-#if CONFIG_HIGHBITDEPTH
-static void diffwtd_mask_highbd(uint8_t *mask, int which_inverse, int mask_base,
- const uint16_t *src0, int src0_stride,
- const uint16_t *src1, int src1_stride,
- BLOCK_SIZE sb_type, int h, int w, int bd) {
- int i, j, m, diff;
- int block_stride = block_size_wide[sb_type];
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- diff = abs((int)src0[i * src0_stride + j] -
- (int)src1[i * src1_stride + j]) >>
- (bd - 8);
- m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
- mask[i * block_stride + j] =
- which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
+static AOM_FORCE_INLINE void diffwtd_mask_highbd(
+ uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
+ int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
+ const unsigned int bd) {
+ assert(bd >= 8);
+ if (bd == 8) {
+ if (which_inverse) {
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; ++j) {
+ int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
+ unsigned int m = negative_to_zero(mask_base + diff);
+ m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
+ mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
+ }
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += w;
+ }
+ } else {
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; ++j) {
+ int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
+ unsigned int m = negative_to_zero(mask_base + diff);
+ m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
+ mask[j] = m;
+ }
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += w;
+ }
+ }
+ } else {
+ const unsigned int bd_shift = bd - 8;
+ if (which_inverse) {
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; ++j) {
+ int diff =
+ (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
+ unsigned int m = negative_to_zero(mask_base + diff);
+ m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
+ mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
+ }
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += w;
+ }
+ } else {
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; ++j) {
+ int diff =
+ (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
+ unsigned int m = negative_to_zero(mask_base + diff);
+ m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
+ mask[j] = m;
+ }
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += w;
+ }
}
}
}
-void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
- const uint8_t *src0, int src0_stride,
- const uint8_t *src1, int src1_stride,
- BLOCK_SIZE sb_type, int h, int w, int bd) {
+void av1_build_compound_diffwtd_mask_highbd_c(
+ uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
+ int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
+ int bd) {
switch (mask_type) {
case DIFFWTD_38:
diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
- CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
- bd);
+ CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
break;
case DIFFWTD_38_INV:
diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
- CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
- bd);
+ CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
break;
default: assert(0);
}
}
-#endif // CONFIG_HIGHBITDEPTH
-#endif // COMPOUND_SEGMENT_TYPE
-#endif // CONFIG_COMPOUND_SEGMENT
-
-#if MASK_MASTER_SIZE == 64
-static const uint8_t wedge_master_oblique_odd[NSMOOTHERS][MASK_MASTER_SIZE] = {
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 6, 18,
- 37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- }
-};
-static const uint8_t wedge_master_oblique_even[NSMOOTHERS][MASK_MASTER_SIZE] = {
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 11, 27,
- 46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- }
-};
-static const uint8_t wedge_master_vertical[NSMOOTHERS][MASK_MASTER_SIZE] = { {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 7, 21,
- 43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
-} };
-
-static void shift_copy(const uint8_t *src, uint8_t *dst, int shift, int width) {
- if (shift >= 0) {
- memcpy(dst + shift, src, width - shift);
- memset(dst, src[0], shift);
- } else {
- shift = -shift;
- memcpy(dst, src + shift, width - shift);
- memset(dst + width - shift, src[width - 1], shift);
- }
-}
-#else
-static const double smoother_param[NSMOOTHERS] = { 3.0 };
-#endif // MASK_MASTER_SIZE == 64
static void init_wedge_master_masks() {
- int i, j, s;
+ int i, j;
const int w = MASK_MASTER_SIZE;
const int h = MASK_MASTER_SIZE;
const int stride = MASK_MASTER_STRIDE;
- for (s = 0; s < NSMOOTHERS; s++) {
// Note: index [0] stores the masters, and [1] its complement.
-#if MASK_MASTER_SIZE == 64
- // Generate prototype by shifting the masters
- int shift = h / 4;
- for (i = 0; i < h; i += 2) {
- shift_copy(wedge_master_oblique_even[s],
- &wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride], shift,
- MASK_MASTER_SIZE);
- shift--;
- shift_copy(wedge_master_oblique_odd[s],
- &wedge_mask_obl[s][0][WEDGE_OBLIQUE63][(i + 1) * stride],
- shift, MASK_MASTER_SIZE);
- memcpy(&wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride],
- wedge_master_vertical[s],
- MASK_MASTER_SIZE * sizeof(wedge_master_vertical[s][0]));
- memcpy(&wedge_mask_obl[s][0][WEDGE_VERTICAL][(i + 1) * stride],
- wedge_master_vertical[s],
- MASK_MASTER_SIZE * sizeof(wedge_master_vertical[s][0]));
- }
+#if USE_PRECOMPUTED_WEDGE_MASK
+ // Generate prototype by shifting the masters
+ int shift = h / 4;
+ for (i = 0; i < h; i += 2) {
+ shift_copy(wedge_master_oblique_even,
+ &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
+ MASK_MASTER_SIZE);
+ shift--;
+ shift_copy(wedge_master_oblique_odd,
+ &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
+ MASK_MASTER_SIZE);
+ memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
+ wedge_master_vertical,
+ MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
+ memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
+ wedge_master_vertical,
+ MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
+ }
#else
- const int a[2] = { 2, 1 };
- const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; ++j) {
- int x = (2 * j + 1 - w);
- int y = (2 * i + 1 - h);
- double d = (a[0] * x + a[1] * y) / asqrt;
- const int msk = (int)rint((1.0 + tanh(d / smoother_param[s])) * 32);
- wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j] = msk;
- const int mskx = (int)rint((1.0 + tanh(x / smoother_param[s])) * 32);
- wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j] = mskx;
- }
+ static const double smoother_param = 2.85;
+ const int a[2] = { 2, 1 };
+ const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; ++j) {
+ int x = (2 * j + 1 - w);
+ int y = (2 * i + 1 - h);
+ double d = (a[0] * x + a[1] * y) / asqrt;
+ const int msk = (int)rint((1.0 + tanh(d / smoother_param)) * 32);
+ wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j] = msk;
+ const int mskx = (int)rint((1.0 + tanh(x / smoother_param)) * 32);
+ wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j] = mskx;
}
-#endif // MASK_MASTER_SIZE == 64
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- const int msk = wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j];
- wedge_mask_obl[s][0][WEDGE_OBLIQUE27][j * stride + i] = msk;
- wedge_mask_obl[s][0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
- wedge_mask_obl[s][0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
- (1 << WEDGE_WEIGHT_BITS) - msk;
- wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j] =
- wedge_mask_obl[s][1][WEDGE_OBLIQUE27][j * stride + i] =
- (1 << WEDGE_WEIGHT_BITS) - msk;
- wedge_mask_obl[s][1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
- wedge_mask_obl[s][1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
- msk;
- const int mskx = wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j];
- wedge_mask_obl[s][0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
- wedge_mask_obl[s][1][WEDGE_VERTICAL][i * stride + j] =
- wedge_mask_obl[s][1][WEDGE_HORIZONTAL][j * stride + i] =
- (1 << WEDGE_WEIGHT_BITS) - mskx;
- }
+ }
+#endif // USE_PRECOMPUTED_WEDGE_MASK
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; ++j) {
+ const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
+ wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
+ wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
+ wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
+ (1 << WEDGE_WEIGHT_BITS) - msk;
+ wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
+ wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
+ (1 << WEDGE_WEIGHT_BITS) - msk;
+ wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
+ wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
+ const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
+ wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
+ wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
+ wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
+ (1 << WEDGE_WEIGHT_BITS) - mskx;
}
}
}
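Bank [0] above holds the master weights and bank [1] their complements, so either sign choice blends the two predictors with weights that always sum to full alpha. A standalone sketch of that blend (not part of the diff; WEDGE_WEIGHT_BITS is assumed to be 6, matching the 0..64 weights built here):

#include <stdio.h>

#define WEDGE_WEIGHT_BITS 6

int main(void) {
  const int full = 1 << WEDGE_WEIGHT_BITS; /* 64 */
  const int p0 = 180, p1 = 60;             /* two predictor samples */
  for (int w = 0; w <= full; w += 16) {
    /* Weighted average with rounding; w and (full - w) always sum to 64. */
    const int blended =
        (w * p0 + (full - w) * p1 + full / 2) >> WEDGE_WEIGHT_BITS;
    printf("w=%2d blend=%d\n", w, blended);
  }
  return 0;
}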
+#if !USE_PRECOMPUTED_WEDGE_SIGN
// If the signs for the wedges for various blocksizes are
// inconsistent flip the sign flag. Do it only once for every
// wedge codebook.
@@ -774,28 +491,29 @@ static void init_wedge_signs() {
const int wbits = wedge_params.bits;
const int wtypes = 1 << wbits;
int i, w;
- if (wbits == 0) continue;
- for (w = 0; w < wtypes; ++w) {
- // Get the mask master, i.e. index [0]
- const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
- int avg = 0;
- for (i = 0; i < bw; ++i) avg += mask[i];
- for (i = 1; i < bh; ++i) avg += mask[i * MASK_MASTER_STRIDE];
- avg = (avg + (bw + bh - 1) / 2) / (bw + bh - 1);
- // Default sign of this wedge is 1 if the average < 32, 0 otherwise.
- // If default sign is 1:
- // If sign requested is 0, we need to flip the sign and return
- // the complement i.e. index [1] instead. If sign requested is 1
- // we need to flip the sign and return index [0] instead.
- // If default sign is 0:
- // If sign requested is 0, we need to return index [0] the master
- // if sign requested is 1, we need to return the complement index [1]
- // instead.
- wedge_params.signflip[w] = (avg < 32);
- // printf("%d[%d] = %d\n", sb_type, w, wedge_params.signflip[w]);
+ if (wbits) {
+ for (w = 0; w < wtypes; ++w) {
+ // Get the mask master, i.e. index [0]
+ const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
+ int avg = 0;
+ for (i = 0; i < bw; ++i) avg += mask[i];
+ for (i = 1; i < bh; ++i) avg += mask[i * MASK_MASTER_STRIDE];
+ avg = (avg + (bw + bh - 1) / 2) / (bw + bh - 1);
+ // Default sign of this wedge is 1 if the average < 32, 0 otherwise.
+ // If default sign is 1:
+        //   If sign requested is 0, we need to flip the sign and return
+        //   the complement, i.e. index [1], instead. If sign requested is 1,
+        //   we need to flip the sign and return index [0] instead.
+        // If default sign is 0:
+        //   If sign requested is 0, we need to return index [0], the master;
+        //   if sign requested is 1, we need to return the complement,
+        //   index [1], instead.
+ wedge_params.signflip[w] = (avg < 32);
+ }
}
}
}
+#endif // !USE_PRECOMPUTED_WEDGE_SIGN
static void init_wedge_masks() {
uint8_t *dst = wedge_mask_buf;
@@ -830,83 +548,32 @@ static void init_wedge_masks() {
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
void av1_init_wedge_masks() {
init_wedge_master_masks();
+#if !USE_PRECOMPUTED_WEDGE_SIGN
init_wedge_signs();
+#endif // !USE_PRECOMPUTED_WEDGE_SIGN
init_wedge_masks();
}
-#if CONFIG_SUPERTX
-static void build_masked_compound_wedge_extend(
- uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
- const uint8_t *src1, int src1_stride,
- const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type,
- int wedge_offset_x, int wedge_offset_y, int h, int w) {
- const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
- const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
- const uint8_t *mask;
- size_t mask_stride;
- switch (comp_data->interinter_compound_type) {
- case COMPOUND_WEDGE:
- mask = av1_get_soft_mask(comp_data->wedge_index, comp_data->wedge_sign,
- sb_type, wedge_offset_x, wedge_offset_y);
- mask_stride = MASK_MASTER_STRIDE;
- break;
-#if CONFIG_COMPOUND_SEGMENT
- case COMPOUND_SEG:
- mask = comp_data->seg_mask;
- mask_stride = block_size_wide[sb_type];
- break;
-#endif
- default: assert(0); return;
- }
- aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
- mask, (int)mask_stride, h, w, subh, subw);
-}
-
-#if CONFIG_HIGHBITDEPTH
-static void build_masked_compound_wedge_extend_highbd(
- uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
- const uint8_t *src1_8, int src1_stride,
- const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type,
- int wedge_offset_x, int wedge_offset_y, int h, int w, int bd) {
- const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
- const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
- const uint8_t *mask;
- size_t mask_stride;
- switch (comp_data->interinter_compound_type) {
- case COMPOUND_WEDGE:
- mask = av1_get_soft_mask(comp_data->wedge_index, comp_data->wedge_sign,
- sb_type, wedge_offset_x, wedge_offset_y);
- mask_stride = MASK_MASTER_STRIDE;
- break;
-#if CONFIG_COMPOUND_SEGMENT
- case COMPOUND_SEG:
- mask = comp_data->seg_mask;
- mask_stride = block_size_wide[sb_type];
- break;
-#endif
- default: assert(0); return;
- }
- aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8,
- src1_stride, mask, (int)mask_stride, h, w, subh,
- subw, bd);
-}
-#endif // CONFIG_HIGHBITDEPTH
-#else
-#if CONFIG_CONVOLVE_ROUND
static void build_masked_compound_no_round(
- CONV_BUF_TYPE *dst, int dst_stride, const CONV_BUF_TYPE *src0,
- int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride,
+ uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
+ const CONV_BUF_TYPE *src1, int src1_stride,
const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
- int w) {
+ int w, ConvolveParams *conv_params, MACROBLOCKD *xd) {
// Derive subsampling from h and w passed in. May be refactored to
// pass in subsampling factors directly.
- const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
- const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
+ const int subh = (2 << mi_size_high_log2[sb_type]) == h;
+ const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
- aom_blend_a64_d32_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
- mask, block_size_wide[sb_type], h, w, subh, subw);
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, block_size_wide[sb_type],
+ w, h, subw, subh, conv_params, xd->bd);
+ else
+ aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, block_size_wide[sb_type], w,
+ h, subw, subh, conv_params);
}
-#endif // CONFIG_CONVOLVE_ROUND
+
static void build_masked_compound(
uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
const uint8_t *src1, int src1_stride,
@@ -914,14 +581,13 @@ static void build_masked_compound(
int w) {
// Derive subsampling from h and w passed in. May be refactored to
// pass in subsampling factors directly.
- const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
- const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
+ const int subh = (2 << mi_size_high_log2[sb_type]) == h;
+ const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
- mask, block_size_wide[sb_type], h, w, subh, subw);
+ mask, block_size_wide[sb_type], w, h, subw, subh);
}
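The subw/subh derivation above compares w and h against half the luma block dimensions: with 4x4 mi units, (2 << mi_size_wide_log2[sb_type]) equals block_width / 2, so the test yields 1 for a subsampled chroma plane and 0 for luma. A standalone sketch for BLOCK_16X16 in a 4:2:0 frame (not part of the diff; the mi_size_wide_log2 value is assumed):

#include <stdio.h>

int main(void) {
  const int mi_size_wide_log2_16x16 = 2;            /* 16 samples = 4 mi units */
  const int half_bw = 2 << mi_size_wide_log2_16x16; /* 8 = 16 / 2 */
  const int w_luma = 16, w_chroma_420 = 8;
  const int subw_luma = (half_bw == w_luma);          /* 0: not subsampled */
  const int subw_chroma = (half_bw == w_chroma_420);  /* 1: subsampled     */
  printf("luma subw=%d, 4:2:0 chroma subw=%d\n", subw_luma, subw_chroma);
  return 0;
}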
-#if CONFIG_HIGHBITDEPTH
static void build_masked_compound_highbd(
uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
const uint8_t *src1_8, int src1_stride,
@@ -929,320 +595,259 @@ static void build_masked_compound_highbd(
int w, int bd) {
// Derive subsampling from h and w passed in. May be refactored to
// pass in subsampling factors directly.
- const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
- const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
+ const int subh = (2 << mi_size_high_log2[sb_type]) == h;
+ const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
// const uint8_t *mask =
// av1_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type);
aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8,
- src1_stride, mask, block_size_wide[sb_type], h, w,
- subh, subw, bd);
+ src1_stride, mask, block_size_wide[sb_type], w, h,
+ subw, subh, bd);
}
-#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_SUPERTX
void av1_make_masked_inter_predictor(
const uint8_t *pre, int pre_stride, uint8_t *dst, int dst_stride,
- const int subpel_x, const int subpel_y, const struct scale_factors *sf,
- int w, int h, ConvolveParams *conv_params, InterpFilters interp_filters,
- int xs, int ys,
-#if CONFIG_SUPERTX
- int wedge_offset_x, int wedge_offset_y,
-#endif // CONFIG_SUPERTX
- int plane,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+ const SubpelParams *subpel_params, const struct scale_factors *sf, int w,
+ int h, ConvolveParams *conv_params, InterpFilters interp_filters, int plane,
const WarpTypesAllowed *warp_types, int p_col, int p_row, int ref,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- MACROBLOCKD *xd) {
- const MODE_INFO *mi = xd->mi[0];
-
- const INTERINTER_COMPOUND_DATA comp_data = {
-#if CONFIG_WEDGE
- mi->mbmi.wedge_index,
- mi->mbmi.wedge_sign,
-#endif // CONFIG_WEDGE
-#if CONFIG_COMPOUND_SEGMENT
- mi->mbmi.mask_type,
- xd->seg_mask,
-#endif // CONFIG_COMPOUND_SEGMENT
- mi->mbmi.interinter_compound_type
- };
+ MACROBLOCKD *xd, int can_use_previous) {
+ MB_MODE_INFO *mi = xd->mi[0];
+ (void)dst;
+ (void)dst_stride;
+ mi->interinter_comp.seg_mask = xd->seg_mask;
+ const INTERINTER_COMPOUND_DATA *comp_data = &mi->interinter_comp;
// We're going to call av1_make_inter_predictor to generate a prediction into
// a temporary buffer, then will blend that temporary buffer with that from
// the other reference.
//
-// With CONFIG_CONVOLVE_ROUND, if the rounding mode is CONVOLVE_OPT_NO_ROUND
-// then the predictions are at 32-bits, so we'll need 32 bits per
-// pixel. Otherwise, we'll need up to 16 bits per pixel if
-// CONFIG_HIGHBITDEPTH or just 8 otherwise.
-#if CONFIG_CONVOLVE_ROUND
-#define INTER_PRED_BYTES_PER_PIXEL 4
-#elif CONFIG_HIGHBITDEPTH
#define INTER_PRED_BYTES_PER_PIXEL 2
-#else
-#define INTER_PRED_BYTES_PER_PIXEL 1
-#endif
- DECLARE_ALIGNED(16, uint8_t,
+
+ DECLARE_ALIGNED(32, uint8_t,
tmp_buf[INTER_PRED_BYTES_PER_PIXEL * MAX_SB_SQUARE]);
#undef INTER_PRED_BYTES_PER_PIXEL
-#if CONFIG_HIGHBITDEPTH
uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
? CONVERT_TO_BYTEPTR(tmp_buf)
: tmp_buf;
- const int bd = xd->bd;
-#else
- uint8_t *tmp_dst = tmp_buf;
- const int bd = 8;
-#endif
-#if CONFIG_CONVOLVE_ROUND
const int tmp_buf_stride = MAX_SB_SIZE;
- const int is_conv_no_round = conv_params->round == CONVOLVE_OPT_NO_ROUND;
CONV_BUF_TYPE *org_dst = conv_params->dst;
int org_dst_stride = conv_params->dst_stride;
- CONV_BUF_TYPE *tmp_buf32 = (CONV_BUF_TYPE *)tmp_buf;
- if (is_conv_no_round) {
- conv_params->dst = tmp_buf32;
- conv_params->dst_stride = tmp_buf_stride;
- assert(conv_params->do_average == 0);
- }
-#endif // CONFIG_CONVOLVE_ROUND
+ CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
+ conv_params->dst = tmp_buf16;
+ conv_params->dst_stride = tmp_buf_stride;
+ assert(conv_params->do_average == 0);
// This will generate a prediction in tmp_buf for the second reference
- av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
- subpel_y, sf, w, h, conv_params, interp_filters,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- warp_types, p_col, p_row, plane, ref,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-#if CONFIG_MOTION_VAR
- mi, 0,
-#endif
- xs, ys, xd);
-
-#if CONFIG_COMPOUND_SEGMENT
- if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
-#if CONFIG_CONVOLVE_ROUND
- if (is_conv_no_round) {
- build_compound_seg_mask_d32(
- comp_data.seg_mask, comp_data.mask_type, org_dst, org_dst_stride,
- tmp_buf32, tmp_buf_stride, mi->mbmi.sb_type, h, w, conv_params, bd);
- } else {
-#endif // CONFIG_CONVOLVE_ROUND
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- build_compound_seg_mask_highbd(comp_data.seg_mask, comp_data.mask_type,
- dst, dst_stride, tmp_dst, MAX_SB_SIZE,
- mi->mbmi.sb_type, h, w, bd);
- } else {
-#endif
- build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
- dst_stride, tmp_dst, MAX_SB_SIZE,
- mi->mbmi.sb_type, h, w);
-#if CONFIG_HIGHBITDEPTH
- }
-#endif
-#if CONFIG_CONVOLVE_ROUND
- }
-#endif
- }
-#endif // CONFIG_COMPOUND_SEGMENT
+ av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_params,
+ sf, w, h, conv_params, interp_filters, warp_types,
+ p_col, p_row, plane, ref, mi, 0, xd,
+ can_use_previous);
-#if CONFIG_SUPERTX
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- build_masked_compound_wedge_extend_highbd(
- dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, &comp_data,
- mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w, xd->bd);
- else
-#endif // CONFIG_HIGHBITDEPTH
- build_masked_compound_wedge_extend(
- dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, &comp_data,
- mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w);
-#else
-#if CONFIG_CONVOLVE_ROUND
- if (is_conv_no_round) {
- build_masked_compound_no_round(org_dst, org_dst_stride, org_dst,
- org_dst_stride, tmp_buf32, tmp_buf_stride,
- &comp_data, mi->mbmi.sb_type, h, w);
-
- const int convolve_rounding_bits =
- FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- av1_highbd_convolve_rounding(org_dst, org_dst_stride, dst, dst_stride, w,
- h, convolve_rounding_bits, xd->bd);
- else
-#endif
- av1_convolve_rounding(org_dst, org_dst_stride, dst, dst_stride, w, h,
- convolve_rounding_bits);
-
- conv_params->do_post_rounding = 0;
- } else {
-#endif // CONFIG_CONVOLVE_ROUND
-
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- build_masked_compound_highbd(dst, dst_stride, dst, dst_stride, tmp_dst,
- MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h,
- w, xd->bd);
- else
-#endif // CONFIG_HIGHBITDEPTH
- build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
- MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
-#if CONFIG_CONVOLVE_ROUND
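+  // For a COMPOUND_DIFFWTD mask, derive the per-pixel mask from the two
+  // 16-bit predictions (the first already in org_dst, the second just written
+  // to tmp_buf16). The mask is only computed here, on the luma plane.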
+ if (!plane && comp_data->type == COMPOUND_DIFFWTD) {
+ av1_build_compound_diffwtd_mask_d16(
+ comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
+ tmp_buf16, tmp_buf_stride, h, w, conv_params, xd->bd);
}
-#endif // CONFIG_CONVOLVE_ROUND
-#endif // CONFIG_SUPERTX
-
-#if CONFIG_COMPOUND_SEGMENT
- (void)plane;
-#endif // CONFIG_COMPOUND_SEGMENT
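+  // Blend the two 16-bit predictions (org_dst and tmp_buf16) under the
+  // compound mask and write the final, rounded result to dst.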
+ build_masked_compound_no_round(dst, dst_stride, org_dst, org_dst_stride,
+ tmp_buf16, tmp_buf_stride, comp_data,
+ mi->sb_type, h, w, conv_params, xd);
}
// TODO(sarahparker) av1_highbd_build_inter_predictor and
// av1_build_inter_predictor should be combined with
// av1_make_inter_predictor
-#if CONFIG_HIGHBITDEPTH
void av1_highbd_build_inter_predictor(
const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref,
- InterpFilters interp_filters,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- const WarpTypesAllowed *warp_types, int p_col, int p_row,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- int plane, enum mv_precision precision, int x, int y,
- const MACROBLOCKD *xd) {
+ InterpFilters interp_filters, const WarpTypesAllowed *warp_types, int p_col,
+ int p_row, int plane, enum mv_precision precision, int x, int y,
+ const MACROBLOCKD *xd, int can_use_previous) {
const int is_q4 = precision == MV_PRECISION_Q4;
const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
is_q4 ? src_mv->col : src_mv->col * 2 };
MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
mv.col += SCALE_EXTRA_OFF;
mv.row += SCALE_EXTRA_OFF;
- const int subpel_x = mv.col & SCALE_SUBPEL_MASK;
- const int subpel_y = mv.row & SCALE_SUBPEL_MASK;
- ConvolveParams conv_params = get_conv_params(ref, ref, plane);
+ const SubpelParams subpel_params = { sf->x_step_q4, sf->y_step_q4,
+ mv.col & SCALE_SUBPEL_MASK,
+ mv.row & SCALE_SUBPEL_MASK };
+ ConvolveParams conv_params = get_conv_params(ref, 0, plane, xd->bd);
src += (mv.row >> SCALE_SUBPEL_BITS) * src_stride +
(mv.col >> SCALE_SUBPEL_BITS);
- av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
- sf, w, h, &conv_params, interp_filters,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- warp_types, p_col, p_row, plane, ref,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-#if CONFIG_MOTION_VAR
- xd->mi[0], 0,
-#endif
- sf->x_step_q4, sf->y_step_q4, xd);
+ av1_make_inter_predictor(src, src_stride, dst, dst_stride, &subpel_params, sf,
+ w, h, &conv_params, interp_filters, warp_types,
+ p_col, p_row, plane, ref, xd->mi[0], 0, xd,
+ can_use_previous);
}
-#endif // CONFIG_HIGHBITDEPTH
void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, const MV *src_mv,
const struct scale_factors *sf, int w, int h,
ConvolveParams *conv_params,
InterpFilters interp_filters,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
const WarpTypesAllowed *warp_types, int p_col,
int p_row, int plane, int ref,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
enum mv_precision precision, int x, int y,
- const MACROBLOCKD *xd) {
+ const MACROBLOCKD *xd, int can_use_previous) {
const int is_q4 = precision == MV_PRECISION_Q4;
const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
is_q4 ? src_mv->col : src_mv->col * 2 };
MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
mv.col += SCALE_EXTRA_OFF;
mv.row += SCALE_EXTRA_OFF;
- const int subpel_x = mv.col & SCALE_SUBPEL_MASK;
- const int subpel_y = mv.row & SCALE_SUBPEL_MASK;
+ const SubpelParams subpel_params = { sf->x_step_q4, sf->y_step_q4,
+ mv.col & SCALE_SUBPEL_MASK,
+ mv.row & SCALE_SUBPEL_MASK };
src += (mv.row >> SCALE_SUBPEL_BITS) * src_stride +
(mv.col >> SCALE_SUBPEL_BITS);
- av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
- sf, w, h, conv_params, interp_filters,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- warp_types, p_col, p_row, plane, ref,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-#if CONFIG_MOTION_VAR
- xd->mi[0], 0,
-#endif
- sf->x_step_q4, sf->y_step_q4, xd);
+ av1_make_inter_predictor(src, src_stride, dst, dst_stride, &subpel_params, sf,
+ w, h, conv_params, interp_filters, warp_types, p_col,
+ p_row, plane, ref, xd->mi[0], 0, xd,
+ can_use_previous);
+}
+
+void av1_jnt_comp_weight_assign(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi,
+ int order_idx, int *fwd_offset, int *bck_offset,
+ int *use_jnt_comp_avg, int is_compound) {
+ assert(fwd_offset != NULL && bck_offset != NULL);
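+  // Distance-weighted compound averaging is only used for compound blocks
+  // signalled with compound_idx == 0.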
+ if (!is_compound || mbmi->compound_idx) {
+ *use_jnt_comp_avg = 0;
+ return;
+ }
+
+ *use_jnt_comp_avg = 1;
+ const int bck_idx = cm->frame_refs[mbmi->ref_frame[0] - LAST_FRAME].idx;
+ const int fwd_idx = cm->frame_refs[mbmi->ref_frame[1] - LAST_FRAME].idx;
+ const int cur_frame_index = cm->cur_frame->cur_frame_offset;
+ int bck_frame_index = 0, fwd_frame_index = 0;
+
+ if (bck_idx >= 0) {
+ bck_frame_index = cm->buffer_pool->frame_bufs[bck_idx].cur_frame_offset;
+ }
+
+ if (fwd_idx >= 0) {
+ fwd_frame_index = cm->buffer_pool->frame_bufs[fwd_idx].cur_frame_offset;
+ }
+
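+  // d0/d1 are the clamped temporal distances from the current frame to the
+  // forward and backward references.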
+ int d0 = clamp(abs(get_relative_dist(cm, fwd_frame_index, cur_frame_index)),
+ 0, MAX_FRAME_DISTANCE);
+ int d1 = clamp(abs(get_relative_dist(cm, cur_frame_index, bck_frame_index)),
+ 0, MAX_FRAME_DISTANCE);
+
+ const int order = d0 <= d1;
+
+ if (d0 == 0 || d1 == 0) {
+ *fwd_offset = quant_dist_lookup_table[order_idx][3][order];
+ *bck_offset = quant_dist_lookup_table[order_idx][3][1 - order];
+ return;
+ }
+
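+  // Scan the candidate weight pairs in order and stop at the first one whose
+  // weighted distances (d0 * c0 vs d1 * c1) cross over; that index selects
+  // the forward/backward offsets from the lookup table.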
+ int i;
+ for (i = 0; i < 3; ++i) {
+ int c0 = quant_dist_weight[i][order];
+ int c1 = quant_dist_weight[i][!order];
+ int d0_c0 = d0 * c0;
+ int d1_c1 = d1 * c1;
+ if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
+ }
+
+ *fwd_offset = quant_dist_lookup_table[order_idx][i][order];
+ *bck_offset = quant_dist_lookup_table[order_idx][i][1 - order];
+}
+
+static INLINE void calc_subpel_params(
+ MACROBLOCKD *xd, const struct scale_factors *const sf, const MV mv,
+ int plane, const int pre_x, const int pre_y, int x, int y,
+ struct buf_2d *const pre_buf, uint8_t **pre, SubpelParams *subpel_params,
+ int bw, int bh) {
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int is_scaled = av1_is_scaled(sf);
+ if (is_scaled) {
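+    // Scaled reference: combine the block position and the motion vector into
+    // a sub-pel position in the current plane, then project it into the
+    // reference frame.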
+ int ssx = pd->subsampling_x;
+ int ssy = pd->subsampling_y;
+ int orig_pos_y = (pre_y + y) << SUBPEL_BITS;
+ orig_pos_y += mv.row * (1 << (1 - ssy));
+ int orig_pos_x = (pre_x + x) << SUBPEL_BITS;
+ orig_pos_x += mv.col * (1 << (1 - ssx));
+ int pos_y = sf->scale_value_y(orig_pos_y, sf);
+ int pos_x = sf->scale_value_x(orig_pos_x, sf);
+ pos_x += SCALE_EXTRA_OFF;
+ pos_y += SCALE_EXTRA_OFF;
+
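+    // Clamp the projected position against the (extended) reference frame
+    // borders.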
+ const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
+ const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
+ const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
+ << SCALE_SUBPEL_BITS;
+ const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS;
+ pos_y = clamp(pos_y, top, bottom);
+ pos_x = clamp(pos_x, left, right);
+
+ *pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
+ (pos_x >> SCALE_SUBPEL_BITS);
+ subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
+ subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
+ subpel_params->xs = sf->x_step_q4;
+ subpel_params->ys = sf->y_step_q4;
+ } else {
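+    // Unscaled reference: clamp the MV against the UMV border, then split it
+    // into a whole-pel offset and a sub-pel phase.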
+ const MV mv_q4 = clamp_mv_to_umv_border_sb(
+ xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
+ subpel_params->xs = subpel_params->ys = SCALE_SUBPEL_SHIFTS;
+ subpel_params->subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
+ subpel_params->subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
+ *pre = pre_buf->buf + (y + (mv_q4.row >> SUBPEL_BITS)) * pre_buf->stride +
+ (x + (mv_q4.col >> SUBPEL_BITS));
+ }
}
-typedef struct SubpelParams {
- int xs;
- int ys;
- int subpel_x;
- int subpel_y;
-} SubpelParams;
-
-static INLINE void build_inter_predictors(
- const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
-#if CONFIG_MOTION_VAR
- const MODE_INFO *mi, int build_for_obmc,
-#endif // CONFIG_MOTION_VAR
- int block, int bw, int bh, int x, int y, int w, int h,
-#if CONFIG_SUPERTX
- int wedge_offset_x, int wedge_offset_y,
-#endif // CONFIG_SUPERTX
- int mi_x, int mi_y) {
+static INLINE void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ int plane, const MB_MODE_INFO *mi,
+ int build_for_obmc, int bw, int bh,
+ int mi_x, int mi_y) {
struct macroblockd_plane *const pd = &xd->plane[plane];
-#if !CONFIG_MOTION_VAR
- const MODE_INFO *mi = xd->mi[0];
-#endif // CONFIG_MOTION_VAR
- int is_compound = has_second_ref(&mi->mbmi);
-#if CONFIG_COMPOUND_SINGLEREF
- int is_comp_mode_pred =
- is_compound || is_inter_singleref_comp_mode(mi->mbmi.mode);
-#endif // CONFIG_COMPOUND_SINGLEREF
+ int is_compound = has_second_ref(mi);
int ref;
-#if CONFIG_INTRABC
- const int is_intrabc = is_intrabc_block(&mi->mbmi);
+ const int is_intrabc = is_intrabc_block(mi);
assert(IMPLIES(is_intrabc, !is_compound));
-#endif // CONFIG_INTRABC
-#if CONFIG_GLOBAL_MOTION
int is_global[2] = { 0, 0 };
for (ref = 0; ref < 1 + is_compound; ++ref) {
- WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]];
- is_global[ref] = is_global_mv_block(mi, block, wm->wmtype);
+ const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]];
+ is_global[ref] = is_global_mv_block(mi, wm->wmtype);
}
-#if CONFIG_COMPOUND_SINGLEREF
- if (!is_compound && is_comp_mode_pred) is_global[1] = is_global[0];
-#endif // CONFIG_COMPOUND_SINGLEREF
-#endif // CONFIG_GLOBAL_MOTION
-
-#if CONFIG_CB4X4
- (void)block;
- (void)cm;
-#endif
-#if CONFIG_CHROMA_SUB8X8
- const BLOCK_SIZE bsize = mi->mbmi.sb_type;
+ const BLOCK_SIZE bsize = mi->sb_type;
const int ss_x = pd->subsampling_x;
const int ss_y = pd->subsampling_y;
- int sub8x8_inter = bsize < BLOCK_8X8 && (ss_x || ss_y);
-
-#if CONFIG_INTRABC
- if (is_intrabc) {
- sub8x8_inter = 0;
- }
-#endif
+ int sub8x8_inter = (block_size_wide[bsize] < 8 && ss_x) ||
+ (block_size_high[bsize] < 8 && ss_y);
+
+ if (is_intrabc) sub8x8_inter = 0;
+
+ // For sub8x8 chroma blocks, we may be covering more than one luma block's
+ // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
+ // the top-left corner of the prediction source - the correct top-left corner
+ // is at (pre_x, pre_y).
+ const int row_start =
+ (block_size_high[bsize] == 4) && ss_y && !build_for_obmc ? -1 : 0;
+ const int col_start =
+ (block_size_wide[bsize] == 4) && ss_x && !build_for_obmc ? -1 : 0;
+ const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
+ const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
-#if CONFIG_MOTION_VAR
sub8x8_inter = sub8x8_inter && !build_for_obmc;
-#endif // CONFIG_MOTION_VAR
- const int row_start = (block_size_high[bsize] == 4) && ss_y ? -1 : 0;
- const int col_start = (block_size_wide[bsize] == 4) && ss_x ? -1 : 0;
-
if (sub8x8_inter) {
- for (int row = row_start; row <= 0 && sub8x8_inter; ++row)
- for (int col = col_start; col <= 0; ++col)
- if (!is_inter_block(&xd->mi[row * xd->mi_stride + col]->mbmi))
- sub8x8_inter = 0;
+ for (int row = row_start; row <= 0 && sub8x8_inter; ++row) {
+ for (int col = col_start; col <= 0; ++col) {
+ const MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
+ if (!is_inter_block(this_mbmi)) sub8x8_inter = 0;
+ if (is_intrabc_block(this_mbmi)) sub8x8_inter = 0;
+ }
+ }
}
if (sub8x8_inter) {
@@ -1252,178 +857,67 @@ static INLINE void build_inter_predictors(
const BLOCK_SIZE plane_bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
const int b8_w = block_size_wide[plane_bsize] >> ss_x;
const int b8_h = block_size_high[plane_bsize] >> ss_y;
- int idx, idy;
-
- const int x_base = x;
- const int y_base = y;
+ assert(!is_compound);
const struct buf_2d orig_pred_buf[2] = { pd->pre[0], pd->pre[1] };
int row = row_start;
- for (idy = 0; idy < b8_h; idy += b4_h) {
+ for (int y = 0; y < b8_h; y += b4_h) {
int col = col_start;
- for (idx = 0; idx < b8_w; idx += b4_w) {
- MB_MODE_INFO *this_mbmi = &xd->mi[row * xd->mi_stride + col]->mbmi;
+ for (int x = 0; x < b8_w; x += b4_w) {
+ MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
is_compound = has_second_ref(this_mbmi);
-#if CONFIG_CONVOLVE_ROUND
- DECLARE_ALIGNED(16, int32_t, tmp_dst[8 * 8]);
+ DECLARE_ALIGNED(32, CONV_BUF_TYPE, tmp_dst[8 * 8]);
int tmp_dst_stride = 8;
- assert(w <= 8 && h <= 8);
-#endif // CONFIG_CONVOLVE_ROUND
-#if CONFIG_CONVOLVE_ROUND
- ConvolveParams conv_params =
- get_conv_params_no_round(0, 0, plane, tmp_dst, tmp_dst_stride);
-#else
- ConvolveParams conv_params = get_conv_params(0, 0, plane);
-#endif
+ assert(bw < 8 || bh < 8);
+ ConvolveParams conv_params = get_conv_params_no_round(
+ 0, 0, plane, tmp_dst, tmp_dst_stride, is_compound, xd->bd);
+ conv_params.use_jnt_comp_avg = 0;
struct buf_2d *const dst_buf = &pd->dst;
- x = x_base + idx;
- y = y_base + idy;
uint8_t *dst = dst_buf->buf + dst_buf->stride * y + x;
- // TODO(zoeliu): If single ref comp modes are considered here, a
- // mismatch was caused. Need a further investigation.
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const RefBuffer *ref_buf =
- &cm->frame_refs[this_mbmi->ref_frame[ref] - LAST_FRAME];
-
- const int c_offset = (mi_x + MI_SIZE * col_start) >> ss_x;
- const int r_offset = (mi_y + MI_SIZE * row_start) >> ss_y;
- pd->pre[ref].buf0 =
- (plane == 1) ? ref_buf->buf->u_buffer : ref_buf->buf->v_buffer;
- pd->pre[ref].buf =
- pd->pre[ref].buf0 + scaled_buffer_offset(c_offset, r_offset,
- ref_buf->buf->uv_stride,
- &ref_buf->sf);
- pd->pre[ref].width = ref_buf->buf->uv_crop_width;
- pd->pre[ref].height = ref_buf->buf->uv_crop_height;
- pd->pre[ref].stride = ref_buf->buf->uv_stride;
-
-#if CONFIG_INTRABC
- const struct scale_factors *const sf =
- is_intrabc ? &xd->sf_identity : &ref_buf->sf;
- struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
-#else
- const struct scale_factors *const sf = &ref_buf->sf;
- struct buf_2d *const pre_buf = &pd->pre[ref];
-#endif // CONFIG_INTRABC
-
- const MV mv = this_mbmi->mv[ref].as_mv;
-
- uint8_t *pre;
- int xs, ys, subpel_x, subpel_y;
- const int is_scaled = av1_is_scaled(sf);
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- WarpTypesAllowed warp_types;
-#if CONFIG_GLOBAL_MOTION
- warp_types.global_warp_allowed = is_global[ref];
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_WARPED_MOTION
- warp_types.local_warp_allowed =
- this_mbmi->motion_mode == WARPED_CAUSAL;
-#endif // CONFIG_WARPED_MOTION
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-
- if (is_scaled) {
- int ssx = pd->subsampling_x;
- int ssy = pd->subsampling_y;
- int orig_pos_y = (mi_y << (SUBPEL_BITS - ssy)) + (y << SUBPEL_BITS);
- orig_pos_y += mv.row * (1 << (1 - ssy));
- int orig_pos_x = (mi_x << (SUBPEL_BITS - ssx)) + (x << SUBPEL_BITS);
- orig_pos_x += mv.col * (1 << (1 - ssx));
- int pos_y = sf->scale_value_y(orig_pos_y, sf);
- int pos_x = sf->scale_value_x(orig_pos_x, sf);
- pos_x += SCALE_EXTRA_OFF;
- pos_y += SCALE_EXTRA_OFF;
-
- const int top = -((AOM_INTERP_EXTEND + bh) << SCALE_SUBPEL_BITS);
- const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
- << SCALE_SUBPEL_BITS;
- const int left = -((AOM_INTERP_EXTEND + bw) << SCALE_SUBPEL_BITS);
- const int right = (pre_buf->width + AOM_INTERP_EXTEND)
- << SCALE_SUBPEL_BITS;
- pos_y = clamp(pos_y, top, bottom);
- pos_x = clamp(pos_x, left, right);
-
- pre = pre_buf->buf0 +
- (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
- (pos_x >> SCALE_SUBPEL_BITS);
- subpel_x = pos_x & SCALE_SUBPEL_MASK;
- subpel_y = pos_y & SCALE_SUBPEL_MASK;
- xs = sf->x_step_q4;
- ys = sf->y_step_q4;
- } else {
- const MV mv_q4 = clamp_mv_to_umv_border_sb(
- xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
- xs = ys = SCALE_SUBPEL_SHIFTS;
- subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
- subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
- pre = pre_buf->buf +
- (y + (mv_q4.row >> SUBPEL_BITS)) * pre_buf->stride +
- (x + (mv_q4.col >> SUBPEL_BITS));
- }
-
- conv_params.ref = ref;
- conv_params.do_average = ref;
- if (is_masked_compound_type(mi->mbmi.interinter_compound_type)) {
- // masked compound type has its own average mechanism
- conv_params.do_average = 0;
-#if CONFIG_CONVOLVE_ROUND && CONFIG_COMPOUND_SEGMENT && CONFIG_SUPERTX
- // TODO(angiebird): convolve_round does not support compound_segment
- // when supertx is on
- conv_params = get_conv_params(ref, 0, plane);
-#endif
- }
- if (ref && is_masked_compound_type(mi->mbmi.interinter_compound_type))
- av1_make_masked_inter_predictor(
- pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y,
- sf, b4_w, b4_h, &conv_params, mi->mbmi.interp_filters, xs, ys,
-#if CONFIG_SUPERTX
- wedge_offset_x, wedge_offset_y,
-#endif // CONFIG_SUPERTX
- plane,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- &warp_types, (mi_x >> pd->subsampling_x) + x,
- (mi_y >> pd->subsampling_y) + y, ref,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- xd);
- else
- av1_make_inter_predictor(
- pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y,
- sf, b4_w, b4_h, &conv_params, this_mbmi->interp_filters,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- &warp_types, (mi_x >> pd->subsampling_x) + x,
- (mi_y >> pd->subsampling_y) + y, plane, ref,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-#if CONFIG_MOTION_VAR
- mi, build_for_obmc,
-#endif // CONFIG_MOTION_VAR
- xs, ys, xd);
- } // for (ref = 0; ref < 1 + is_compound; ++ref)
-#if CONFIG_CONVOLVE_ROUND
- if (conv_params.do_post_rounding) {
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- av1_highbd_convolve_rounding(
- tmp_dst, tmp_dst_stride, dst, dst_buf->stride, b4_w, b4_h,
- FILTER_BITS * 2 + is_compound - conv_params.round_0 -
- conv_params.round_1,
- xd->bd);
- else
-#endif // CONFIG_HIGHBITDEPTH
-#if CONFIG_COMPOUND_SINGLEREF
- av1_convolve_rounding(
- tmp_dst, tmp_dst_stride, dst, dst_buf->stride, b4_w, b4_h,
- FILTER_BITS * 2 + is_comp_mode_pred - conv_params.round_0 -
- conv_params.round_1);
-#else // !(CONFIG_COMPOUND_SINGLEREF)
- av1_convolve_rounding(tmp_dst, tmp_dst_stride, dst, dst_buf->stride,
- b4_w, b4_h,
- FILTER_BITS * 2 + is_compound -
- conv_params.round_0 - conv_params.round_1);
-#endif // CONFIG_COMPOUND_SINGLEREF
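+      // Only the first reference of each 4x4 sub-block is used to build the
+      // chroma prediction here.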
+ ref = 0;
+ const RefBuffer *ref_buf =
+ &cm->frame_refs[this_mbmi->ref_frame[ref] - LAST_FRAME];
+
+ pd->pre[ref].buf0 =
+ (plane == 1) ? ref_buf->buf->u_buffer : ref_buf->buf->v_buffer;
+ pd->pre[ref].buf =
+ pd->pre[ref].buf0 + scaled_buffer_offset(pre_x, pre_y,
+ ref_buf->buf->uv_stride,
+ &ref_buf->sf);
+ pd->pre[ref].width = ref_buf->buf->uv_crop_width;
+ pd->pre[ref].height = ref_buf->buf->uv_crop_height;
+ pd->pre[ref].stride = ref_buf->buf->uv_stride;
+
+ const struct scale_factors *const sf =
+ is_intrabc ? &cm->sf_identity : &ref_buf->sf;
+ struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
+
+ const MV mv = this_mbmi->mv[ref].as_mv;
+
+ uint8_t *pre;
+ SubpelParams subpel_params;
+ WarpTypesAllowed warp_types;
+ warp_types.global_warp_allowed = is_global[ref];
+ warp_types.local_warp_allowed = this_mbmi->motion_mode == WARPED_CAUSAL;
+
+ calc_subpel_params(xd, sf, mv, plane, pre_x, pre_y, x, y, pre_buf, &pre,
+ &subpel_params, bw, bh);
+
+ conv_params.ref = ref;
+ conv_params.do_average = ref;
+ if (is_masked_compound_type(mi->interinter_comp.type)) {
+ // masked compound type has its own average mechanism
+ conv_params.do_average = 0;
}
-#endif // CONFIG_CONVOLVE_ROUND
+
+ av1_make_inter_predictor(
+ pre, pre_buf->stride, dst, dst_buf->stride, &subpel_params, sf,
+ b4_w, b4_h, &conv_params, this_mbmi->interp_filters, &warp_types,
+ (mi_x >> pd->subsampling_x) + x, (mi_y >> pd->subsampling_y) + y,
+ plane, ref, mi, build_for_obmc, xd, cm->allow_warped_motion);
+
++col;
}
++row;
@@ -1432,194 +926,50 @@ static INLINE void build_inter_predictors(
for (ref = 0; ref < 2; ++ref) pd->pre[ref] = orig_pred_buf[ref];
return;
}
-#else
- (void)cm;
-#endif // CONFIG_CHROMA_SUB8X8
{
+ DECLARE_ALIGNED(32, uint16_t, tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE]);
+ ConvolveParams conv_params = get_conv_params_no_round(
+ 0, 0, plane, tmp_dst, MAX_SB_SIZE, is_compound, xd->bd);
+ av1_jnt_comp_weight_assign(cm, mi, 0, &conv_params.fwd_offset,
+ &conv_params.bck_offset,
+ &conv_params.use_jnt_comp_avg, is_compound);
+
struct buf_2d *const dst_buf = &pd->dst;
- uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
- uint8_t *pre[2];
- SubpelParams subpel_params[2];
-#if CONFIG_CONVOLVE_ROUND
- DECLARE_ALIGNED(16, int32_t, tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE]);
-#endif // CONFIG_CONVOLVE_ROUND
-
-#if CONFIG_COMPOUND_SINGLEREF
- for (ref = 0; ref < 1 + is_comp_mode_pred; ++ref)
-#else
- for (ref = 0; ref < 1 + is_compound; ++ref)
-#endif // CONFIG_COMPOUND_SINGLEREF
- {
-#if CONFIG_INTRABC
+ uint8_t *const dst = dst_buf->buf;
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
const struct scale_factors *const sf =
- is_intrabc ? &xd->sf_identity : &xd->block_refs[ref]->sf;
+ is_intrabc ? &cm->sf_identity : &xd->block_refs[ref]->sf;
struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
-#else
- const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
- struct buf_2d *const pre_buf = &pd->pre[ref];
-#endif // CONFIG_INTRABC
-#if CONFIG_CB4X4
- const MV mv = mi->mbmi.mv[ref].as_mv;
-#else
- const MV mv =
-#if CONFIG_MOTION_VAR
- (mi->mbmi.sb_type < BLOCK_8X8 && !build_for_obmc)
- ?
-#else
- mi->mbmi.sb_type < BLOCK_8X8 ?
-#endif
- average_split_mvs(pd, mi, ref, block)
- : mi->mbmi.mv[ref].as_mv;
-#endif
-
- const int is_scaled = av1_is_scaled(sf);
- if (is_scaled) {
- // Note: The various inputs here have different units:
- // * mi_x/mi_y are in units of luma pixels
- // * mv is in units of 1/8 luma pixels
- // * x/y are in units of pixels *in the current plane*
- // Here we unify these into a q4-format position within the current
- // plane, then project into the reference frame
- int ssx = pd->subsampling_x;
- int ssy = pd->subsampling_y;
- int orig_pos_y = (mi_y << (SUBPEL_BITS - ssy)) + (y << SUBPEL_BITS);
- orig_pos_y += mv.row * (1 << (1 - ssy));
- int orig_pos_x = (mi_x << (SUBPEL_BITS - ssx)) + (x << SUBPEL_BITS);
- orig_pos_x += mv.col * (1 << (1 - ssx));
- int pos_y = sf->scale_value_y(orig_pos_y, sf);
- int pos_x = sf->scale_value_x(orig_pos_x, sf);
- pos_x += SCALE_EXTRA_OFF;
- pos_y += SCALE_EXTRA_OFF;
-
- // Clamp against the reference frame borders, with enough extension
- // that we don't force the reference block to be partially onscreen.
- const int top = -((AOM_INTERP_EXTEND + bh) << SCALE_SUBPEL_BITS);
- const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
- << SCALE_SUBPEL_BITS;
- const int left = -((AOM_INTERP_EXTEND + bw) << SCALE_SUBPEL_BITS);
- const int right = (pre_buf->width + AOM_INTERP_EXTEND)
- << SCALE_SUBPEL_BITS;
- pos_y = clamp(pos_y, top, bottom);
- pos_x = clamp(pos_x, left, right);
-
- pre[ref] = pre_buf->buf0 +
- (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
- (pos_x >> SCALE_SUBPEL_BITS);
- subpel_params[ref].subpel_x = pos_x & SCALE_SUBPEL_MASK;
- subpel_params[ref].subpel_y = pos_y & SCALE_SUBPEL_MASK;
- subpel_params[ref].xs = sf->x_step_q4;
- subpel_params[ref].ys = sf->y_step_q4;
- } else {
- const MV mv_q4 = clamp_mv_to_umv_border_sb(
- xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
- subpel_params[ref].subpel_x = (mv_q4.col & SUBPEL_MASK)
- << SCALE_EXTRA_BITS;
- subpel_params[ref].subpel_y = (mv_q4.row & SUBPEL_MASK)
- << SCALE_EXTRA_BITS;
- subpel_params[ref].xs = SCALE_SUBPEL_SHIFTS;
- subpel_params[ref].ys = SCALE_SUBPEL_SHIFTS;
- pre[ref] = pre_buf->buf +
- (y + (mv_q4.row >> SUBPEL_BITS)) * pre_buf->stride +
- (x + (mv_q4.col >> SUBPEL_BITS));
- }
- }
+ const MV mv = mi->mv[ref].as_mv;
-#if CONFIG_CONVOLVE_ROUND
- ConvolveParams conv_params =
- get_conv_params_no_round(ref, ref, plane, tmp_dst, MAX_SB_SIZE);
-#else
- ConvolveParams conv_params = get_conv_params(ref, ref, plane);
-#endif // CONFIG_CONVOLVE_ROUND
+ uint8_t *pre;
+ SubpelParams subpel_params;
+ calc_subpel_params(xd, sf, mv, plane, pre_x, pre_y, 0, 0, pre_buf, &pre,
+ &subpel_params, bw, bh);
-#if CONFIG_COMPOUND_SINGLEREF
- for (ref = 0; ref < 1 + is_comp_mode_pred; ++ref)
-#else
- for (ref = 0; ref < 1 + is_compound; ++ref)
-#endif // CONFIG_COMPOUND_SINGLEREF
- {
-#if CONFIG_INTRABC
- const struct scale_factors *const sf =
- is_intrabc ? &xd->sf_identity : &xd->block_refs[ref]->sf;
- struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
-#else
- const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
- struct buf_2d *const pre_buf = &pd->pre[ref];
-#endif // CONFIG_INTRABC
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
WarpTypesAllowed warp_types;
-#if CONFIG_GLOBAL_MOTION
warp_types.global_warp_allowed = is_global[ref];
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_WARPED_MOTION
- warp_types.local_warp_allowed = mi->mbmi.motion_mode == WARPED_CAUSAL;
-#endif // CONFIG_WARPED_MOTION
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+ warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL;
conv_params.ref = ref;
- conv_params.do_average = ref;
- if (is_masked_compound_type(mi->mbmi.interinter_compound_type)) {
+
+ if (ref && is_masked_compound_type(mi->interinter_comp.type)) {
// masked compound type has its own average mechanism
conv_params.do_average = 0;
-#if CONFIG_CONVOLVE_ROUND && CONFIG_COMPOUND_SEGMENT && CONFIG_SUPERTX
- // TODO(angiebird): convolve_round does not support compound_segment
- // when supertx is on
- conv_params = get_conv_params(ref, 0, plane);
-#endif
- }
-
- if (ref && is_masked_compound_type(mi->mbmi.interinter_compound_type))
av1_make_masked_inter_predictor(
- pre[ref], pre_buf->stride, dst, dst_buf->stride,
- subpel_params[ref].subpel_x, subpel_params[ref].subpel_y, sf, w, h,
- &conv_params, mi->mbmi.interp_filters, subpel_params[ref].xs,
- subpel_params[ref].ys,
-#if CONFIG_SUPERTX
- wedge_offset_x, wedge_offset_y,
-#endif // CONFIG_SUPERTX
- plane,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- &warp_types, (mi_x >> pd->subsampling_x) + x,
- (mi_y >> pd->subsampling_y) + y, ref,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- xd);
- else
+ pre, pre_buf->stride, dst, dst_buf->stride, &subpel_params, sf, bw,
+ bh, &conv_params, mi->interp_filters, plane, &warp_types,
+ mi_x >> pd->subsampling_x, mi_y >> pd->subsampling_y, ref, xd,
+ cm->allow_warped_motion);
+ } else {
+ conv_params.do_average = ref;
av1_make_inter_predictor(
- pre[ref], pre_buf->stride, dst, dst_buf->stride,
- subpel_params[ref].subpel_x, subpel_params[ref].subpel_y, sf, w, h,
- &conv_params, mi->mbmi.interp_filters,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- &warp_types, (mi_x >> pd->subsampling_x) + x,
- (mi_y >> pd->subsampling_y) + y, plane, ref,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-#if CONFIG_MOTION_VAR
- mi, build_for_obmc,
-#endif // CONFIG_MOTION_VAR
- subpel_params[ref].xs, subpel_params[ref].ys, xd);
- }
-
-#if CONFIG_CONVOLVE_ROUND
- // TODO(angiebird): This part needs optimization
- if (conv_params.do_post_rounding) {
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- av1_highbd_convolve_rounding(
- tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h,
- FILTER_BITS * 2 + is_compound - conv_params.round_0 -
- conv_params.round_1,
- xd->bd);
- else
-#endif // CONFIG_HIGHBITDEPTH
-#if CONFIG_COMPOUND_SINGLEREF
- av1_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h,
- FILTER_BITS * 2 + is_comp_mode_pred -
- conv_params.round_0 - conv_params.round_1);
-#else // !(CONFIG_COMPOUND_SINGLEREF)
- av1_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h,
- FILTER_BITS * 2 + is_compound -
- conv_params.round_0 - conv_params.round_1);
-#endif // CONFIG_COMPOUND_SINGLEREF
+ pre, pre_buf->stride, dst, dst_buf->stride, &subpel_params, sf, bw,
+ bh, &conv_params, mi->interp_filters, &warp_types,
+ mi_x >> pd->subsampling_x, mi_y >> pd->subsampling_y, plane, ref,
+ mi, build_for_obmc, xd, cm->allow_warped_motion);
+ }
}
-#endif // CONFIG_CONVOLVE_ROUND
}
}
@@ -1630,56 +980,16 @@ static void build_inter_predictors_for_planes(const AV1_COMMON *cm,
int plane;
const int mi_x = mi_col * MI_SIZE;
const int mi_y = mi_row * MI_SIZE;
-#if CONFIG_CB4X4
- const int unify_bsize = 1;
-#else
- const int unify_bsize = 0;
-#endif
for (plane = plane_from; plane <= plane_to; ++plane) {
const struct macroblockd_plane *pd = &xd->plane[plane];
const int bw = pd->width;
const int bh = pd->height;
-#if CONFIG_CB4X4
if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
pd->subsampling_y))
continue;
-#endif
- if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8 && !unify_bsize) {
- const PARTITION_TYPE bp = bsize - xd->mi[0]->mbmi.sb_type;
- const int have_vsplit = bp != PARTITION_HORZ;
- const int have_hsplit = bp != PARTITION_VERT;
- const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
- const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
- const int pw = 8 >> (have_vsplit | pd->subsampling_x);
- const int ph = 8 >> (have_hsplit | pd->subsampling_y);
- int x, y;
- assert(bp != PARTITION_NONE && bp < PARTITION_TYPES);
- assert(bsize == BLOCK_8X8);
- assert(pw * num_4x4_w == bw && ph * num_4x4_h == bh);
- for (y = 0; y < num_4x4_h; ++y)
- for (x = 0; x < num_4x4_w; ++x)
- build_inter_predictors(cm, xd, plane,
-#if CONFIG_MOTION_VAR
- xd->mi[0], 0,
-#endif // CONFIG_MOTION_VAR
- y * 2 + x, bw, bh, 4 * x, 4 * y, pw, ph,
-#if CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_SUPERTX
- mi_x, mi_y);
- } else {
- build_inter_predictors(cm, xd, plane,
-#if CONFIG_MOTION_VAR
- xd->mi[0], 0,
-#endif // CONFIG_MOTION_VAR
- 0, bw, bh, 0, 0, bw, bh,
-#if CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_SUPERTX
- mi_x, mi_y);
- }
+ build_inter_predictors(cm, xd, plane, xd->mi[0], 0, bw, bh, mi_x, mi_y);
}
}
@@ -1687,17 +997,14 @@ void av1_build_inter_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, BUFFER_SET *ctx,
BLOCK_SIZE bsize) {
build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 0, 0);
-#if CONFIG_INTERINTRA
- if (is_interintra_pred(&xd->mi[0]->mbmi)) {
+
+ if (is_interintra_pred(xd->mi[0])) {
BUFFER_SET default_ctx = { { xd->plane[0].dst.buf, NULL, NULL },
{ xd->plane[0].dst.stride, 0, 0 } };
if (!ctx) ctx = &default_ctx;
av1_build_interintra_predictors_sby(cm, xd, xd->plane[0].dst.buf,
xd->plane[0].dst.stride, ctx, bsize);
}
-#else
- (void)ctx;
-#endif // CONFIG_INTERINTRA
}
void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
@@ -1705,8 +1012,8 @@ void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
BLOCK_SIZE bsize) {
build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 1,
MAX_MB_PLANE - 1);
-#if CONFIG_INTERINTRA
- if (is_interintra_pred(&xd->mi[0]->mbmi)) {
+
+ if (is_interintra_pred(xd->mi[0])) {
BUFFER_SET default_ctx = {
{ NULL, xd->plane[1].dst.buf, xd->plane[2].dst.buf },
{ 0, xd->plane[1].dst.stride, xd->plane[2].dst.stride }
@@ -1716,247 +1023,49 @@ void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
cm, xd, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
xd->plane[1].dst.stride, xd->plane[2].dst.stride, ctx, bsize);
}
-#else
- (void)ctx;
-#endif // CONFIG_INTERINTRA
}
void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, BUFFER_SET *ctx,
BLOCK_SIZE bsize) {
+ const int num_planes = av1_num_planes(cm);
av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
- av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, ctx, bsize);
+ if (num_planes > 1)
+ av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, ctx, bsize);
}
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
- const YV12_BUFFER_CONFIG *src, int mi_row,
- int mi_col) {
- const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width,
- src->uv_crop_width };
- const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height,
- src->uv_crop_height };
- const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride,
- src->uv_stride };
- int i;
-
- for (i = 0; i < MAX_MB_PLANE; ++i) {
+ const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
+ const int plane_start, const int plane_end) {
+  // We use AOMMIN(plane_end, MAX_MB_PLANE) instead of plane_end to quiet
+  // the static analysis warnings.
+ for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
struct macroblockd_plane *const pd = &planes[i];
- setup_pred_plane(&pd->dst, bsize, src->buffers[i], widths[i], heights[i],
- strides[i], mi_row, mi_col, NULL, pd->subsampling_x,
- pd->subsampling_y);
+ const int is_uv = i > 0;
+ setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
+ src->crop_heights[is_uv], src->strides[is_uv], mi_row,
+ mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
}
}
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
- const struct scale_factors *sf) {
+ const struct scale_factors *sf,
+ const int num_planes) {
if (src != NULL) {
- int i;
- uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer,
- src->v_buffer };
- const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width,
- src->uv_crop_width };
- const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height,
- src->uv_crop_height };
- const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride,
- src->uv_stride };
- for (i = 0; i < MAX_MB_PLANE; ++i) {
+ // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
+ // the static analysis warnings.
+ for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
struct macroblockd_plane *const pd = &xd->plane[i];
- setup_pred_plane(&pd->pre[idx], xd->mi[0]->mbmi.sb_type, buffers[i],
- widths[i], heights[i], strides[i], mi_row, mi_col, sf,
+ const int is_uv = i > 0;
+ setup_pred_plane(&pd->pre[idx], xd->mi[0]->sb_type, src->buffers[i],
+ src->crop_widths[is_uv], src->crop_heights[is_uv],
+ src->strides[is_uv], mi_row, mi_col, sf,
pd->subsampling_x, pd->subsampling_y);
}
}
}
-#if CONFIG_SUPERTX
-#if CONFIG_CB4X4
-static const uint8_t mask_4[4] = { 64, 52, 12, 0 };
-static const uint8_t mask_4_uv[4] = { 64, 52, 12, 0 };
-#endif // CONFIG_CB4X4
-static const uint8_t mask_8[8] = { 64, 64, 62, 52, 12, 2, 0, 0 };
-
-static const uint8_t mask_16[16] = { 63, 62, 60, 58, 55, 50, 43, 36,
- 28, 21, 14, 9, 6, 4, 2, 1 };
-
-static const uint8_t mask_32[32] = { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63,
- 61, 57, 52, 45, 36, 28, 19, 12, 7, 3, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-
-static const uint8_t mask_8_uv[8] = { 64, 64, 62, 52, 12, 2, 0, 0 };
-
-static const uint8_t mask_16_uv[16] = { 64, 64, 64, 64, 61, 53, 45, 36,
- 28, 19, 11, 3, 0, 0, 0, 0 };
-
-static const uint8_t mask_32_uv[32] = { 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 60, 54, 46, 36,
- 28, 18, 10, 4, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0 };
-
-static const uint8_t *get_supertx_mask(int length, int plane) {
- switch (length) {
-#if CONFIG_CB4X4
- case 4: return plane ? mask_4_uv : mask_4;
-#endif // CONFIG_CB4X4
- case 8: return plane ? mask_8_uv : mask_8;
- case 16: return plane ? mask_16_uv : mask_16;
- case 32: return plane ? mask_32_uv : mask_32;
- default: assert(0);
- }
- return NULL;
-}
-
-void av1_build_masked_inter_predictor_complex(
- MACROBLOCKD *xd, uint8_t *dst, int dst_stride, const uint8_t *pre,
- int pre_stride, int mi_row, int mi_col, int mi_row_ori, int mi_col_ori,
- BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, PARTITION_TYPE partition,
- int plane) {
- const struct macroblockd_plane *pd = &xd->plane[plane];
- const int ssx = pd->subsampling_x;
- const int ssy = pd->subsampling_y;
- const int top_w = block_size_wide[top_bsize] >> ssx;
- const int top_h = block_size_high[top_bsize] >> ssy;
- const int w = block_size_wide[bsize] >> ssx;
- const int h = block_size_high[bsize] >> ssy;
- const int w_offset = ((mi_col - mi_col_ori) * MI_SIZE) >> ssx;
- const int h_offset = ((mi_row - mi_row_ori) * MI_SIZE) >> ssy;
-
- int w_remain, h_remain;
-
-#if CONFIG_HIGHBITDEPTH
- const int is_hdb = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
-#endif // CONFIG_HIGHBITDEPTH
-
- assert(bsize <= BLOCK_32X32);
- assert(IMPLIES(plane == 0, ssx == 0));
- assert(IMPLIES(plane == 0, ssy == 0));
-
- switch (partition) {
- case PARTITION_HORZ: {
- const uint8_t *const mask = get_supertx_mask(h, ssy);
-
- w_remain = top_w;
- h_remain = top_h - h_offset - h;
- dst += h_offset * dst_stride;
- pre += h_offset * pre_stride;
-
-#if CONFIG_HIGHBITDEPTH
- if (is_hdb)
- aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, pre,
- pre_stride, mask, h, top_w, xd->bd);
- else
-#endif // CONFIG_HIGHBITDEPTH
- aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, pre, pre_stride,
- mask, h, top_w);
-
- dst += h * dst_stride;
- pre += h * pre_stride;
- break;
- }
- case PARTITION_VERT: {
- const uint8_t *const mask = get_supertx_mask(w, ssx);
-
- w_remain = top_w - w_offset - w;
- h_remain = top_h;
- dst += w_offset;
- pre += w_offset;
-
-#if CONFIG_HIGHBITDEPTH
- if (is_hdb)
- aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, pre,
- pre_stride, mask, top_h, w, xd->bd);
- else
-#endif // CONFIG_HIGHBITDEPTH
- aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, pre, pre_stride,
- mask, top_h, w);
-
- dst += w;
- pre += w;
- break;
- }
- default: {
- assert(0);
- return;
- }
- }
-
- if (w_remain == 0 || h_remain == 0) {
- return;
- }
-
-#if CONFIG_HIGHBITDEPTH
- if (is_hdb) {
- dst = (uint8_t *)CONVERT_TO_SHORTPTR(dst);
- pre = (const uint8_t *)CONVERT_TO_SHORTPTR(pre);
- dst_stride *= 2;
- pre_stride *= 2;
- w_remain *= 2;
- }
-#endif // CONFIG_HIGHBITDEPTH
-
- do {
- memcpy(dst, pre, w_remain * sizeof(uint8_t));
- dst += dst_stride;
- pre += pre_stride;
- } while (--h_remain);
-}
-
-void av1_build_inter_predictor_sb_sub8x8_extend(const AV1_COMMON *cm,
- MACROBLOCKD *xd, int mi_row_ori,
- int mi_col_ori, int mi_row,
- int mi_col, int plane,
- BLOCK_SIZE bsize, int block) {
- // Prediction function used in supertx:
- // Use the mv at current block (which is less than 8x8)
- // to get prediction of a block located at (mi_row, mi_col) at size of bsize
- // bsize can be larger than 8x8.
- // block (0-3): the sub8x8 location of current block
- const int mi_x = mi_col * MI_SIZE;
- const int mi_y = mi_row * MI_SIZE;
- const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
- const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
-
- // For sub8x8 uv:
- // Skip uv prediction in supertx except the first block (block = 0)
- int max_plane = block ? 1 : MAX_MB_PLANE;
- if (plane >= max_plane) return;
-
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
- const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
- const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
- const int bw = 4 * num_4x4_w;
- const int bh = 4 * num_4x4_h;
-
- build_inter_predictors(cm, xd, plane,
-#if CONFIG_MOTION_VAR
- xd->mi[0], 0,
-#endif // CONFIG_MOTION_VAR
- block, bw, bh, 0, 0, bw, bh, wedge_offset_x,
- wedge_offset_y, mi_x, mi_y);
-}
-
-void av1_build_inter_predictor_sb_extend(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row_ori, int mi_col_ori,
- int mi_row, int mi_col, int plane,
- BLOCK_SIZE bsize) {
- const int mi_x = mi_col * MI_SIZE;
- const int mi_y = mi_row * MI_SIZE;
- const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
- const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
- const int bw = block_size_wide[plane_bsize];
- const int bh = block_size_high[plane_bsize];
-
- build_inter_predictors(cm, xd, plane,
-#if CONFIG_MOTION_VAR
- xd->mi[0], 0,
-#endif // CONFIG_MOTION_VAR
- 0, bw, bh, 0, 0, bw, bh, wedge_offset_x,
- wedge_offset_y, mi_x, mi_y);
-}
-#endif // CONFIG_SUPERTX
-
-#if CONFIG_MOTION_VAR
// obmc_mask_N[overlap_position]
static const uint8_t obmc_mask_1[1] = { 64 };
@@ -1974,14 +1083,12 @@ static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
56, 57, 58, 59, 60, 60, 61, 62,
64, 64, 64, 64, 64, 64, 64, 64 };
-#if CONFIG_EXT_PARTITION
static const uint8_t obmc_mask_64[64] = {
33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
-#endif // CONFIG_EXT_PARTITION
const uint8_t *av1_get_obmc_mask(int length) {
switch (length) {
@@ -1991,69 +1098,25 @@ const uint8_t *av1_get_obmc_mask(int length) {
case 8: return obmc_mask_8;
case 16: return obmc_mask_16;
case 32: return obmc_mask_32;
-#if CONFIG_EXT_PARTITION
case 64: return obmc_mask_64;
-#endif // CONFIG_EXT_PARTITION
default: assert(0); return NULL;
}
}
-#if CONFIG_NCOBMC
-// obmc_mask_flipN[overlap_position]
-static const uint8_t obmc_mask_flip1[1] = { 55 };
-
-static const uint8_t obmc_mask_flip2[2] = { 62, 45 };
-
-static const uint8_t obmc_mask_flip4[4] = { 64, 59, 50, 39 };
-
-static const uint8_t obmc_mask_flip8[8] = { 64, 63, 61, 57, 53, 48, 42, 36 };
-
-static const uint8_t obmc_mask_flip16[16] = { 64, 64, 64, 63, 61, 60, 58, 56,
- 54, 52, 49, 46, 43, 40, 37, 34 };
-
-static const uint8_t obmc_mask_flip32[32] = { 64, 64, 64, 64, 64, 63, 63, 62,
- 62, 61, 60, 60, 59, 58, 57, 56,
- 55, 53, 52, 51, 50, 48, 47, 45,
- 44, 43, 41, 40, 38, 36, 35, 33 };
-
-#if CONFIG_EXT_PARTITION
-static const uint8_t obmc_mask_flip64[64] = {
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 62, 62,
- 62, 62, 62, 61, 60, 60, 60, 60, 60, 59, 58, 58, 57, 57, 56, 56,
- 56, 55, 54, 53, 52, 52, 51, 51, 51, 50, 49, 48, 47, 47, 46, 45,
- 44, 44, 44, 43, 42, 41, 40, 40, 39, 38, 37, 36, 35, 35, 34, 33,
-};
-#endif // CONFIG_EXT_PARTITION
-
-const uint8_t *av1_get_obmc_mask_flipped(int length) {
- switch (length) {
- case 1: return obmc_mask_flip1;
- case 2: return obmc_mask_flip2;
- case 4: return obmc_mask_flip4;
- case 8: return obmc_mask_flip8;
- case 16: return obmc_mask_flip16;
- case 32: return obmc_mask_flip32;
-#if CONFIG_EXT_PARTITION
- case 64: return obmc_mask_flip64;
-#endif // CONFIG_EXT_PARTITION
- default: assert(0); return NULL;
- }
-}
-#endif // CONFIG_NCOBMC
-
static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_rc,
- uint8_t mi_hw, MODE_INFO *mi,
- void *fun_ctxt) {
+ uint8_t mi_hw, MB_MODE_INFO *mi,
+ void *fun_ctxt, const int num_planes) {
(void)xd;
(void)rel_mi_rc;
(void)mi_hw;
(void)mi;
++*(int *)fun_ctxt;
+ (void)num_planes;
}
void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col) {
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ MB_MODE_INFO *mbmi = xd->mi[0];
mbmi->overlappable_neighbors[0] = 0;
mbmi->overlappable_neighbors[1] = 0;
@@ -2066,21 +1129,17 @@ void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd,
&mbmi->overlappable_neighbors[1]);
}
-// HW does not support < 4x4 prediction. To limit the bandwidth requirement, for
-// small blocks, only blend with neighbors from one side. If block-size of
-// current plane is 4x4 or 8x4, the above neighbor (dir = 0) will be skipped. If
-// it is 4x8, the left neighbor (dir = 1) will be skipped.
+// HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
+// block-size of current plane is smaller than 8x8, always only blend with the
+// left neighbor(s) (skip blending with the above side).
#define DISABLE_CHROMA_U8X8_OBMC 0 // 0: one-sided obmc; 1: disable
-int skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize, const struct macroblockd_plane *pd,
- int dir) {
+int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
+ const struct macroblockd_plane *pd, int dir) {
assert(is_motion_variation_allowed_bsize(bsize));
- BLOCK_SIZE bsize_plane =
- ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y];
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- if (bsize_plane < BLOCK_4X4) return 1;
-#endif
+ const BLOCK_SIZE bsize_plane =
+ get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
switch (bsize_plane) {
#if DISABLE_CHROMA_U8X8_OBMC
case BLOCK_4X4:
@@ -2095,6 +1154,13 @@ int skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize, const struct macroblockd_plane *pd,
}
}
+void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
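+  // For OBMC, the neighbouring block's prediction is rebuilt as a simple
+  // single-reference prediction: drop its second reference and any masked
+  // compound type.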
+ mbmi->ref_frame[1] = NONE_FRAME;
+ mbmi->interinter_comp.type = COMPOUND_AVERAGE;
+
+ return;
+}
+
struct obmc_inter_pred_ctxt {
uint8_t **adjacent;
int *adjacent_stride;
@@ -2102,24 +1168,23 @@ struct obmc_inter_pred_ctxt {
static INLINE void build_obmc_inter_pred_above(MACROBLOCKD *xd, int rel_mi_col,
uint8_t above_mi_width,
- MODE_INFO *above_mi,
- void *fun_ctxt) {
+ MB_MODE_INFO *above_mi,
+ void *fun_ctxt,
+ const int num_planes) {
(void)above_mi;
struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
- const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
-#if CONFIG_HIGHBITDEPTH
+ const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
-#endif // CONFIG_HIGHBITDEPTH
const int overlap =
AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
- for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ for (int plane = 0; plane < num_planes; ++plane) {
const struct macroblockd_plane *pd = &xd->plane[plane];
const int bw = (above_mi_width * MI_SIZE) >> pd->subsampling_x;
const int bh = overlap >> pd->subsampling_y;
const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
- if (skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
+ if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
const int dst_stride = pd->dst.stride;
uint8_t *const dst = &pd->dst.buf[plane_col];
@@ -2127,37 +1192,34 @@ static INLINE void build_obmc_inter_pred_above(MACROBLOCKD *xd, int rel_mi_col,
const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
const uint8_t *const mask = av1_get_obmc_mask(bh);
-#if CONFIG_HIGHBITDEPTH
if (is_hbd)
aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
- tmp_stride, mask, bh, bw, xd->bd);
+ tmp_stride, mask, bw, bh, xd->bd);
else
-#endif // CONFIG_HIGHBITDEPTH
aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
- mask, bh, bw);
+ mask, bw, bh);
}
}
static INLINE void build_obmc_inter_pred_left(MACROBLOCKD *xd, int rel_mi_row,
uint8_t left_mi_height,
- MODE_INFO *left_mi,
- void *fun_ctxt) {
+ MB_MODE_INFO *left_mi,
+ void *fun_ctxt,
+ const int num_planes) {
(void)left_mi;
struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
- const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
const int overlap =
AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
-#if CONFIG_HIGHBITDEPTH
const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
-#endif // CONFIG_HIGHBITDEPTH
- for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ for (int plane = 0; plane < num_planes; ++plane) {
const struct macroblockd_plane *pd = &xd->plane[plane];
const int bw = overlap >> pd->subsampling_x;
const int bh = (left_mi_height * MI_SIZE) >> pd->subsampling_y;
const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
- if (skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
+ if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
const int dst_stride = pd->dst.stride;
uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
@@ -2165,14 +1227,12 @@ static INLINE void build_obmc_inter_pred_left(MACROBLOCKD *xd, int rel_mi_row,
const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
const uint8_t *const mask = av1_get_obmc_mask(bw);
-#if CONFIG_HIGHBITDEPTH
if (is_hbd)
aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
- tmp_stride, mask, bh, bw, xd->bd);
+ tmp_stride, mask, bw, bh, xd->bd);
else
-#endif // CONFIG_HIGHBITDEPTH
aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
- mask, bh, bw);
+ mask, bw, bh);
}
}
@@ -2186,86 +1246,41 @@ void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
int above_stride[MAX_MB_PLANE],
uint8_t *left[MAX_MB_PLANE],
int left_stride[MAX_MB_PLANE]) {
- const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
// handle above row
struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
foreach_overlappable_nb_above(cm, xd, mi_col,
- max_neighbor_obmc[b_width_log2_lookup[bsize]],
+ max_neighbor_obmc[mi_size_wide_log2[bsize]],
build_obmc_inter_pred_above, &ctxt_above);
// handle left column
struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
foreach_overlappable_nb_left(cm, xd, mi_row,
- max_neighbor_obmc[b_height_log2_lookup[bsize]],
+ max_neighbor_obmc[mi_size_high_log2[bsize]],
build_obmc_inter_pred_left, &ctxt_left);
}
-void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
- if (is_interintra_pred(mbmi)) {
- mbmi->ref_frame[1] = NONE_FRAME;
- } else if (has_second_ref(mbmi) &&
- is_masked_compound_type(mbmi->interinter_compound_type)) {
- mbmi->interinter_compound_type = COMPOUND_AVERAGE;
- mbmi->ref_frame[1] = NONE_FRAME;
-#if CONFIG_COMPOUND_SINGLEREF
- } else if (!has_second_ref(mbmi) &&
- is_inter_singleref_comp_mode(mbmi->mode)) {
- // mbmi->mode = compound_ref0_mode(mbmi->mode);
- mbmi->mode = compound_ref1_mode(mbmi->mode);
- assert(is_inter_singleref_mode(mbmi->mode));
- mbmi->mv[0].as_int = mbmi->mv[1].as_int;
-#endif // CONFIG_COMPOUND_SINGLEREF
- }
- if (has_second_ref(mbmi)) mbmi->ref_frame[1] = NONE_FRAME;
- return;
-}
-
-struct build_prediction_ctxt {
- const AV1_COMMON *cm;
- int mi_row;
- int mi_col;
- uint8_t **tmp_buf;
- int *tmp_width;
- int *tmp_height;
- int *tmp_stride;
- int mb_to_far_edge;
-};
-
-static INLINE void build_prediction_by_above_pred(MACROBLOCKD *xd,
- int rel_mi_col,
- uint8_t above_mi_width,
- MODE_INFO *above_mi,
- void *fun_ctxt) {
- MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
+void av1_setup_build_prediction_by_above_pred(
+ MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
+ MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
+ const int num_planes) {
const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
- struct build_prediction_ctxt *ctxt = (struct build_prediction_ctxt *)fun_ctxt;
const int above_mi_col = ctxt->mi_col + rel_mi_col;
- MB_MODE_INFO backup_mbmi = *above_mbmi;
- modify_neighbor_predictor_for_obmc(above_mbmi);
+ av1_modify_neighbor_predictor_for_obmc(above_mbmi);
- for (int j = 0; j < MAX_MB_PLANE; ++j) {
+ for (int j = 0; j < num_planes; ++j) {
struct macroblockd_plane *const pd = &xd->plane[j];
setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
NULL, pd->subsampling_x, pd->subsampling_y);
}
-#if CONFIG_COMPOUND_SINGLEREF
- const int num_refs = 1 + is_inter_anyref_comp_mode(above_mbmi->mode);
-#else
const int num_refs = 1 + has_second_ref(above_mbmi);
-#endif
for (int ref = 0; ref < num_refs; ++ref) {
-#if CONFIG_COMPOUND_SINGLEREF
- const MV_REFERENCE_FRAME frame = has_second_ref(above_mbmi)
- ? above_mbmi->ref_frame[ref]
- : above_mbmi->ref_frame[0];
-#else
const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
-#endif // CONFIG_COMPOUND_SINGLEREF
const RefBuffer *const ref_buf = &ctxt->cm->frame_refs[frame - LAST_FRAME];
@@ -2274,31 +1289,37 @@ static INLINE void build_prediction_by_above_pred(MACROBLOCKD *xd,
aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
"Reference frame has invalid dimensions");
av1_setup_pre_planes(xd, ref, ref_buf->buf, ctxt->mi_row, above_mi_col,
- &ref_buf->sf);
+ &ref_buf->sf, num_planes);
}
xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
xd->mb_to_right_edge = ctxt->mb_to_far_edge +
(xd->n8_w - rel_mi_col - above_mi_width) * MI_SIZE * 8;
+}
- int mi_x = above_mi_col << MI_SIZE_LOG2;
- int mi_y = ctxt->mi_row << MI_SIZE_LOG2;
+static INLINE void build_prediction_by_above_pred(
+ MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
+ MB_MODE_INFO *above_mbmi, void *fun_ctxt, const int num_planes) {
+ struct build_prediction_ctxt *ctxt = (struct build_prediction_ctxt *)fun_ctxt;
+ const int above_mi_col = ctxt->mi_col + rel_mi_col;
+ int mi_x, mi_y;
+ MB_MODE_INFO backup_mbmi = *above_mbmi;
- const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ av1_setup_build_prediction_by_above_pred(xd, rel_mi_col, above_mi_width,
+ above_mbmi, ctxt, num_planes);
+ mi_x = above_mi_col << MI_SIZE_LOG2;
+ mi_y = ctxt->mi_row << MI_SIZE_LOG2;
- for (int j = 0; j < MAX_MB_PLANE; ++j) {
+ const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
+
+ for (int j = 0; j < num_planes; ++j) {
const struct macroblockd_plane *pd = &xd->plane[j];
int bw = (above_mi_width * MI_SIZE) >> pd->subsampling_x;
int bh = clamp(block_size_high[bsize] >> (pd->subsampling_y + 1), 4,
block_size_high[BLOCK_64X64] >> (pd->subsampling_y + 1));
- if (skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
- build_inter_predictors(ctxt->cm, xd, j, above_mi, 1, 0, bw, bh, 0, 0, bw,
- bh,
-#if CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_SUPERTX
- mi_x, mi_y);
+ if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
+ build_inter_predictors(ctxt->cm, xd, j, above_mbmi, 1, bw, bh, mi_x, mi_y);
}
*above_mbmi = backup_mbmi;
}
@@ -2322,9 +1343,9 @@ void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
mi_col, tmp_buf,
tmp_width, tmp_height,
tmp_stride, xd->mb_to_right_edge };
- BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ BLOCK_SIZE bsize = xd->mi[0]->sb_type;
foreach_overlappable_nb_above(cm, xd, mi_col,
- max_neighbor_obmc[b_width_log2_lookup[bsize]],
+ max_neighbor_obmc[mi_size_wide_log2[bsize]],
build_prediction_by_above_pred, &ctxt);
xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
@@ -2332,40 +1353,27 @@ void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
xd->mb_to_bottom_edge -= (this_height - pred_height) * 8;
}
-static INLINE void build_prediction_by_left_pred(MACROBLOCKD *xd,
- int rel_mi_row,
- uint8_t left_mi_height,
- MODE_INFO *left_mi,
- void *fun_ctxt) {
- MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
+void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
+ uint8_t left_mi_height,
+ MB_MODE_INFO *left_mbmi,
+ struct build_prediction_ctxt *ctxt,
+ const int num_planes) {
const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->sb_type);
- struct build_prediction_ctxt *ctxt = (struct build_prediction_ctxt *)fun_ctxt;
const int left_mi_row = ctxt->mi_row + rel_mi_row;
- MB_MODE_INFO backup_mbmi = *left_mbmi;
- modify_neighbor_predictor_for_obmc(left_mbmi);
+ av1_modify_neighbor_predictor_for_obmc(left_mbmi);
- for (int j = 0; j < MAX_MB_PLANE; ++j) {
+ for (int j = 0; j < num_planes; ++j) {
struct macroblockd_plane *const pd = &xd->plane[j];
setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
NULL, pd->subsampling_x, pd->subsampling_y);
}
-#if CONFIG_COMPOUND_SINGLEREF
- const int num_refs = 1 + is_inter_anyref_comp_mode(left_mbmi->mode);
-#else
const int num_refs = 1 + has_second_ref(left_mbmi);
-#endif
for (int ref = 0; ref < num_refs; ++ref) {
-#if CONFIG_COMPOUND_SINGLEREF
- const MV_REFERENCE_FRAME frame = has_second_ref(left_mbmi)
- ? left_mbmi->ref_frame[ref]
- : left_mbmi->ref_frame[0];
-#else
const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
-#endif // CONFIG_COMPOUND_SINGLEREF
const RefBuffer *const ref_buf = &ctxt->cm->frame_refs[frame - LAST_FRAME];
@@ -2374,31 +1382,37 @@ static INLINE void build_prediction_by_left_pred(MACROBLOCKD *xd,
aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
"Reference frame has invalid dimensions");
av1_setup_pre_planes(xd, ref, ref_buf->buf, left_mi_row, ctxt->mi_col,
- &ref_buf->sf);
+ &ref_buf->sf, num_planes);
}
xd->mb_to_top_edge = 8 * MI_SIZE * (-left_mi_row);
xd->mb_to_bottom_edge =
ctxt->mb_to_far_edge +
(xd->n8_h - rel_mi_row - left_mi_height) * MI_SIZE * 8;
+}
- int mi_x = ctxt->mi_col << MI_SIZE_LOG2;
- int mi_y = left_mi_row << MI_SIZE_LOG2;
+static INLINE void build_prediction_by_left_pred(
+ MACROBLOCKD *xd, int rel_mi_row, uint8_t left_mi_height,
+ MB_MODE_INFO *left_mbmi, void *fun_ctxt, const int num_planes) {
+ struct build_prediction_ctxt *ctxt = (struct build_prediction_ctxt *)fun_ctxt;
+ const int left_mi_row = ctxt->mi_row + rel_mi_row;
+ int mi_x, mi_y;
+ MB_MODE_INFO backup_mbmi = *left_mbmi;
- const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ av1_setup_build_prediction_by_left_pred(xd, rel_mi_row, left_mi_height,
+ left_mbmi, ctxt, num_planes);
+ mi_x = ctxt->mi_col << MI_SIZE_LOG2;
+ mi_y = left_mi_row << MI_SIZE_LOG2;
+ const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
- for (int j = 0; j < MAX_MB_PLANE; ++j) {
+ for (int j = 0; j < num_planes; ++j) {
const struct macroblockd_plane *pd = &xd->plane[j];
int bw = clamp(block_size_wide[bsize] >> (pd->subsampling_x + 1), 4,
block_size_wide[BLOCK_64X64] >> (pd->subsampling_x + 1));
int bh = (left_mi_height << MI_SIZE_LOG2) >> pd->subsampling_y;
- if (skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
- build_inter_predictors(ctxt->cm, xd, j, left_mi, 1, 0, bw, bh, 0, 0, bw, bh,
-#if CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_SUPERTX
- mi_x, mi_y);
+ if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
+ build_inter_predictors(ctxt->cm, xd, j, left_mbmi, 1, bw, bh, mi_x, mi_y);
}
*left_mbmi = backup_mbmi;
}
@@ -2422,9 +1436,9 @@ void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
mi_col, tmp_buf,
tmp_width, tmp_height,
tmp_stride, xd->mb_to_bottom_edge };
- BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ BLOCK_SIZE bsize = xd->mi[0]->sb_type;
foreach_overlappable_nb_left(cm, xd, mi_row,
- max_neighbor_obmc[b_height_log2_lookup[bsize]],
+ max_neighbor_obmc[mi_size_high_log2[bsize]],
build_prediction_by_left_pred, &ctxt);
xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
@@ -2434,13 +1448,9 @@ void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col) {
-#if CONFIG_HIGHBITDEPTH
+ const int num_planes = av1_num_planes(cm);
DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
-#else
- DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
-#endif // CONFIG_HIGHBITDEPTH
uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
@@ -2449,7 +1459,6 @@ void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int len = sizeof(uint16_t);
dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
@@ -2459,434 +1468,25 @@ void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len);
} else {
-#endif // CONFIG_HIGHBITDEPTH
dst_buf1[0] = tmp_buf1;
dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE;
dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2;
dst_buf2[0] = tmp_buf2;
dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE;
dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2;
-#if CONFIG_HIGHBITDEPTH
}
-#endif // CONFIG_HIGHBITDEPTH
av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, dst_buf1,
dst_width1, dst_height1, dst_stride1);
av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, dst_buf2,
dst_width2, dst_height2, dst_stride2);
- av1_setup_dst_planes(xd->plane, xd->mi[0]->mbmi.sb_type,
- get_frame_new_buffer(cm), mi_row, mi_col);
+ av1_setup_dst_planes(xd->plane, xd->mi[0]->sb_type, get_frame_new_buffer(cm),
+ mi_row, mi_col, 0, num_planes);
av1_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, dst_buf1, dst_stride1,
dst_buf2, dst_stride2);
}
-#if CONFIG_NCOBMC
-void av1_build_prediction_by_bottom_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *tmp_buf[MAX_MB_PLANE],
- int tmp_width[MAX_MB_PLANE],
- int tmp_height[MAX_MB_PLANE],
- int tmp_stride[MAX_MB_PLANE]) {
- const TileInfo *const tile = &xd->tile;
-#if CONFIG_DEBUG
- BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
-#endif
- int i, j, mi_step, ref;
- const int ilimit = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
- int mb_to_right_edge_base = xd->mb_to_right_edge;
-
- if (mi_row + xd->n8_h >= tile->mi_row_end ||
- (mi_row + xd->n8_h) % MI_SIZE == 0 || (mi_row + xd->n8_h) >= cm->mi_rows)
- return;
- assert(bsize >= BLOCK_8X8);
-
- xd->mb_to_top_edge -= xd->n8_h * 32;
- for (i = 0; i < ilimit; i += mi_step) {
- int mi_row_offset = xd->n8_h;
- int mi_col_offset = i;
- int mi_x, mi_y, bw, bh;
- MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *mbmi = &mi->mbmi;
- MB_MODE_INFO backup_mbmi;
-
- mi_step = AOMMIN(xd->n8_w, mi_size_wide[mbmi->sb_type]);
-
- if (!is_neighbor_overlappable(mbmi)) continue;
-
- backup_mbmi = *mbmi;
- modify_neighbor_predictor_for_obmc(mbmi);
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- struct macroblockd_plane *const pd = &xd->plane[j];
- setup_pred_plane(&pd->dst, AOMMAX(mbmi->sb_type, BLOCK_8X8), tmp_buf[j],
- tmp_width[j], tmp_height[j], tmp_stride[j],
- (xd->n8_h >> 1), i, NULL, pd->subsampling_x,
- pd->subsampling_y);
- }
- for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
- const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
- const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
-
- xd->block_refs[ref] = ref_buf;
- if ((!av1_is_valid_scale(&ref_buf->sf)))
- aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + (xd->n8_h >> 1),
- mi_col + i, &ref_buf->sf);
- }
-
- xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
- xd->mb_to_right_edge =
- mb_to_right_edge_base + (xd->n8_w - i - mi_step) * 64;
- mi_x = (mi_col + i) << MI_SIZE_LOG2;
- mi_y = (mi_row << MI_SIZE_LOG2) + xd->n8_h * (MI_SIZE >> 1);
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- const struct macroblockd_plane *pd = &xd->plane[j];
- bw = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_x;
- bh = (xd->n8_h << (MI_SIZE_LOG2 - 1)) >> pd->subsampling_y;
-
- if (mbmi->sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
- const PARTITION_TYPE bp = BLOCK_8X8 - mbmi->sb_type;
- const int have_vsplit = bp != PARTITION_HORZ;
- const int have_hsplit = bp != PARTITION_VERT;
- const int num_4x4_w = 2 >> (!have_vsplit);
- const int num_4x4_h = 2 >> (!have_hsplit);
- const int pw = 8 >> (have_vsplit + pd->subsampling_x);
- int x, y;
-
- for (y = 0; y < num_4x4_h; ++y)
- for (x = 0; x < num_4x4_w; ++x) {
- if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT) && y != 0)
- continue;
-
- build_inter_predictors(cm, xd, j, mi, 1, y * 2 + x, bw, bh,
- (4 * x) >> pd->subsampling_x,
- xd->n8_h == 1 ? (4 >> pd->subsampling_y) : 0,
- pw, bh,
-#if CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_SUPERTX
- mi_x, mi_y);
- }
- } else {
- build_inter_predictors(cm, xd, j, mi, 1, 0, bw, bh, 0,
- xd->n8_h == 1 ? (4 >> pd->subsampling_y) : 0, bw,
- bh,
-#if CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_SUPERTX
- mi_x, mi_y);
- }
- }
- *mbmi = backup_mbmi;
- }
- xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
- xd->mb_to_right_edge = mb_to_right_edge_base;
- xd->mb_to_top_edge += xd->n8_h * 32;
-}
-
-void av1_build_prediction_by_right_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *tmp_buf[MAX_MB_PLANE],
- int tmp_width[MAX_MB_PLANE],
- int tmp_height[MAX_MB_PLANE],
- const int tmp_stride[MAX_MB_PLANE]) {
- const TileInfo *const tile = &xd->tile;
-#if CONFIG_DEBUG
- BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
-#endif
- int i, j, mi_step, ref;
- const int ilimit = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
- int mb_to_bottom_edge_base = xd->mb_to_bottom_edge;
-
- if (mi_col + xd->n8_w >= tile->mi_col_end ||
- (mi_col + xd->n8_w) % MI_SIZE == 0 || (mi_col + xd->n8_w) >= cm->mi_cols)
- return;
-
- assert(bsize >= BLOCK_8X8);
-
- xd->mb_to_left_edge -= xd->n8_w / 2 * MI_SIZE * 8;
- for (i = 0; i < ilimit; i += mi_step) {
- int mi_row_offset = i;
- int mi_col_offset = xd->n8_w;
- int mi_x, mi_y, bw, bh;
- MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *mbmi = &mi->mbmi;
- MB_MODE_INFO backup_mbmi;
-
- mi_step = AOMMIN(xd->n8_h, mi_size_high[mbmi->sb_type]);
-
- if (!is_neighbor_overlappable(mbmi)) continue;
-
- backup_mbmi = *mbmi;
- modify_neighbor_predictor_for_obmc(mbmi);
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- struct macroblockd_plane *const pd = &xd->plane[j];
- setup_pred_plane(&pd->dst, AOMMAX(mbmi->sb_type, BLOCK_8X8), tmp_buf[j],
- tmp_width[j], tmp_height[j], tmp_stride[j], i,
- xd->n8_w >> 1, NULL, pd->subsampling_x,
- pd->subsampling_y);
- }
- for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
- const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
- const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
-
- xd->block_refs[ref] = ref_buf;
- if ((!av1_is_valid_scale(&ref_buf->sf)))
- aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i,
- mi_col + (xd->n8_w >> 1), &ref_buf->sf);
- }
-
- xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8);
- xd->mb_to_bottom_edge =
- mb_to_bottom_edge_base + (xd->n8_h - i - mi_step) * MI_SIZE * 8;
- mi_x = (mi_col << MI_SIZE_LOG2) + xd->n8_w * (MI_SIZE >> 1);
- mi_y = (mi_row + i) << MI_SIZE_LOG2;
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- const struct macroblockd_plane *pd = &xd->plane[j];
- bw = (xd->n8_w << (MI_SIZE_LOG2 - 1)) >> pd->subsampling_x;
- bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
-
- if (mbmi->sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
- const PARTITION_TYPE bp = BLOCK_8X8 - mbmi->sb_type;
- const int have_vsplit = bp != PARTITION_HORZ;
- const int have_hsplit = bp != PARTITION_VERT;
- const int num_4x4_w = 2 >> (!have_vsplit);
- const int num_4x4_h = 2 >> (!have_hsplit);
- const int ph = 8 >> (have_hsplit + pd->subsampling_y);
- int x, y;
-
- for (y = 0; y < num_4x4_h; ++y)
- for (x = 0; x < num_4x4_w; ++x) {
- if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT) && x != 0)
- continue;
-
- build_inter_predictors(cm, xd, j, mi, 1, y * 2 + x, bw, bh,
- xd->n8_w == 1 ? 4 >> pd->subsampling_x : 0,
- (4 * y) >> pd->subsampling_y, bw, ph,
-#if CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_SUPERTX
- mi_x, mi_y);
- }
- } else {
- build_inter_predictors(cm, xd, j, mi, 1, 0, bw, bh,
- xd->n8_w == 1 ? 4 >> pd->subsampling_x : 0, 0,
- bw, bh,
-#if CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_SUPERTX
- mi_x, mi_y);
- }
- }
- *mbmi = backup_mbmi;
- }
- xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
- xd->mb_to_bottom_edge = mb_to_bottom_edge_base;
- xd->mb_to_left_edge += xd->n8_w / 2 * MI_SIZE * 8;
-}
-
-// This function combines motion compensated predictions that are generated by
-// bottom/right neighboring blocks' inter predictors with the prediction in the
-// dst buffer.
-void av1_merge_dst_bottom_right_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *bottom[MAX_MB_PLANE],
- const int bottom_stride[MAX_MB_PLANE],
- uint8_t *right[MAX_MB_PLANE],
- const int right_stride[MAX_MB_PLANE]) {
- const TileInfo *const tile = &xd->tile;
- BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
- int plane, i, mi_step;
- const int bottom_available = mi_row + xd->n8_h < tile->mi_row_end &&
- (mi_row + xd->n8_h) % MI_SIZE != 0 &&
- (mi_row + xd->n8_h) < cm->mi_rows;
-#if CONFIG_HIGHBITDEPTH
- int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
-#endif // CONFIG_HIGHBITDEPTH
-
- // handle bottom row
- for (i = 0; bottom_available && i < AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
- i += mi_step) {
- int mi_row_offset = xd->n8_h;
- int mi_col_offset = i;
- MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *mbmi = &mi->mbmi;
- int overlap;
-
- mi_step = AOMMIN(xd->n8_w, mi_size_wide[mbmi->sb_type]);
-
- if (!is_neighbor_overlappable(mbmi)) continue;
-
- overlap = num_4x4_blocks_high_lookup[bsize] << 1;
-
- for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
- const struct macroblockd_plane *pd = &xd->plane[plane];
- const int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
- const int bh = overlap >> pd->subsampling_y;
- const int dst_stride = pd->dst.stride;
- uint8_t *dst =
- &pd->dst.buf[((i * MI_SIZE) >> pd->subsampling_x) +
- (((xd->n8_h * MI_SIZE - overlap) * dst_stride) >>
- pd->subsampling_y)];
- const int tmp_stride = bottom_stride[plane];
- const uint8_t *const tmp =
- &bottom[plane][((i * MI_SIZE) >> pd->subsampling_x) +
- (((xd->n8_h * MI_SIZE - overlap) * tmp_stride) >>
- pd->subsampling_y)];
- const uint8_t *const mask = av1_get_obmc_mask_flipped(bh);
-
-#if CONFIG_HIGHBITDEPTH
- if (is_hbd)
- aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
- tmp_stride, mask, bh, bw, xd->bd);
- else
-#endif // CONFIG_HIGHBITDEPTH
- aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
- mask, bh, bw);
- }
- } // each mi in the bottom row
-
- // handle right column
- if (mi_col + xd->n8_w >= tile->mi_col_end ||
- (mi_col + xd->n8_w) % MI_SIZE == 0 || (mi_col + xd->n8_w) >= cm->mi_cols)
- return;
-
- for (i = 0; i < AOMMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
- int mi_row_offset = i;
- int mi_col_offset = xd->n8_w;
- int overlap;
- MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *mbmi = &mi->mbmi;
-
- mi_step = AOMMIN(xd->n8_h, mi_size_high[mbmi->sb_type]);
-
- if (!is_neighbor_overlappable(mbmi)) continue;
-
- overlap = num_4x4_blocks_wide_lookup[bsize] << 1;
-
- for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
- const struct macroblockd_plane *pd = &xd->plane[plane];
- const int bw = overlap >> pd->subsampling_x;
- const int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
- const int dst_stride = pd->dst.stride;
- uint8_t *dst =
- &pd->dst.buf[((i * MI_SIZE * dst_stride) >> pd->subsampling_y) +
- ((xd->n8_w * MI_SIZE - overlap) >> pd->subsampling_x)];
- const int tmp_stride = right_stride[plane];
- const uint8_t *const tmp =
- &right[plane][((i * MI_SIZE * tmp_stride) >> pd->subsampling_y) +
- ((xd->n8_w * MI_SIZE - overlap) >> pd->subsampling_x)];
- const uint8_t *const mask = av1_get_obmc_mask_flipped(bw);
-
-#if CONFIG_HIGHBITDEPTH
- if (is_hbd)
- aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
- tmp_stride, mask, bh, bw, xd->bd);
- else
-#endif // CONFIG_HIGHBITDEPTH
- aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
- mask, bh, bw);
- }
- } // each mi in the right column
-}
-
-// This function generates 4-sided obmc. (1) Prediction blocks generated by
-// bottom and right motion vectors are calculated. (2) Combine them with the
-// original prediction block (which should be pre-stored in xd->plane[].dst.buf
-// before calling this function). The result is updated in xd->plane[].dst.buf.
-// (3) Call the causal obmc prediction function, which will generate the left
-// and above preds, and then merge them with xd->plane[].dst.buf.
-void av1_build_ncobmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col) {
-#if CONFIG_HIGHBITDEPTH
- DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
-#else
- DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
-#endif // CONFIG_HIGHBITDEPTH
- uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
- int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
- int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
- int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
- int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
- int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
- int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- int len = sizeof(uint16_t);
- dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
- dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len);
- dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len);
- dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
- dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
- dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len);
- } else {
-#endif // CONFIG_HIGHBITDEPTH
- dst_buf1[0] = tmp_buf1;
- dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE;
- dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2;
- dst_buf2[0] = tmp_buf2;
- dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE;
- dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2;
-#if CONFIG_HIGHBITDEPTH
- }
-#endif // CONFIG_HIGHBITDEPTH
-
- const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
- // TODO(zoeliu): COMPOUND_SINGLEREF has not worked with NCOBMC yet.
- av1_build_prediction_by_bottom_preds(cm, xd, mi_row, mi_col, dst_buf1,
- dst_width1, dst_height1, dst_stride1);
- av1_build_prediction_by_right_preds(cm, xd, mi_row, mi_col, dst_buf2,
- dst_width2, dst_height2, dst_stride2);
- av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
- mi_col);
- av1_merge_dst_bottom_right_preds(cm, xd, mi_row, mi_col, dst_buf1,
- dst_stride1, dst_buf2, dst_stride2);
- av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
- mi_col);
- av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
- av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
- mi_col);
-}
-#endif // CONFIG_NCOBMC
-
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
-void reset_xd_boundary(MACROBLOCKD *xd, int mi_row, int bh, int mi_col, int bw,
- int mi_rows, int mi_cols) {
- xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
- xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8;
- xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
- xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8;
-}
-void set_sb_mi_boundaries(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
- const int mi_row, const int mi_col) {
- const BLOCK_SIZE sb = cm->sb_size;
- const int num_mi_w = mi_size_wide[sb];
- const int num_mi_h = mi_size_high[sb];
-
- xd->sb_mi_bd.mi_col_begin = mi_col;
- xd->sb_mi_bd.mi_row_begin = mi_row;
- // points to the last mi
- xd->sb_mi_bd.mi_col_end =
- mi_col + num_mi_w > cm->mi_cols ? cm->mi_cols - 1 : mi_col + num_mi_w - 1;
- xd->sb_mi_bd.mi_row_end =
- mi_row + num_mi_h > cm->mi_rows ? cm->mi_rows - 1 : mi_row + num_mi_h - 1;
-}
-#endif
-
-#endif // CONFIG_MOTION_VAR
-
/* clang-format off */
-#if CONFIG_INTERINTRA
-#if CONFIG_EXT_PARTITION
-static const int ii_weights1d[MAX_SB_SIZE] = {
+static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8,
@@ -2895,103 +1495,82 @@ static const int ii_weights1d[MAX_SB_SIZE] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
-static int ii_size_scales[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 32, 32, 32,
-#endif
+static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
32, 16, 16, 16, 8, 8, 8, 4,
4, 4, 2, 2, 2, 1, 1, 1,
- 16, 16, 8, 8, 4, 4, 2, 2
-};
-#else
-static const int ii_weights1d[MAX_SB_SIZE] = {
- 60, 56, 52, 48, 45, 42, 39, 37, 34, 32, 30, 28, 26, 24, 22, 21,
- 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 10, 9, 8, 8, 7, 7,
- 6, 6, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2,
- 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-};
-static int ii_size_scales[BLOCK_SIZES_ALL] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 16, 16, 16,
-#endif
- 16, 8, 8, 8, 4, 4, 4,
- 2, 2, 2, 1, 1, 1,
- 8, 8, 4, 4, 2, 2,
+ 8, 8, 4, 4, 2, 2
};
/* clang-format on */
-#endif // CONFIG_EXT_PARTITION
-static void combine_interintra(INTERINTRA_MODE mode, int use_wedge_interintra,
- int wedge_index, int wedge_sign,
- BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
- uint8_t *comppred, int compstride,
- const uint8_t *interpred, int interstride,
- const uint8_t *intrapred, int intrastride) {
+static void build_smooth_interintra_mask(uint8_t *mask, int stride,
+ BLOCK_SIZE plane_bsize,
+ INTERINTRA_MODE mode) {
+ int i, j;
const int bw = block_size_wide[plane_bsize];
const int bh = block_size_high[plane_bsize];
const int size_scale = ii_size_scales[plane_bsize];
- int i, j;
-
- if (use_wedge_interintra) {
- if (is_interintra_wedge_used(bsize)) {
- const uint8_t *mask =
- av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
- const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;
- const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;
- aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
- interpred, interstride, mask, block_size_wide[bsize],
- bh, bw, subh, subw);
- }
- return;
- }
switch (mode) {
case II_V_PRED:
for (i = 0; i < bh; ++i) {
- for (j = 0; j < bw; ++j) {
- int scale = ii_weights1d[i * size_scale];
- comppred[i * compstride + j] =
- AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
- interpred[i * interstride + j]);
- }
+ memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
+ mask += stride;
}
break;
case II_H_PRED:
for (i = 0; i < bh; ++i) {
- for (j = 0; j < bw; ++j) {
- int scale = ii_weights1d[j * size_scale];
- comppred[i * compstride + j] =
- AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
- interpred[i * interstride + j]);
- }
+ for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
+ mask += stride;
}
break;
case II_SMOOTH_PRED:
for (i = 0; i < bh; ++i) {
- for (j = 0; j < bw; ++j) {
- int scale = ii_weights1d[(i < j ? i : j) * size_scale];
- comppred[i * compstride + j] =
- AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
- interpred[i * interstride + j]);
- }
+ for (j = 0; j < bw; ++j)
+ mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
+ mask += stride;
}
break;
case II_DC_PRED:
default:
for (i = 0; i < bh; ++i) {
- for (j = 0; j < bw; ++j) {
- comppred[i * compstride + j] = AOM_BLEND_AVG(
- intrapred[i * intrastride + j], interpred[i * interstride + j]);
- }
+ memset(mask, 32, bw * sizeof(mask[0]));
+ mask += stride;
}
break;
}
}
-#if CONFIG_HIGHBITDEPTH
+static void combine_interintra(INTERINTRA_MODE mode, int use_wedge_interintra,
+ int wedge_index, int wedge_sign,
+ BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
+ uint8_t *comppred, int compstride,
+ const uint8_t *interpred, int interstride,
+ const uint8_t *intrapred, int intrastride) {
+ const int bw = block_size_wide[plane_bsize];
+ const int bh = block_size_high[plane_bsize];
+
+ if (use_wedge_interintra) {
+ if (is_interintra_wedge_used(bsize)) {
+ const uint8_t *mask =
+ av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
+ const int subw = 2 * mi_size_wide[bsize] == bw;
+ const int subh = 2 * mi_size_high[bsize] == bh;
+ aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
+ interpred, interstride, mask, block_size_wide[bsize],
+ bw, bh, subw, subh);
+ }
+ return;
+ }
+
+ uint8_t mask[MAX_SB_SQUARE];
+ build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
+ aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
+ interstride, mask, bw, bw, bh, 0, 0);
+}
+
static void combine_interintra_highbd(
INTERINTRA_MODE mode, int use_wedge_interintra, int wedge_index,
int wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
@@ -2999,72 +1578,26 @@ static void combine_interintra_highbd(
int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
const int bw = block_size_wide[plane_bsize];
const int bh = block_size_high[plane_bsize];
- const int size_scale = ii_size_scales[plane_bsize];
- int i, j;
-
- uint16_t *comppred = CONVERT_TO_SHORTPTR(comppred8);
- const uint16_t *interpred = CONVERT_TO_SHORTPTR(interpred8);
- const uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8);
if (use_wedge_interintra) {
if (is_interintra_wedge_used(bsize)) {
const uint8_t *mask =
av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
- const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;
- const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;
+ const int subh = 2 * mi_size_high[bsize] == bh;
+ const int subw = 2 * mi_size_wide[bsize] == bw;
aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
interpred8, interstride, mask,
- block_size_wide[bsize], bh, bw, subh, subw, bd);
+ block_size_wide[bsize], bw, bh, subw, subh, bd);
}
return;
}
- switch (mode) {
- case II_V_PRED:
- for (i = 0; i < bh; ++i) {
- for (j = 0; j < bw; ++j) {
- int scale = ii_weights1d[i * size_scale];
- comppred[i * compstride + j] =
- AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
- interpred[i * interstride + j]);
- }
- }
- break;
-
- case II_H_PRED:
- for (i = 0; i < bh; ++i) {
- for (j = 0; j < bw; ++j) {
- int scale = ii_weights1d[j * size_scale];
- comppred[i * compstride + j] =
- AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
- interpred[i * interstride + j]);
- }
- }
- break;
-
- case II_SMOOTH_PRED:
- for (i = 0; i < bh; ++i) {
- for (j = 0; j < bw; ++j) {
- int scale = ii_weights1d[(i < j ? i : j) * size_scale];
- comppred[i * compstride + j] =
- AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
- interpred[i * interstride + j]);
- }
- }
- break;
-
- case II_DC_PRED:
- default:
- for (i = 0; i < bh; ++i) {
- for (j = 0; j < bw; ++j) {
- comppred[i * compstride + j] = AOM_BLEND_AVG(
- interpred[i * interstride + j], intrapred[i * intrastride + j]);
- }
- }
- break;
- }
+ uint8_t mask[MAX_SB_SQUARE];
+ build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
+ aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
+ interpred8, interstride, mask, bw, bw, bh, 0, 0,
+ bd);
}
-#endif // CONFIG_HIGHBITDEPTH
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
MACROBLOCKD *xd,
@@ -3072,42 +1605,46 @@ void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
BUFFER_SET *ctx, uint8_t *dst,
int dst_stride) {
struct macroblockd_plane *const pd = &xd->plane[plane];
- BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
- PREDICTION_MODE mode =
- interintra_to_intra_mode[xd->mi[0]->mbmi.interintra_mode];
+ const int ssx = xd->plane[plane].subsampling_x;
+ const int ssy = xd->plane[plane].subsampling_y;
+ BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
+ PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
+ xd->mi[0]->angle_delta[PLANE_TYPE_Y] = 0;
+ xd->mi[0]->angle_delta[PLANE_TYPE_UV] = 0;
+ xd->mi[0]->filter_intra_mode_info.use_filter_intra = 0;
+ xd->mi[0]->use_intrabc = 0;
- av1_predict_intra_block(cm, xd, pd->width, pd->height, plane_bsize, mode,
- ctx->plane[plane], ctx->stride[plane], dst,
- dst_stride, 0, 0, plane);
+ av1_predict_intra_block(cm, xd, pd->width, pd->height,
+ max_txsize_rect_lookup[plane_bsize], mode, 0, 0,
+ FILTER_INTRA_MODES, ctx->plane[plane],
+ ctx->stride[plane], dst, dst_stride, 0, 0, plane);
}
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
const uint8_t *inter_pred, int inter_stride,
const uint8_t *intra_pred, int intra_stride) {
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
-#if CONFIG_HIGHBITDEPTH
+ const int ssx = xd->plane[plane].subsampling_x;
+ const int ssy = xd->plane[plane].subsampling_y;
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
combine_interintra_highbd(
- xd->mi[0]->mbmi.interintra_mode, xd->mi[0]->mbmi.use_wedge_interintra,
- xd->mi[0]->mbmi.interintra_wedge_index,
- xd->mi[0]->mbmi.interintra_wedge_sign, bsize, plane_bsize,
- xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, inter_pred,
- inter_stride, intra_pred, intra_stride, xd->bd);
+ xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
+ xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign,
+ bsize, plane_bsize, xd->plane[plane].dst.buf,
+ xd->plane[plane].dst.stride, inter_pred, inter_stride, intra_pred,
+ intra_stride, xd->bd);
return;
}
-#endif // CONFIG_HIGHBITDEPTH
- combine_interintra(xd->mi[0]->mbmi.interintra_mode,
- xd->mi[0]->mbmi.use_wedge_interintra,
- xd->mi[0]->mbmi.interintra_wedge_index,
- xd->mi[0]->mbmi.interintra_wedge_sign, bsize, plane_bsize,
- xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
- inter_pred, inter_stride, intra_pred, intra_stride);
+ combine_interintra(
+ xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
+ xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign,
+ bsize, plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
+ inter_pred, inter_stride, intra_pred, intra_stride);
}
void av1_build_interintra_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
uint8_t *ypred, int ystride,
BUFFER_SET *ctx, BLOCK_SIZE bsize) {
-#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
av1_build_intra_predictors_for_interintra(
@@ -3116,7 +1653,6 @@ void av1_build_interintra_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
return;
}
-#endif // CONFIG_HIGHBITDEPTH
{
DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, ctx,
@@ -3130,7 +1666,6 @@ void av1_build_interintra_predictors_sbc(const AV1_COMMON *cm, MACROBLOCKD *xd,
uint8_t *upred, int ustride,
BUFFER_SET *ctx, int plane,
BLOCK_SIZE bsize) {
-#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, uintrapredictor[MAX_SB_SQUARE]);
av1_build_intra_predictors_for_interintra(
@@ -3138,10 +1673,7 @@ void av1_build_interintra_predictors_sbc(const AV1_COMMON *cm, MACROBLOCKD *xd,
MAX_SB_SIZE);
av1_combine_interintra(xd, bsize, plane, upred, ustride,
CONVERT_TO_BYTEPTR(uintrapredictor), MAX_SB_SIZE);
- return;
- }
-#endif // CONFIG_HIGHBITDEPTH
- {
+ } else {
DECLARE_ALIGNED(16, uint8_t, uintrapredictor[MAX_SB_SQUARE]);
av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
uintrapredictor, MAX_SB_SIZE);
@@ -3167,966 +1699,119 @@ void av1_build_interintra_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
av1_build_interintra_predictors_sbuv(cm, xd, upred, vpred, ustride, vstride,
ctx, bsize);
}
-#endif // CONFIG_INTERINTRA
// Builds the inter-predictor for the single ref case
// for use in the encoder to search the wedges efficiently.
static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane,
- int block, int bw, int bh, int x,
- int y, int w, int h, int mi_x,
- int mi_y, int ref,
- uint8_t *const ext_dst,
- int ext_dst_stride) {
+ int bw, int bh, int x, int y,
+ int w, int h, int mi_x, int mi_y,
+ int ref, uint8_t *const ext_dst,
+ int ext_dst_stride,
+ int can_use_previous) {
struct macroblockd_plane *const pd = &xd->plane[plane];
- const MODE_INFO *mi = xd->mi[0];
+ const MB_MODE_INFO *mi = xd->mi[0];
const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
struct buf_2d *const pre_buf = &pd->pre[ref];
-#if CONFIG_HIGHBITDEPTH
+ const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
uint8_t *const dst =
- (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? CONVERT_TO_BYTEPTR(ext_dst)
- : ext_dst) +
- ext_dst_stride * y + x;
-#else
- uint8_t *const dst = ext_dst + ext_dst_stride * y + x;
-#endif
- const MV mv = mi->mbmi.sb_type < BLOCK_8X8
- ? average_split_mvs(pd, mi, ref, block)
- : mi->mbmi.mv[ref].as_mv;
+ (hbd ? CONVERT_TO_BYTEPTR(ext_dst) : ext_dst) + ext_dst_stride * y + x;
+ const MV mv = mi->mv[ref].as_mv;
- uint8_t *pre;
- int xs, ys, subpel_x, subpel_y;
- const int is_scaled = av1_is_scaled(sf);
- ConvolveParams conv_params = get_conv_params(ref, 0, plane);
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+ ConvolveParams conv_params = get_conv_params(ref, 0, plane, xd->bd);
WarpTypesAllowed warp_types;
-#if CONFIG_GLOBAL_MOTION
-#if CONFIG_COMPOUND_SINGLEREF
- WarpedMotionParams *const wm =
- mi->mbmi.ref_frame[ref] > 0 ? &xd->global_motion[mi->mbmi.ref_frame[ref]]
- : &xd->global_motion[mi->mbmi.ref_frame[0]];
-#else // !(CONFIG_COMPOUND_SINGLEREF)
- WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]];
-#endif // CONFIG_COMPOUND_SINGLEREF
- warp_types.global_warp_allowed = is_global_mv_block(mi, block, wm->wmtype);
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_WARPED_MOTION
- warp_types.local_warp_allowed = mi->mbmi.motion_mode == WARPED_CAUSAL;
-#endif // CONFIG_WARPED_MOTION
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-
- if (is_scaled) {
- int ssx = pd->subsampling_x;
- int ssy = pd->subsampling_y;
- int orig_pos_y = (mi_y << (SUBPEL_BITS - ssy)) + (y << SUBPEL_BITS);
- orig_pos_y += mv.row * (1 << (1 - ssy));
- int orig_pos_x = (mi_x << (SUBPEL_BITS - ssx)) + (x << SUBPEL_BITS);
- orig_pos_x += mv.col * (1 << (1 - ssx));
- int pos_y = sf->scale_value_y(orig_pos_y, sf);
- int pos_x = sf->scale_value_x(orig_pos_x, sf);
- pos_x += SCALE_EXTRA_OFF;
- pos_y += SCALE_EXTRA_OFF;
-
- const int top = -((AOM_INTERP_EXTEND + bh) << SCALE_SUBPEL_BITS);
- const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
- << SCALE_SUBPEL_BITS;
- const int left = -((AOM_INTERP_EXTEND + bw) << SCALE_SUBPEL_BITS);
- const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS;
- pos_y = clamp(pos_y, top, bottom);
- pos_x = clamp(pos_x, left, right);
-
- pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
- (pos_x >> SCALE_SUBPEL_BITS);
- subpel_x = pos_x & SCALE_SUBPEL_MASK;
- subpel_y = pos_y & SCALE_SUBPEL_MASK;
- xs = sf->x_step_q4;
- ys = sf->y_step_q4;
- } else {
- const MV mv_q4 = clamp_mv_to_umv_border_sb(
- xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
- xs = ys = SCALE_SUBPEL_SHIFTS;
- subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
- subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
- pre = pre_buf->buf + (y + (mv_q4.row >> SUBPEL_BITS)) * pre_buf->stride +
- (x + (mv_q4.col >> SUBPEL_BITS));
- }
+ const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]];
+ warp_types.global_warp_allowed = is_global_mv_block(mi, wm->wmtype);
+ warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL;
+ const int pre_x = (mi_x) >> pd->subsampling_x;
+ const int pre_y = (mi_y) >> pd->subsampling_y;
+ uint8_t *pre;
+ SubpelParams subpel_params;
+ calc_subpel_params(xd, sf, mv, plane, pre_x, pre_y, x, y, pre_buf, &pre,
+ &subpel_params, bw, bh);
- av1_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride, subpel_x,
- subpel_y, sf, w, h, &conv_params,
- mi->mbmi.interp_filters,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- &warp_types, (mi_x >> pd->subsampling_x) + x,
- (mi_y >> pd->subsampling_y) + y, plane, ref,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-#if CONFIG_MOTION_VAR
- mi, 0,
-#endif
- xs, ys, xd);
+ av1_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride,
+ &subpel_params, sf, w, h, &conv_params,
+ mi->interp_filters, &warp_types, pre_x + x,
+ pre_y + y, plane, ref, mi, 0, xd, can_use_previous);
}
void av1_build_inter_predictors_for_planes_single_buf(
MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row,
- int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3]) {
+ int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3],
+ int can_use_previous) {
int plane;
const int mi_x = mi_col * MI_SIZE;
const int mi_y = mi_row * MI_SIZE;
for (plane = plane_from; plane <= plane_to; ++plane) {
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsize, &xd->plane[plane]);
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(
+ bsize, xd->plane[plane].subsampling_x, xd->plane[plane].subsampling_y);
const int bw = block_size_wide[plane_bsize];
const int bh = block_size_high[plane_bsize];
-
- if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
- int x, y;
- const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
- const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
- assert(bsize == BLOCK_8X8);
-#if CONFIG_COMPOUND_SINGLEREF
- assert(has_second_ref(&xd->mi[0]->mbmi) ||
- !is_inter_singleref_comp_mode(xd->mi[0]->mbmi.mode));
-#endif // CONFIG_COMPOUND_SINGLEREF
- for (y = 0; y < num_4x4_h; ++y)
- for (x = 0; x < num_4x4_w; ++x)
- build_inter_predictors_single_buf(
- xd, plane, y * 2 + x, bw, bh, 4 * x, 4 * y, 4, 4, mi_x, mi_y, ref,
- ext_dst[plane], ext_dst_stride[plane]);
- } else {
- build_inter_predictors_single_buf(xd, plane, 0, bw, bh, 0, 0, bw, bh,
- mi_x, mi_y, ref, ext_dst[plane],
- ext_dst_stride[plane]);
- }
+ build_inter_predictors_single_buf(xd, plane, bw, bh, 0, 0, bw, bh, mi_x,
+ mi_y, ref, ext_dst[plane],
+ ext_dst_stride[plane], can_use_previous);
}
}
static void build_wedge_inter_predictor_from_buf(
- MACROBLOCKD *xd, int plane, int x, int y, int w, int h,
-#if CONFIG_SUPERTX
- int wedge_offset_x, int wedge_offset_y,
-#endif // CONFIG_SUPERTX
- uint8_t *ext_dst0, int ext_dst_stride0, uint8_t *ext_dst1,
- int ext_dst_stride1) {
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MACROBLOCKD *xd, int plane, int x, int y, int w, int h, uint8_t *ext_dst0,
+ int ext_dst_stride0, uint8_t *ext_dst1, int ext_dst_stride1) {
+ MB_MODE_INFO *const mbmi = xd->mi[0];
const int is_compound = has_second_ref(mbmi);
MACROBLOCKD_PLANE *const pd = &xd->plane[plane];
struct buf_2d *const dst_buf = &pd->dst;
uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
- const INTERINTER_COMPOUND_DATA comp_data = {
-#if CONFIG_WEDGE
- mbmi->wedge_index,
- mbmi->wedge_sign,
-#endif // CONFIG_WEDGE
-#if CONFIG_COMPOUND_SEGMENT
- mbmi->mask_type,
- xd->seg_mask,
-#endif // CONFIG_COMPOUND_SEGMENT
- mbmi->interinter_compound_type
- };
-
-#if CONFIG_COMPOUND_SINGLEREF
- if ((is_compound || is_inter_singleref_comp_mode(mbmi->mode)) &&
- is_masked_compound_type(mbmi->interinter_compound_type))
-#else // !CONFIG_COMPOUND_SINGLEREF
- if (is_compound && is_masked_compound_type(mbmi->interinter_compound_type))
-#endif // CONFIG_COMPOUND_SINGLEREF
- {
-#if CONFIG_COMPOUND_SEGMENT
- if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
-#if CONFIG_HIGHBITDEPTH
+ mbmi->interinter_comp.seg_mask = xd->seg_mask;
+ const INTERINTER_COMPOUND_DATA *comp_data = &mbmi->interinter_comp;
+
+ if (is_compound && is_masked_compound_type(comp_data->type)) {
+ if (!plane && comp_data->type == COMPOUND_DIFFWTD) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- build_compound_seg_mask_highbd(
- comp_data.seg_mask, comp_data.mask_type,
+ av1_build_compound_diffwtd_mask_highbd(
+ comp_data->seg_mask, comp_data->mask_type,
CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
- CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, mbmi->sb_type, h, w,
- xd->bd);
+ CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, h, w, xd->bd);
else
-#endif // CONFIG_HIGHBITDEPTH
- build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type,
- ext_dst0, ext_dst_stride0, ext_dst1,
- ext_dst_stride1, mbmi->sb_type, h, w);
+ av1_build_compound_diffwtd_mask(
+ comp_data->seg_mask, comp_data->mask_type, ext_dst0,
+ ext_dst_stride0, ext_dst1, ext_dst_stride1, h, w);
}
-#endif // CONFIG_COMPOUND_SEGMENT
-#if CONFIG_SUPERTX
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- build_masked_compound_wedge_extend_highbd(
- dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
- CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, &comp_data,
- mbmi->sb_type, wedge_offset_x, wedge_offset_y, h, w, xd->bd);
- else
-#endif // CONFIG_HIGHBITDEPTH
- build_masked_compound_wedge_extend(
- dst, dst_buf->stride, ext_dst0, ext_dst_stride0, ext_dst1,
- ext_dst_stride1, &comp_data, mbmi->sb_type, wedge_offset_x,
- wedge_offset_y, h, w);
-#else // !CONFIG_SUPERTX
-#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
build_masked_compound_highbd(
dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
- CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, &comp_data,
+ CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, comp_data,
mbmi->sb_type, h, w, xd->bd);
else
-#endif // CONFIG_HIGHBITDEPTH
build_masked_compound(dst, dst_buf->stride, ext_dst0, ext_dst_stride0,
- ext_dst1, ext_dst_stride1, &comp_data,
- mbmi->sb_type, h, w);
-#endif // CONFIG_SUPERTX
+ ext_dst1, ext_dst_stride1, comp_data, mbmi->sb_type,
+ h, w);
} else {
-#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
aom_highbd_convolve_copy(CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
dst, dst_buf->stride, NULL, 0, NULL, 0, w, h,
xd->bd);
else
-#endif // CONFIG_HIGHBITDEPTH
aom_convolve_copy(ext_dst0, ext_dst_stride0, dst, dst_buf->stride, NULL,
0, NULL, 0, w, h);
}
}
-void av1_build_wedge_inter_predictor_from_buf(
- MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to,
-#if CONFIG_SUPERTX
- int wedge_offset_x, int wedge_offset_y,
-#endif // CONFIG_SUPERTX
- uint8_t *ext_dst0[3], int ext_dst_stride0[3], uint8_t *ext_dst1[3],
- int ext_dst_stride1[3]) {
+void av1_build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, BLOCK_SIZE bsize,
+ int plane_from, int plane_to,
+ uint8_t *ext_dst0[3],
+ int ext_dst_stride0[3],
+ uint8_t *ext_dst1[3],
+ int ext_dst_stride1[3]) {
int plane;
for (plane = plane_from; plane <= plane_to; ++plane) {
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsize, &xd->plane[plane]);
-
- if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
- int x, y;
- const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
- const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
- assert(bsize == BLOCK_8X8);
- for (y = 0; y < num_4x4_h; ++y)
- for (x = 0; x < num_4x4_w; ++x)
- build_wedge_inter_predictor_from_buf(
- xd, plane, 4 * x, 4 * y, 4, 4,
-#if CONFIG_SUPERTX
- wedge_offset_x, wedge_offset_y,
-#endif // CONFIG_SUPERTX
- ext_dst0[plane], ext_dst_stride0[plane], ext_dst1[plane],
- ext_dst_stride1[plane]);
- } else {
- const int bw = block_size_wide[plane_bsize];
- const int bh = block_size_high[plane_bsize];
- build_wedge_inter_predictor_from_buf(
- xd, plane, 0, 0, bw, bh,
-#if CONFIG_SUPERTX
- wedge_offset_x, wedge_offset_y,
-#endif // CONFIG_SUPERTX
- ext_dst0[plane], ext_dst_stride0[plane], ext_dst1[plane],
- ext_dst_stride1[plane]);
- }
- }
-}
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
-
-void alloc_ncobmc_pred_buffer(MACROBLOCKD *const xd) {
- int i;
- // allocate interpolated prediction buffer
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- xd->ncobmc_pred_buf[i] = (uint8_t *)malloc(sizeof(uint8_t) * MAX_SB_SQUARE);
- av1_zero_array(xd->ncobmc_pred_buf[i], MAX_SB_SQUARE);
- xd->ncobmc_pred_buf_stride[i] = MAX_SB_SIZE;
- }
-}
-
-void free_ncobmc_pred_buffer(MACROBLOCKD *const xd) {
- for (int i = 0; i < MAX_MB_PLANE; ++i) free(xd->ncobmc_pred_buf[i]);
-}
-
-void get_pred_from_intrpl_buf(MACROBLOCKD *xd, int mi_row, int mi_col,
- BLOCK_SIZE bsize, int plane) {
- uint8_t *dst = xd->plane[plane].dst.buf;
- int ds = xd->plane[plane].dst.stride;
- int ss_x = xd->plane[plane].subsampling_x;
- int ss_y = xd->plane[plane].subsampling_y;
-
- const int ip_wide = mi_size_wide[bsize] * MI_SIZE >> ss_x;
- const int ip_high = mi_size_high[bsize] * MI_SIZE >> ss_y;
- // relative coordinates of this MI in the superblock
- int row_rlt = (mi_row - xd->sb_mi_bd.mi_row_begin) * MI_SIZE >> ss_y;
- int col_rlt = (mi_col - xd->sb_mi_bd.mi_col_begin) * MI_SIZE >> ss_x;
- int s = xd->ncobmc_pred_buf_stride[plane];
- int r, c;
-
- for (r = 0; r < ip_high; ++r) {
- for (c = 0; c < ip_wide; ++c) {
- dst[r * ds + c] =
- xd->ncobmc_pred_buf[plane][(r + row_rlt) * s + c + col_rlt];
- }
- }
-}
-// scaling factors for ncobmc kernels
-#define KERNEL_SCALE_LOG 14
-
-void build_ncobmc_intrpl_pred(const AV1_COMMON *const cm, MACROBLOCKD *xd,
- int plane, int pxl_row, int pxl_col,
- BLOCK_SIZE bsize, uint8_t *preds[][MAX_MB_PLANE],
- int stride[MAX_MB_PLANE], // pred buffer strides
- int mode) {
- const ADAPT_OVERLAP_BLOCK ao_block = adapt_overlap_block_lookup[bsize];
- const NCOBMC_KERNELS *const knls = &cm->ncobmc_kernels[ao_block][mode];
- const int wide = mi_size_wide[bsize] * MI_SIZE;
- const int high = mi_size_high[bsize] * MI_SIZE;
- const int s = stride[plane];
- const int ss_x = xd->plane[plane].subsampling_x;
- const int ss_y = xd->plane[plane].subsampling_y;
- int row_offset = (pxl_row - xd->sb_mi_bd.mi_row_begin * MI_SIZE) >> ss_y;
- int col_offset = (pxl_col - xd->sb_mi_bd.mi_col_begin * MI_SIZE) >> ss_x;
- int dst_stride = xd->ncobmc_pred_buf_stride[plane];
- int dst_offset = row_offset * dst_stride + col_offset;
-
-#if CONFIG_HIGHBITDEPTH
- const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
-#else
- const int is_hbd = 0;
-#endif // CONFIG_HIGHBITDEPTH
-
- int r, c, k_r, k_c;
- int64_t tmp;
-
- for (r = 0; r < (high >> ss_x); ++r) {
- for (c = 0; c < (wide >> ss_y); ++c) {
- int pos = r * s + c;
- int q_tmp;
- uint8_t val;
-
- // TODO(weitinglin): find out the optimal sub-sampling patterns for
- // chroma
- k_r = (r << ss_y) + ss_y;
- k_c = (c << ss_x) + ss_x;
- if (ss_y && k_r >= high) k_r -= 1;
- if (ss_x && k_c >= wide) k_c -= 1;
-
- if (!is_hbd) {
- uint8_t *tmp_p[4];
- int i;
- for (i = 0; i < 4; ++i) tmp_p[i] = preds[i][plane];
-
- tmp = 0;
- for (i = 0; i < 4; ++i)
- tmp += knls->KERNEL[i][k_r][k_c] * tmp_p[i][pos];
-
- } else {
- uint16_t *tmp_p[4];
- int i;
- for (i = 0; i < 4; ++i) tmp_p[i] = CONVERT_TO_SHORTPTR(preds[i][plane]);
-
- tmp = 0;
- for (i = 0; i < 4; ++i)
- tmp += knls->KERNEL[i][k_r][k_c] * tmp_p[i][pos];
- }
-
- q_tmp = (tmp <= 0) ? 0 : ROUND_POWER_OF_TWO(tmp, KERNEL_SCALE_LOG);
- val = clip_pixel(q_tmp);
-
- xd->ncobmc_pred_buf[plane][r * dst_stride + c + dst_offset] = val;
-
- assert(r * dst_stride + c + dst_offset < MAX_SB_SQUARE);
- }
- }
-}
-
-void get_pred_by_horz_neighbor(const AV1_COMMON *cm, MACROBLOCKD *xd, int bsize,
- int mi_row, int mi_col,
- uint8_t *dst_buf[MAX_MB_PLANE],
- int dst_stride[MAX_MB_PLANE]) {
- const TileInfo *const tile = &xd->tile;
- const int mb_to_bottom_edge_base = xd->mb_to_bottom_edge;
- const int mb_to_top_edge_base = xd->mb_to_top_edge;
- const int mb_to_left_edge_base = xd->mb_to_left_edge;
- const int mb_to_right_edge_base = xd->mb_to_right_edge;
- int overlappable_offset = -1;
- const int mi_nums = AOMMIN(mi_size_high[bsize], cm->mi_rows - mi_row);
-
- int i, j, mi_step, ref;
-
- xd->mb_to_right_edge += mi_size_wide[bsize] * MI_SIZE * 4;
-
- // build from left neighbors
- for (i = 0; i < mi_nums; i += mi_step) {
- int mi_row_offset = i;
- int mi_col_offset = -1;
- int mi_x, mi_y, bw, bh;
- MODE_INFO *left_mi;
- MB_MODE_INFO *left_mbmi, backup_mbmi;
- BLOCK_SIZE l_bsize;
-
- // create the original prediction if offset exceeds the boundary
- if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start)) mi_col_offset = 0;
-
- left_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- left_mbmi = &left_mi->mbmi;
- l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
-
- mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]);
-
-    // reset the mi if it is not overlappable
- if (!is_neighbor_overlappable(left_mbmi)) {
- // use left_mbmi->sb_type instead of l_bsize to handle
- // sub8x8 cases
- int search_mi_step = mi_size_high[left_mbmi->sb_type];
- while (!is_neighbor_overlappable(left_mbmi)) {
- mi_row_offset += search_mi_step;
- if (mi_row_offset < mi_nums) {
- left_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- left_mbmi = &left_mi->mbmi;
- search_mi_step = mi_size_high[left_mbmi->sb_type];
- } else {
- if (overlappable_offset >= 0) {
- mi_row_offset = overlappable_offset;
- } else {
- mi_row_offset = 0;
- mi_col_offset = 0;
- }
- left_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- left_mbmi = &left_mi->mbmi;
- break;
- }
- }
- } else {
- // update the available overlappable mi
- overlappable_offset = mi_row_offset;
- }
-
- backup_mbmi = *left_mbmi;
- modify_neighbor_predictor_for_obmc(left_mbmi);
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- struct macroblockd_plane *const pd = &xd->plane[j];
- setup_pred_plane(&pd->dst, l_bsize, dst_buf[j], MAX_SB_SIZE, MAX_SB_SIZE,
- dst_stride[j], i, 0, NULL, pd->subsampling_x,
- pd->subsampling_y);
- }
-#if CONFIG_COMPOUND_SINGLEREF
- for (ref = 0; ref < 1 + (is_inter_anyref_comp_mode(left_mbmi->mode));
- ++ref) {
- const MV_REFERENCE_FRAME frame = has_second_ref(left_mbmi)
- ? left_mbmi->ref_frame[ref]
- : left_mbmi->ref_frame[0];
-#else // !(CONFIG_COMPOUND_SINGLEREF)
- for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) {
- const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
-#endif // CONFIG_COMPOUND_SINGLEREF
- const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
-
- xd->block_refs[ref] = ref_buf;
- if ((!av1_is_valid_scale(&ref_buf->sf)))
- aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col,
- &ref_buf->sf);
- }
- xd->mb_to_top_edge = -((mi_row + i) * MI_SIZE * 8);
- xd->mb_to_bottom_edge =
- mb_to_bottom_edge_base + (mi_nums - i - mi_step) * MI_SIZE * 8;
- mi_x = mi_col << MI_SIZE_LOG2;
- mi_y = (mi_row + i) << MI_SIZE_LOG2;
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- const struct macroblockd_plane *pd = &xd->plane[j];
- bw = mi_size_wide[bsize] << (MI_SIZE_LOG2 - 1) >> pd->subsampling_x;
- bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
-
- build_inter_predictors(cm, xd, j, left_mi, 1, 0, bw, bh, 0, 0, bw, bh,
-#if CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_SUPERTX
- mi_x, mi_y);
- }
- *left_mbmi = backup_mbmi;
- }
-
- // build from right neighbors
- xd->mb_to_right_edge = mb_to_right_edge_base;
- xd->mb_to_left_edge -= mi_size_wide[bsize] * MI_SIZE * 4;
-
- overlappable_offset = -1;
-
- for (i = 0; i < mi_nums; i += mi_step) {
- int mi_row_offset = i;
- int mi_col_offset = mi_size_wide[bsize];
- int mi_x, mi_y, bw, bh;
- int mi_col_shift = mi_size_wide[bsize] >> 1;
- MODE_INFO *right_mi;
- MB_MODE_INFO *right_mbmi, backup_mbmi;
- BLOCK_SIZE r_bsize;
-
- // create the original prediction if offset exceeds the boundary
- if (mi_col + mi_col_offset > xd->sb_mi_bd.mi_col_end) mi_col_offset = 0;
-
- right_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- right_mbmi = &right_mi->mbmi;
- r_bsize = AOMMAX(right_mbmi->sb_type, BLOCK_8X8);
-
- mi_step = AOMMIN(mi_nums, mi_size_high[r_bsize]);
-
- if (!is_neighbor_overlappable(right_mbmi)) {
- int search_mi_step = mi_size_high[right_mbmi->sb_type];
- while (!is_neighbor_overlappable(right_mbmi)) {
- mi_row_offset += search_mi_step;
- if (mi_row_offset < mi_nums) {
- right_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- right_mbmi = &right_mi->mbmi;
- search_mi_step = mi_size_high[right_mbmi->sb_type];
- } else {
- if (overlappable_offset >= 0) {
- mi_row_offset = overlappable_offset;
- } else {
- mi_row_offset = 0;
- mi_col_offset = 0;
- }
- right_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- right_mbmi = &right_mi->mbmi;
- break;
- }
- }
- } else {
- overlappable_offset = mi_row_offset;
- }
-
- backup_mbmi = *right_mbmi;
- modify_neighbor_predictor_for_obmc(right_mbmi);
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- struct macroblockd_plane *const pd = &xd->plane[j];
- setup_pred_plane(&pd->dst, r_bsize, dst_buf[j], MAX_SB_SIZE, MAX_SB_SIZE,
- dst_stride[j], i, mi_col_shift, NULL, pd->subsampling_x,
- pd->subsampling_y);
- }
-#if CONFIG_COMPOUND_SINGLEREF
- for (ref = 0; ref < 1 + (is_inter_anyref_comp_mode(right_mbmi->mode));
- ++ref) {
- const MV_REFERENCE_FRAME frame = has_second_ref(right_mbmi)
- ? right_mbmi->ref_frame[ref]
- : right_mbmi->ref_frame[0];
-#else // !(CONFIG_COMPOUND_SINGLEREF)
- for (ref = 0; ref < 1 + has_second_ref(right_mbmi); ++ref) {
- const MV_REFERENCE_FRAME frame = right_mbmi->ref_frame[ref];
-#endif // CONFIG_COMPOUND_SINGLEREF
- const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
- xd->block_refs[ref] = ref_buf;
- if ((!av1_is_valid_scale(&ref_buf->sf)))
- aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i,
- mi_col + mi_col_shift, &ref_buf->sf);
- }
- xd->mb_to_top_edge = -((mi_row + i) * MI_SIZE * 8);
- xd->mb_to_bottom_edge =
- mb_to_bottom_edge_base + (mi_nums - i - mi_step) * MI_SIZE * 8;
- mi_x = (mi_col + mi_col_shift) << MI_SIZE_LOG2;
- mi_y = (mi_row + i) << MI_SIZE_LOG2;
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- const struct macroblockd_plane *pd = &xd->plane[j];
- bw = mi_size_wide[bsize] << (MI_SIZE_LOG2 - 1) >> pd->subsampling_x;
- bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
-
- build_inter_predictors(cm, xd, j, right_mi, 1, 0, bw, bh, 0, 0, bw, bh,
-#if CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_SUPERTX
- mi_x, mi_y);
- }
-
- *right_mbmi = backup_mbmi;
- }
-
- // restore the boundaries
- xd->mb_to_top_edge = mb_to_top_edge_base;
- xd->mb_to_bottom_edge = mb_to_bottom_edge_base;
- xd->mb_to_left_edge = mb_to_left_edge_base;
- xd->mb_to_right_edge = mb_to_right_edge_base;
-}
-
-void get_pred_by_vert_neighbor(const AV1_COMMON *cm, MACROBLOCKD *xd, int bsize,
- int mi_row, int mi_col,
- uint8_t *dst_buf[MAX_MB_PLANE],
- int dst_stride[MAX_MB_PLANE]) {
- const TileInfo *const tile = &xd->tile;
- const int mb_to_bottom_edge_base = xd->mb_to_bottom_edge;
- const int mb_to_top_edge_base = xd->mb_to_top_edge;
- const int mb_to_left_edge_base = xd->mb_to_left_edge;
- const int mb_to_right_edge_base = xd->mb_to_right_edge;
- int overlappable_offset = -1;
- const int mi_nums = AOMMIN(mi_size_wide[bsize], cm->mi_cols - mi_col);
-
- int i, j, mi_step, ref;
-
- xd->mb_to_bottom_edge += mi_nums * MI_SIZE * 4;
-
- // build from above neighbors
- for (i = 0; i < mi_nums; i += mi_step) {
- int mi_row_offset = -1;
- int mi_col_offset = i;
- int mi_x, mi_y, bw, bh;
- MODE_INFO *above_mi;
- MB_MODE_INFO *above_mbmi, backup_mbmi;
- BLOCK_SIZE a_bsize;
-
- // create the original prediction if offset exceeds the boundary
- if (mi_row <= tile->mi_row_start) mi_row_offset = 0;
-
- above_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- above_mbmi = &above_mi->mbmi;
- a_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8);
-
- mi_step = AOMMIN(mi_nums, mi_size_high[a_bsize]);
-
-    // reset the mi if it is not overlappable
- if (!is_neighbor_overlappable(above_mbmi)) {
- int search_mi_step = mi_size_high[above_mbmi->sb_type];
- // backward search
- while (!is_neighbor_overlappable(above_mbmi)) {
- mi_col_offset += search_mi_step;
- if (mi_col_offset < mi_nums) {
- above_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- above_mbmi = &above_mi->mbmi;
- search_mi_step = mi_size_high[above_mbmi->sb_type];
- } else {
- if (overlappable_offset >= 0) {
- mi_col_offset = overlappable_offset;
- } else {
- mi_row_offset = 0;
- mi_col_offset = 0;
- }
- above_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- above_mbmi = &above_mi->mbmi;
- break;
- }
- }
- } else {
- // update the available overlappable mi
- overlappable_offset = mi_col_offset;
- }
-
- backup_mbmi = *above_mbmi;
- modify_neighbor_predictor_for_obmc(above_mbmi);
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- struct macroblockd_plane *const pd = &xd->plane[j];
- setup_pred_plane(&pd->dst, a_bsize, dst_buf[j], MAX_SB_SIZE, MAX_SB_SIZE,
- dst_stride[j], 0, i, NULL, pd->subsampling_x,
- pd->subsampling_y);
- }
-#if CONFIG_COMPOUND_SINGLEREF
- for (ref = 0; ref < 1 + (is_inter_anyref_comp_mode(above_mbmi->mode));
- ++ref) {
- const MV_REFERENCE_FRAME frame = has_second_ref(above_mbmi)
- ? above_mbmi->ref_frame[ref]
- : above_mbmi->ref_frame[0];
-#else // !(CONFIG_COMPOUND_SINGLEREF)
- for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
- const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
-#endif // CONFIG_COMPOUND_SINGLEREF
- const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
-
- xd->block_refs[ref] = ref_buf;
- if ((!av1_is_valid_scale(&ref_buf->sf)))
- aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col + i,
- &ref_buf->sf);
- }
-
- xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
- xd->mb_to_right_edge =
- mb_to_right_edge_base + (mi_nums - i - mi_step) * MI_SIZE * 8;
- mi_x = (mi_col + i) << MI_SIZE_LOG2;
- mi_y = mi_row << MI_SIZE_LOG2;
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- const struct macroblockd_plane *pd = &xd->plane[j];
-
- bh = mi_size_high[bsize] << (MI_SIZE_LOG2 - 1) >> pd->subsampling_x;
- bw = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
-
- build_inter_predictors(cm, xd, j, above_mi, 1, 0, bw, bh, 0, 0, bw, bh,
-#if CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_SUPERTX
- mi_x, mi_y);
- }
-
- *above_mbmi = backup_mbmi;
- }
-
- // build from bottom neighbors
- xd->mb_to_bottom_edge = mb_to_bottom_edge_base;
- xd->mb_to_top_edge -= mi_size_high[bsize] * MI_SIZE * 4;
-
- overlappable_offset = -1;
-
- for (i = 0; i < mi_nums; i += mi_step) {
- int mi_row_offset = mi_size_high[bsize];
- int mi_col_offset = i;
- int mi_x, mi_y, bw, bh;
- int mi_row_shift = mi_size_high[bsize] >> 1;
- MODE_INFO *bottom_mi;
- MB_MODE_INFO *bottom_mbmi, backup_mbmi;
- BLOCK_SIZE b_bsize;
-
- // create the original prediction if offset exceeds the boundary
- if (mi_row + mi_row_offset > xd->sb_mi_bd.mi_row_end) mi_row_offset = 0;
-
- bottom_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- bottom_mbmi = &bottom_mi->mbmi;
- b_bsize = AOMMAX(bottom_mbmi->sb_type, BLOCK_8X8);
-
- mi_step = AOMMIN(mi_nums, mi_size_high[b_bsize]);
-
-    // reset the mi if it is not overlappable
- if (!is_neighbor_overlappable(bottom_mbmi)) {
- int search_mi_step = mi_size_high[bottom_mbmi->sb_type];
- while (!is_neighbor_overlappable(bottom_mbmi)) {
- mi_col_offset += search_mi_step;
- if (mi_col_offset < mi_nums) {
- bottom_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- bottom_mbmi = &bottom_mi->mbmi;
- search_mi_step = mi_size_high[bottom_mbmi->sb_type];
- } else {
- if (overlappable_offset >= 0) {
- mi_col_offset = overlappable_offset;
- } else {
- mi_col_offset = 0;
- mi_row_offset = 0;
- }
- bottom_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- bottom_mbmi = &bottom_mi->mbmi;
- break;
- }
- }
- } else {
- // update the available overlappable mi
- overlappable_offset = mi_col_offset;
- }
-
- backup_mbmi = *bottom_mbmi;
- modify_neighbor_predictor_for_obmc(bottom_mbmi);
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- struct macroblockd_plane *const pd = &xd->plane[j];
- setup_pred_plane(&pd->dst, b_bsize, dst_buf[j], MAX_SB_SIZE, MAX_SB_SIZE,
- dst_stride[j], mi_row_shift, i, NULL, pd->subsampling_x,
- pd->subsampling_y);
- }
-#if CONFIG_COMPOUND_SINGLEREF
- for (ref = 0; ref < 1 + (is_inter_anyref_comp_mode(bottom_mbmi->mode));
- ++ref) {
- const MV_REFERENCE_FRAME frame = has_second_ref(bottom_mbmi)
- ? bottom_mbmi->ref_frame[ref]
- : bottom_mbmi->ref_frame[0];
-#else // !(CONFIG_COMPOUND_SINGLEREF)
- for (ref = 0; ref < 1 + has_second_ref(bottom_mbmi); ++ref) {
- const MV_REFERENCE_FRAME frame = bottom_mbmi->ref_frame[ref];
-#endif // CONFIG_COMPOUND_SINGLEREF
- const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
- xd->block_refs[ref] = ref_buf;
- if ((!av1_is_valid_scale(&ref_buf->sf)))
- aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + mi_row_shift,
- mi_col + i, &ref_buf->sf);
- }
-
- xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
- xd->mb_to_right_edge =
- mb_to_right_edge_base + (mi_nums - i - mi_step) * MI_SIZE * 8;
- mi_x = (mi_col + i) << MI_SIZE_LOG2;
- mi_y = (mi_row + mi_row_shift) << MI_SIZE_LOG2;
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- const struct macroblockd_plane *pd = &xd->plane[j];
-
- bh = mi_size_high[bsize] << (MI_SIZE_LOG2 - 1) >> pd->subsampling_x;
- bw = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
-
- build_inter_predictors(cm, xd, j, bottom_mi, 1, 0, bw, bh, 0, 0, bw, bh,
-#if CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_SUPERTX
- mi_x, mi_y);
- }
-
- *bottom_mbmi = backup_mbmi;
- }
- // restore the boundaries
- xd->mb_to_top_edge = mb_to_top_edge_base;
- xd->mb_to_bottom_edge = mb_to_bottom_edge_base;
- xd->mb_to_left_edge = mb_to_left_edge_base;
- xd->mb_to_right_edge = mb_to_right_edge_base;
-}
-
-void get_pred_by_corner_neighbor(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int bsize, int mi_row, int mi_col,
- uint8_t *dst_buf[MAX_MB_PLANE],
- int dst_stride[MAX_MB_PLANE]) {
- const TileInfo *const tile = &xd->tile;
- const int mb_to_bottom_edge_base = xd->mb_to_bottom_edge;
- const int mb_to_top_edge_base = xd->mb_to_top_edge;
- const int mb_to_left_edge_base = xd->mb_to_left_edge;
- const int mb_to_right_edge_base = xd->mb_to_right_edge;
- const int mi_wide = mi_size_wide[bsize];
- const int mi_high = mi_size_high[bsize];
-
- // location of four mi sources
- const int mi_row_offsets[4] = { -1, -1, mi_high, mi_high };
- const int mi_col_offsets[4] = { -1, mi_wide, -1, mi_wide };
-
- MB_MODE_INFO backup_mbmi;
- int mi_x, mi_y, bh, bw;
- int i, j, ref;
-
- assert(bsize >= BLOCK_8X8);
-
- for (i = 0; i < 4; ++i) {
- int mi_row_offset = mi_row_offsets[i];
- int mi_col_offset = mi_col_offsets[i];
- MODE_INFO *corner_mi;
- MB_MODE_INFO *corner_mbmi;
-
- if (mi_col + mi_col_offset < tile->mi_col_start ||
- mi_col + mi_col_offset > xd->sb_mi_bd.mi_col_end)
- mi_col_offset = 0;
-
- if (mi_row + mi_row_offset < tile->mi_row_start ||
- mi_row + mi_row_offset > xd->sb_mi_bd.mi_row_end)
- mi_row_offset = 0;
-
- corner_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- corner_mbmi = &corner_mi->mbmi;
-
-    // reset the mi if it is not overlappable
- if (!is_neighbor_overlappable(corner_mbmi)) {
- mi_row_offset = 0;
- mi_col_offset = 0;
- corner_mi = xd->mi[0];
- corner_mbmi = &corner_mi->mbmi;
- }
-
- backup_mbmi = *corner_mbmi;
- modify_neighbor_predictor_for_obmc(corner_mbmi);
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- struct macroblockd_plane *const pd = &xd->plane[j];
- setup_pred_plane(&pd->dst, BLOCK_8X8, dst_buf[j], MAX_SB_SIZE,
- MAX_SB_SIZE, dst_stride[j], (i / 2) * (mi_high >> 1),
- (i % 2) * (mi_wide >> 1), NULL, pd->subsampling_x,
- pd->subsampling_y);
- }
-
-#if CONFIG_COMPOUND_SINGLEREF
- for (ref = 0; ref < 1 + (is_inter_anyref_comp_mode(corner_mbmi->mode));
- ++ref) {
- const MV_REFERENCE_FRAME frame = has_second_ref(corner_mbmi)
- ? corner_mbmi->ref_frame[ref]
- : corner_mbmi->ref_frame[0];
-#else
- for (ref = 0; ref < 1 + has_second_ref(corner_mbmi); ++ref) {
- const MV_REFERENCE_FRAME frame = corner_mbmi->ref_frame[ref];
-#endif
- const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
- xd->block_refs[ref] = ref_buf;
-
- if ((!av1_is_valid_scale(&ref_buf->sf)))
- aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- av1_setup_pre_planes(xd, ref, ref_buf->buf,
- mi_row + (i / 2) * (mi_high >> 1),
- mi_col + (i % 2) * (mi_wide >> 1), &ref_buf->sf);
- }
- // adjust mi boundaries of this block
- xd->mb_to_bottom_edge =
- mb_to_bottom_edge_base + (1 - (i / 2)) * mi_high * MI_SIZE * 4;
- xd->mb_to_top_edge = mb_to_top_edge_base - (i / 2) * mi_high * MI_SIZE * 4;
- xd->mb_to_right_edge =
- mb_to_right_edge_base + (1 - (i % 2)) * mi_wide * MI_SIZE * 4;
- xd->mb_to_left_edge =
- mb_to_left_edge_base - (i % 2) * mi_wide * MI_SIZE * 4;
-
- mi_x = (mi_col + (i % 2) * mi_wide / 2) << MI_SIZE_LOG2;
- mi_y = (mi_row + (i / 2) * mi_high / 2) << MI_SIZE_LOG2;
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- const struct macroblockd_plane *pd = &xd->plane[j];
- bh = mi_high << MI_SIZE_LOG2 >> (pd->subsampling_x + 1);
- bw = mi_wide << MI_SIZE_LOG2 >> (pd->subsampling_y + 1);
- build_inter_predictors(cm, xd, j, corner_mi, 1, 0, bw, bh, 0, 0, bw, bh,
-#if CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_SUPERTX
- mi_x, mi_y);
- }
- *corner_mbmi = backup_mbmi;
- }
- // restore the boundaries
- xd->mb_to_bottom_edge = mb_to_bottom_edge_base;
- xd->mb_to_top_edge = mb_to_top_edge_base;
- xd->mb_to_right_edge = mb_to_right_edge_base;
- xd->mb_to_left_edge = mb_to_left_edge_base;
-}
-
-// get the stitched extra prediction for this block
-void av1_get_ext_blk_preds(const AV1_COMMON *cm, MACROBLOCKD *xd, int bsize,
- int mi_row, int mi_col,
- uint8_t *dst_buf[][MAX_MB_PLANE],
- int dst_stride[MAX_MB_PLANE]) {
- get_pred_by_corner_neighbor(cm, xd, bsize, mi_row, mi_col, dst_buf[0],
- dst_stride);
- get_pred_by_vert_neighbor(cm, xd, bsize, mi_row, mi_col, dst_buf[1],
- dst_stride);
- get_pred_by_horz_neighbor(cm, xd, bsize, mi_row, mi_col, dst_buf[2],
- dst_stride);
-}
-
-void av1_get_ori_blk_pred(const AV1_COMMON *cm, MACROBLOCKD *xd, int bsize,
- int mi_row, int mi_col,
- uint8_t *dst_buf[MAX_MB_PLANE],
- int dst_stride[MAX_MB_PLANE]) {
- MODE_INFO *const mi = xd->mi[0];
- MB_MODE_INFO *const mbmi = &mi->mbmi;
- int mi_x = mi_col << MI_SIZE_LOG2;
- int mi_y = mi_row << MI_SIZE_LOG2;
- int bw = block_size_wide[bsize];
- int bh = block_size_high[bsize];
- int i, ref;
-
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- struct macroblockd_plane *const pd = &xd->plane[i];
- setup_pred_plane(&pd->dst, BLOCK_8X8, dst_buf[i], MAX_SB_SIZE, MAX_SB_SIZE,
- dst_stride[i], 0, 0, NULL, pd->subsampling_x,
- pd->subsampling_y);
- }
-
- for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
- const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
- const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
- xd->block_refs[ref] = ref_buf;
-
- if (!av1_is_valid_scale(&ref_buf->sf))
- aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
-
- av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, &ref_buf->sf);
- }
-
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- const struct macroblockd_plane *pd = &xd->plane[i];
- build_inter_predictors(cm, xd, i, mi, 1, 0, bw >> pd->subsampling_x,
- bh >> pd->subsampling_y, 0, 0,
- bw >> pd->subsampling_x, bh >> pd->subsampling_y,
-#if CONFIG_SUPERTX
- 0, 0,
-#endif // CONFIG_SUPERTX
- mi_x, mi_y);
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(
+ bsize, xd->plane[plane].subsampling_x, xd->plane[plane].subsampling_y);
+ const int bw = block_size_wide[plane_bsize];
+ const int bh = block_size_high[plane_bsize];
+ build_wedge_inter_predictor_from_buf(
+ xd, plane, 0, 0, bw, bh, ext_dst0[plane], ext_dst_stride0[plane],
+ ext_dst1[plane], ext_dst_stride1[plane]);
}
}
-
-#endif
diff --git a/third_party/aom/av1/common/reconinter.h b/third_party/aom/av1/common/reconinter.h
index 0c3333339..aa3aefc88 100644
--- a/third_party/aom/av1/common/reconinter.h
+++ b/third_party/aom/av1/common/reconinter.h
@@ -15,164 +15,26 @@
#include "av1/common/filter.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/convolve.h"
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
#include "av1/common/warped_motion.h"
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
#include "aom/aom_integer.h"
-#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
-#define WARP_WM_NEIGHBORS_WITH_OBMC 0
-#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
-
-#if CONFIG_MOTION_VAR && CONFIG_GLOBAL_MOTION
-#define WARP_GM_NEIGHBORS_WITH_OBMC 0
-#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+// Work out how many pixels off the edge of a reference frame we're allowed
+// to go when forming an inter prediction.
+// The outermost row/col of each reference frame is extended by
+// (AOM_BORDER_IN_PIXELS >> subsampling) pixels, but we need to keep
+// at least AOM_INTERP_EXTEND pixels within that to account for filtering.
+//
+// We have to break this up into two macros to keep both clang-format and
+// tools/lint-hunks.py happy.
+#define AOM_LEFT_TOP_MARGIN_PX(subsampling) \
+ ((AOM_BORDER_IN_PIXELS >> subsampling) - AOM_INTERP_EXTEND)
+#define AOM_LEFT_TOP_MARGIN_SCALED(subsampling) \
+ (AOM_LEFT_TOP_MARGIN_PX(subsampling) << SCALE_SUBPEL_BITS)
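A minimal standalone sketch of the margin arithmetic encoded by the two macros above. The numeric constants here are assumptions chosen purely for illustration; they stand in for AOM_BORDER_IN_PIXELS, AOM_INTERP_EXTEND and SCALE_SUBPEL_BITS, whose real values are defined elsewhere in the tree:

/* Illustrative only: recomputes the left/top margin for luma (subsampling 0)
 * and chroma (subsampling 1) using assumed constants. */
#include <stdio.h>

#define SKETCH_BORDER_IN_PIXELS 288 /* assumed border width in pixels */
#define SKETCH_INTERP_EXTEND 4      /* assumed filter extension in pixels */
#define SKETCH_SCALE_SUBPEL_BITS 10 /* assumed sub-pel precision */

static int sketch_margin_px(int subsampling) {
  /* Border pixels usable by prediction after reserving room for filter taps. */
  return (SKETCH_BORDER_IN_PIXELS >> subsampling) - SKETCH_INTERP_EXTEND;
}

static int sketch_margin_scaled(int subsampling) {
  /* The same margin expressed in scaled (sub-pel) units. */
  return sketch_margin_px(subsampling) << SKETCH_SCALE_SUBPEL_BITS;
}

int main(void) {
  for (int ss = 0; ss <= 1; ++ss)
    printf("subsampling=%d margin_px=%d margin_scaled=%d\n", ss,
           sketch_margin_px(ss), sketch_margin_scaled(ss));
  return 0;
}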
#ifdef __cplusplus
extern "C" {
#endif
-static INLINE int has_scale(int xs, int ys) {
- return xs != SCALE_SUBPEL_SHIFTS || ys != SCALE_SUBPEL_SHIFTS;
-}
-
-static INLINE void inter_predictor(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int subpel_x,
- int subpel_y, const struct scale_factors *sf,
- int w, int h, ConvolveParams *conv_params,
- InterpFilters interp_filters, int xs,
- int ys) {
- assert(conv_params->do_average == 0 || conv_params->do_average == 1);
- assert(sf);
- if (has_scale(xs, ys)) {
- // TODO(afergs, debargha): Use a different scale convolve function
- // that uses higher precision for subpel_x, subpel_y, xs, ys
- if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
-#if CONFIG_CONVOLVE_ROUND
- av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
- interp_filters, subpel_x, xs, subpel_y, ys, 1,
- conv_params);
- conv_params->do_post_rounding = 1;
-#else
- assert(0);
-#endif // CONFIG_CONVOLVE_ROUND
- } else {
- assert(conv_params->round == CONVOLVE_OPT_ROUND);
- av1_convolve_scale(src, src_stride, dst, dst_stride, w, h, interp_filters,
- subpel_x, xs, subpel_y, ys, conv_params);
- }
- } else {
- subpel_x >>= SCALE_EXTRA_BITS;
- subpel_y >>= SCALE_EXTRA_BITS;
- xs >>= SCALE_EXTRA_BITS;
- ys >>= SCALE_EXTRA_BITS;
- assert(subpel_x < SUBPEL_SHIFTS);
- assert(subpel_y < SUBPEL_SHIFTS);
- assert(xs <= SUBPEL_SHIFTS);
- assert(ys <= SUBPEL_SHIFTS);
- if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
-#if CONFIG_CONVOLVE_ROUND
- av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
- interp_filters, subpel_x, xs, subpel_y, ys, 0,
- conv_params);
- conv_params->do_post_rounding = 1;
-#else
- assert(0);
-#endif // CONFIG_CONVOLVE_ROUND
- } else {
- assert(conv_params->round == CONVOLVE_OPT_ROUND);
-
- InterpFilterParams filter_params_x, filter_params_y;
- av1_get_convolve_filter_params(interp_filters, 0, &filter_params_x,
- &filter_params_y);
-
- if (w <= 2 || h <= 2) {
- av1_convolve_c(src, src_stride, dst, dst_stride, w, h, interp_filters,
- subpel_x, xs, subpel_y, ys, conv_params);
- } else if (filter_params_x.taps == SUBPEL_TAPS &&
- filter_params_y.taps == SUBPEL_TAPS) {
- const int16_t *kernel_x =
- av1_get_interp_filter_subpel_kernel(filter_params_x, subpel_x);
- const int16_t *kernel_y =
- av1_get_interp_filter_subpel_kernel(filter_params_y, subpel_y);
- sf->predict[subpel_x != 0][subpel_y != 0][conv_params->do_average](
- src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h);
- } else {
- av1_convolve(src, src_stride, dst, dst_stride, w, h, interp_filters,
- subpel_x, xs, subpel_y, ys, conv_params);
- }
- }
- }
-}
-
-#if CONFIG_HIGHBITDEPTH
-static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride,
- int subpel_x, int subpel_y,
- const struct scale_factors *sf, int w,
- int h, ConvolveParams *conv_params,
- InterpFilters interp_filters, int xs,
- int ys, int bd) {
- const int avg = conv_params->do_average;
- assert(avg == 0 || avg == 1);
-
- if (has_scale(xs, ys)) {
- if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
-#if CONFIG_CONVOLVE_ROUND
- av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
- interp_filters, subpel_x, xs, subpel_y, ys,
- 1, conv_params, bd);
- conv_params->do_post_rounding = 1;
-#else
- assert(0);
-#endif // CONFIG_CONVOLVE_ROUND
- } else {
- av1_highbd_convolve_scale(src, src_stride, dst, dst_stride, w, h,
- interp_filters, subpel_x, xs, subpel_y, ys, avg,
- bd);
- }
- } else {
- subpel_x >>= SCALE_EXTRA_BITS;
- subpel_y >>= SCALE_EXTRA_BITS;
- xs >>= SCALE_EXTRA_BITS;
- ys >>= SCALE_EXTRA_BITS;
- assert(subpel_x < SUBPEL_SHIFTS);
- assert(subpel_y < SUBPEL_SHIFTS);
- assert(xs <= SUBPEL_SHIFTS);
- assert(ys <= SUBPEL_SHIFTS);
- if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
-#if CONFIG_CONVOLVE_ROUND
- av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
- interp_filters, subpel_x, xs, subpel_y, ys,
- 0, conv_params, bd);
- conv_params->do_post_rounding = 1;
-#else
- assert(0);
-#endif // CONFIG_CONVOLVE_ROUND
- } else {
- InterpFilterParams filter_params_x, filter_params_y;
- av1_get_convolve_filter_params(interp_filters, 0, &filter_params_x,
- &filter_params_y);
-
- if (filter_params_x.taps == SUBPEL_TAPS &&
- filter_params_y.taps == SUBPEL_TAPS && w > 2 && h > 2) {
- const int16_t *kernel_x =
- av1_get_interp_filter_subpel_kernel(filter_params_x, subpel_x);
- const int16_t *kernel_y =
- av1_get_interp_filter_subpel_kernel(filter_params_y, subpel_y);
- sf->highbd_predict[subpel_x != 0][subpel_y != 0][avg](
- src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h,
- bd);
- } else {
- av1_highbd_convolve(src, src_stride, dst, dst_stride, w, h,
- interp_filters, subpel_x, xs, subpel_y, ys, avg,
- bd);
- }
- }
- }
-}
-#endif // CONFIG_HIGHBITDEPTH
-
// Set to (1 << 5) if the 32-ary codebooks are used for any block size
#define MAX_WEDGE_TYPES (1 << 4)
@@ -208,38 +70,108 @@ typedef struct {
int bits;
const wedge_code_type *codebook;
uint8_t *signflip;
- int smoother;
wedge_masks_type *masks;
} wedge_params_type;
extern const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL];
+typedef struct SubpelParams {
+ int xs;
+ int ys;
+ int subpel_x;
+ int subpel_y;
+} SubpelParams;
+
+struct build_prediction_ctxt {
+ const AV1_COMMON *cm;
+ int mi_row;
+ int mi_col;
+ uint8_t **tmp_buf;
+ int *tmp_width;
+ int *tmp_height;
+ int *tmp_stride;
+ int mb_to_far_edge;
+};
+
+static INLINE int has_scale(int xs, int ys) {
+ return xs != SCALE_SUBPEL_SHIFTS || ys != SCALE_SUBPEL_SHIFTS;
+}
+
+static INLINE void revert_scale_extra_bits(SubpelParams *sp) {
+ sp->subpel_x >>= SCALE_EXTRA_BITS;
+ sp->subpel_y >>= SCALE_EXTRA_BITS;
+ sp->xs >>= SCALE_EXTRA_BITS;
+ sp->ys >>= SCALE_EXTRA_BITS;
+ assert(sp->subpel_x < SUBPEL_SHIFTS);
+ assert(sp->subpel_y < SUBPEL_SHIFTS);
+ assert(sp->xs <= SUBPEL_SHIFTS);
+ assert(sp->ys <= SUBPEL_SHIFTS);
+}
+
+static INLINE void inter_predictor(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const SubpelParams *subpel_params,
+ const struct scale_factors *sf, int w, int h,
+ ConvolveParams *conv_params,
+ InterpFilters interp_filters) {
+ assert(conv_params->do_average == 0 || conv_params->do_average == 1);
+ assert(sf);
+ if (has_scale(subpel_params->xs, subpel_params->ys)) {
+ av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
+ interp_filters, subpel_params->subpel_x,
+ subpel_params->xs, subpel_params->subpel_y,
+ subpel_params->ys, 1, conv_params, sf);
+ } else {
+ SubpelParams sp = *subpel_params;
+ revert_scale_extra_bits(&sp);
+ av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
+ interp_filters, sp.subpel_x, sp.xs, sp.subpel_y,
+ sp.ys, 0, conv_params, sf);
+ }
+}
+
+static INLINE void highbd_inter_predictor(
+ const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
+ const SubpelParams *subpel_params, const struct scale_factors *sf, int w,
+ int h, ConvolveParams *conv_params, InterpFilters interp_filters, int bd) {
+ assert(conv_params->do_average == 0 || conv_params->do_average == 1);
+ assert(sf);
+ if (has_scale(subpel_params->xs, subpel_params->ys)) {
+ av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
+ interp_filters, subpel_params->subpel_x,
+ subpel_params->xs, subpel_params->subpel_y,
+ subpel_params->ys, 1, conv_params, sf, bd);
+ } else {
+ SubpelParams sp = *subpel_params;
+ revert_scale_extra_bits(&sp);
+ av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
+ interp_filters, sp.subpel_x, sp.xs,
+ sp.subpel_y, sp.ys, 0, conv_params, sf, bd);
+ }
+}
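A small standalone sketch of the dispatch performed by the two predictors above: has_scale() selects between the scaled and unscaled convolve paths, and on the unscaled path revert_scale_extra_bits() drops the extra sub-pel precision first. The constants below are assumptions made for illustration; the real SCALE_SUBPEL_SHIFTS and SCALE_EXTRA_BITS definitions live elsewhere in the tree:

/* Illustrative only: mirrors the scale check and the precision drop. */
#include <stdio.h>

#define SKETCH_SCALE_SUBPEL_SHIFTS (1 << 10) /* assumed */
#define SKETCH_SCALE_EXTRA_BITS 6            /* assumed */

static int sketch_has_scale(int xs, int ys) {
  return xs != SKETCH_SCALE_SUBPEL_SHIFTS || ys != SKETCH_SCALE_SUBPEL_SHIFTS;
}

int main(void) {
  const int xs = SKETCH_SCALE_SUBPEL_SHIFTS, ys = SKETCH_SCALE_SUBPEL_SHIFTS;
  int subpel_x = 512; /* a high-precision sub-pel phase */
  if (sketch_has_scale(xs, ys)) {
    printf("scaled path: keep full sub-pel precision\n");
  } else {
    subpel_x >>= SKETCH_SCALE_EXTRA_BITS; /* as in revert_scale_extra_bits() */
    printf("unscaled path: subpel_x reduced to %d\n", subpel_x);
  }
  return 0;
}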
+
+void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi);
+int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
+ const struct macroblockd_plane *pd, int dir);
+
static INLINE int is_interinter_compound_used(COMPOUND_TYPE type,
BLOCK_SIZE sb_type) {
- (void)sb_type;
+ const int comp_allowed = is_comp_ref_allowed(sb_type);
switch (type) {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- case COMPOUND_AVERAGE: return sb_type >= BLOCK_4X4;
-#else // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- case COMPOUND_AVERAGE: return 1;
-#endif // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
-#if CONFIG_WEDGE
- case COMPOUND_WEDGE: return wedge_params_lookup[sb_type].bits > 0;
-#endif // CONFIG_WEDGE
-#if CONFIG_COMPOUND_SEGMENT
- case COMPOUND_SEG:
- return AOMMIN(block_size_wide[sb_type], block_size_high[sb_type]) >= 8;
-#endif // CONFIG_COMPOUND_SEGMENT
+ case COMPOUND_AVERAGE:
+ case COMPOUND_DIFFWTD: return comp_allowed;
+ case COMPOUND_WEDGE:
+ return comp_allowed && wedge_params_lookup[sb_type].bits > 0;
default: assert(0); return 0;
}
}
static INLINE int is_any_masked_compound_used(BLOCK_SIZE sb_type) {
COMPOUND_TYPE comp_type;
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- if (sb_type < BLOCK_4X4) return 0;
-#endif // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- for (comp_type = 0; comp_type < COMPOUND_TYPES; comp_type++) {
+ int i;
+ if (!is_comp_ref_allowed(sb_type)) return 0;
+ for (i = 0; i < COMPOUND_TYPES; i++) {
+ comp_type = (COMPOUND_TYPE)i;
if (is_masked_compound_type(comp_type) &&
is_interinter_compound_used(comp_type, sb_type))
return 1;
@@ -257,7 +189,6 @@ static INLINE int get_interinter_wedge_bits(BLOCK_SIZE sb_type) {
}
static INLINE int is_interintra_wedge_used(BLOCK_SIZE sb_type) {
- (void)sb_type;
return wedge_params_lookup[sb_type].bits > 0;
}
@@ -265,60 +196,22 @@ static INLINE int get_interintra_wedge_bits(BLOCK_SIZE sb_type) {
return wedge_params_lookup[sb_type].bits;
}
-#if CONFIG_COMPOUND_SEGMENT
-void build_compound_seg_mask(uint8_t *mask, SEG_MASK_TYPE mask_type,
- const uint8_t *src0, int src0_stride,
- const uint8_t *src1, int src1_stride,
- BLOCK_SIZE sb_type, int h, int w);
-#if CONFIG_HIGHBITDEPTH
-void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
- const uint8_t *src0, int src0_stride,
- const uint8_t *src1, int src1_stride,
- BLOCK_SIZE sb_type, int h, int w, int bd);
-#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_COMPOUND_SEGMENT
+void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, const SubpelParams *subpel_params,
+ const struct scale_factors *sf, int w, int h,
+ ConvolveParams *conv_params,
+ InterpFilters interp_filters,
+ const WarpTypesAllowed *warp_types, int p_col,
+ int p_row, int plane, int ref,
+ const MB_MODE_INFO *mi, int build_for_obmc,
+ const MACROBLOCKD *xd, int can_use_previous);
void av1_make_masked_inter_predictor(
const uint8_t *pre, int pre_stride, uint8_t *dst, int dst_stride,
- const int subpel_x, const int subpel_y, const struct scale_factors *sf,
- int w, int h, ConvolveParams *conv_params, InterpFilters interp_filters,
- int xs, int ys,
-#if CONFIG_SUPERTX
- int wedge_offset_x, int wedge_offset_y,
-#endif // CONFIG_SUPERTX
- int plane,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+ const SubpelParams *subpel_params, const struct scale_factors *sf, int w,
+ int h, ConvolveParams *conv_params, InterpFilters interp_filters, int plane,
const WarpTypesAllowed *warp_types, int p_col, int p_row, int ref,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- MACROBLOCKD *xd);
-
-static INLINE int round_mv_comp_q4(int value) {
- return (value < 0 ? value - 2 : value + 2) / 4;
-}
-
-static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) {
- MV res = {
- round_mv_comp_q4(
- mi->bmi[0].as_mv[idx].as_mv.row + mi->bmi[1].as_mv[idx].as_mv.row +
- mi->bmi[2].as_mv[idx].as_mv.row + mi->bmi[3].as_mv[idx].as_mv.row),
- round_mv_comp_q4(
- mi->bmi[0].as_mv[idx].as_mv.col + mi->bmi[1].as_mv[idx].as_mv.col +
- mi->bmi[2].as_mv[idx].as_mv.col + mi->bmi[3].as_mv[idx].as_mv.col)
- };
- return res;
-}
-
-static INLINE int round_mv_comp_q2(int value) {
- return (value < 0 ? value - 1 : value + 1) / 2;
-}
-
-static MV mi_mv_pred_q2(const MODE_INFO *mi, int idx, int block0, int block1) {
- MV res = { round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.row +
- mi->bmi[block1].as_mv[idx].as_mv.row),
- round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.col +
- mi->bmi[block1].as_mv[idx].as_mv.col) };
- return res;
-}
+ MACROBLOCKD *xd, int can_use_previous);
// TODO(jkoleszar): yet another mv clamping function :-(
static INLINE MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd,
@@ -331,8 +224,8 @@ static INLINE MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd,
const int spel_right = spel_left - SUBPEL_SHIFTS;
const int spel_top = (AOM_INTERP_EXTEND + bh) << SUBPEL_BITS;
const int spel_bottom = spel_top - SUBPEL_SHIFTS;
- MV clamped_mv = { src_mv->row * (1 << (1 - ss_y)),
- src_mv->col * (1 << (1 - ss_x)) };
+ MV clamped_mv = { (int16_t)(src_mv->row * (1 << (1 - ss_y))),
+ (int16_t)(src_mv->col * (1 << (1 - ss_x))) };
assert(ss_x <= 1);
assert(ss_y <= 1);
@@ -344,20 +237,6 @@ static INLINE MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd,
return clamped_mv;
}
-static INLINE MV average_split_mvs(const struct macroblockd_plane *pd,
- const MODE_INFO *mi, int ref, int block) {
- const int ss_idx = ((pd->subsampling_x > 0) << 1) | (pd->subsampling_y > 0);
- MV res = { 0, 0 };
- switch (ss_idx) {
- case 0: res = mi->bmi[block].as_mv[ref].as_mv; break;
- case 1: res = mi_mv_pred_q2(mi, ref, block, block + 2); break;
- case 2: res = mi_mv_pred_q2(mi, ref, block, block + 1); break;
- case 3: res = mi_mv_pred_q4(mi, ref); break;
- default: assert(ss_idx <= 3 && ss_idx >= 0);
- }
- return res;
-}
-
void av1_build_inter_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, BUFFER_SET *ctx,
BLOCK_SIZE bsize);
@@ -370,48 +249,22 @@ void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, BUFFER_SET *ctx,
BLOCK_SIZE bsize);
-#if CONFIG_SUPERTX
-void av1_build_inter_predictor_sb_sub8x8_extend(const AV1_COMMON *cm,
- MACROBLOCKD *xd, int mi_row_ori,
- int mi_col_ori, int mi_row,
- int mi_col, int plane,
- BLOCK_SIZE bsize, int block);
-
-void av1_build_inter_predictor_sb_extend(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row_ori, int mi_col_ori,
- int mi_row, int mi_col, int plane,
- BLOCK_SIZE bsize);
-struct macroblockd_plane;
-void av1_build_masked_inter_predictor_complex(
- MACROBLOCKD *xd, uint8_t *dst, int dst_stride, const uint8_t *pre,
- int pre_stride, int mi_row, int mi_col, int mi_row_ori, int mi_col_ori,
- BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, PARTITION_TYPE partition,
- int plane);
-#endif // CONFIG_SUPERTX
-
void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, const MV *src_mv,
const struct scale_factors *sf, int w, int h,
ConvolveParams *conv_params,
InterpFilters interp_filters,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
const WarpTypesAllowed *warp_types, int p_col,
int p_row, int plane, int ref,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
enum mv_precision precision, int x, int y,
- const MACROBLOCKD *xd);
+ const MACROBLOCKD *xd, int can_use_previous);
-#if CONFIG_HIGHBITDEPTH
void av1_highbd_build_inter_predictor(
const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
const MV *mv_q3, const struct scale_factors *sf, int w, int h, int do_avg,
- InterpFilters interp_filters,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- const WarpTypesAllowed *warp_types, int p_col, int p_row,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- int plane, enum mv_precision precision, int x, int y,
- const MACROBLOCKD *xd);
-#endif
+ InterpFilters interp_filters, const WarpTypesAllowed *warp_types, int p_col,
+ int p_row, int plane, enum mv_precision precision, int x, int y,
+ const MACROBLOCKD *xd, int can_use_previous);
static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride,
const struct scale_factors *sf) {
@@ -427,15 +280,11 @@ static INLINE void setup_pred_plane(struct buf_2d *dst, BLOCK_SIZE bsize,
int stride, int mi_row, int mi_col,
const struct scale_factors *scale,
int subsampling_x, int subsampling_y) {
-#if CONFIG_CHROMA_SUB8X8
// Offset the buffer pointer
if (subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1))
mi_row -= 1;
if (subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1))
mi_col -= 1;
-#else
- (void)bsize;
-#endif
const int x = (MI_SIZE * mi_col) >> subsampling_x;
const int y = (MI_SIZE * mi_row) >> subsampling_y;
@@ -447,62 +296,33 @@ static INLINE void setup_pred_plane(struct buf_2d *dst, BLOCK_SIZE bsize,
}
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
- const YV12_BUFFER_CONFIG *src, int mi_row,
- int mi_col);
+ const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
+ const int plane_start, const int plane_end);
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
- const struct scale_factors *sf);
+ const struct scale_factors *sf, const int num_planes);
// Detect if the block has sub-pixel level motion vectors
// per component.
#define CHECK_SUBPEL 0
-static INLINE int has_subpel_mv_component(const MODE_INFO *const mi,
+static INLINE int has_subpel_mv_component(const MB_MODE_INFO *const mbmi,
const MACROBLOCKD *const xd,
int dir) {
#if CHECK_SUBPEL
- const MB_MODE_INFO *const mbmi = &mi->mbmi;
const BLOCK_SIZE bsize = mbmi->sb_type;
int plane;
int ref = (dir >> 1);
-#if CONFIG_CB4X4
- const int unify_bsize = 1;
-#else
- const int unify_bsize = 0;
-#endif
- if (bsize >= BLOCK_8X8 || unify_bsize) {
- if (dir & 0x01) {
- if (mbmi->mv[ref].as_mv.col & SUBPEL_MASK) return 1;
- } else {
- if (mbmi->mv[ref].as_mv.row & SUBPEL_MASK) return 1;
- }
+ if (dir & 0x01) {
+ if (mbmi->mv[ref].as_mv.col & SUBPEL_MASK) return 1;
} else {
- for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
- const PARTITION_TYPE bp = BLOCK_8X8 - bsize;
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const int have_vsplit = bp != PARTITION_HORZ;
- const int have_hsplit = bp != PARTITION_VERT;
- const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
- const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
-
- int x, y;
- for (y = 0; y < num_4x4_h; ++y) {
- for (x = 0; x < num_4x4_w; ++x) {
- const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x);
- if (dir & 0x01) {
- if (mv.col & SUBPEL_MASK) return 1;
- } else {
- if (mv.row & SUBPEL_MASK) return 1;
- }
- }
- }
- }
+ if (mbmi->mv[ref].as_mv.row & SUBPEL_MASK) return 1;
}
return 0;
#else
- (void)mi;
+ (void)mbmi;
(void)xd;
(void)dir;
return 1;
@@ -516,20 +336,16 @@ static INLINE void set_default_interp_filters(
}
static INLINE int av1_is_interp_needed(const MACROBLOCKD *const xd) {
- (void)xd;
-#if CONFIG_WARPED_MOTION
- const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const MB_MODE_INFO *const mbmi = xd->mi[0];
+ if (mbmi->skip_mode) return 0;
if (mbmi->motion_mode == WARPED_CAUSAL) return 0;
-#endif // CONFIG_WARPED_MOTION
-#if CONFIG_GLOBAL_MOTION
- if (is_nontrans_global_motion(xd)) return 0;
-#endif // CONFIG_GLOBAL_MOTION
+ if (is_nontrans_global_motion(xd, xd->mi[0])) return 0;
return 1;
}
static INLINE int av1_is_interp_search_needed(const MACROBLOCKD *const xd) {
- MODE_INFO *const mi = xd->mi[0];
- const int is_compound = has_second_ref(&mi->mbmi);
+ MB_MODE_INFO *const mi = xd->mi[0];
+ const int is_compound = has_second_ref(mi);
int ref;
for (ref = 0; ref < 1 + is_compound; ++ref) {
int row_col;
@@ -542,17 +358,15 @@ static INLINE int av1_is_interp_search_needed(const MACROBLOCKD *const xd) {
}
return 0;
}
-
-#if CONFIG_MOTION_VAR
-const uint8_t *av1_get_obmc_mask(int length);
-void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col);
-void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *above[MAX_MB_PLANE],
- int above_stride[MAX_MB_PLANE],
- uint8_t *left[MAX_MB_PLANE],
- int left_stride[MAX_MB_PLANE]);
+void av1_setup_build_prediction_by_above_pred(
+ MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
+ MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
+ const int num_planes);
+void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
+ uint8_t left_mi_height,
+ MB_MODE_INFO *left_mbmi,
+ struct build_prediction_ctxt *ctxt,
+ const int num_planes);
void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col,
uint8_t *tmp_buf[MAX_MB_PLANE],
@@ -565,13 +379,18 @@ void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
int tmp_width[MAX_MB_PLANE],
int tmp_height[MAX_MB_PLANE],
int tmp_stride[MAX_MB_PLANE]);
+void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *above[MAX_MB_PLANE],
+ int above_stride[MAX_MB_PLANE],
+ uint8_t *left[MAX_MB_PLANE],
+ int left_stride[MAX_MB_PLANE]);
+
+const uint8_t *av1_get_obmc_mask(int length);
+void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ int mi_row, int mi_col);
void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col);
-#if CONFIG_NCOBMC
-void av1_build_ncobmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col);
-#endif
-#endif // CONFIG_MOTION_VAR
#define MASK_MASTER_SIZE ((MAX_WEDGE_SIZE) << 1)
#define MASK_MASTER_STRIDE (MASK_MASTER_SIZE)
@@ -584,32 +403,24 @@ static INLINE const uint8_t *av1_get_contiguous_soft_mask(int wedge_index,
return wedge_params_lookup[sb_type].masks[wedge_sign][wedge_index];
}
-const uint8_t *av1_get_soft_mask(int wedge_index, int wedge_sign,
- BLOCK_SIZE sb_type, int wedge_offset_x,
- int wedge_offset_y);
-
-const uint8_t *av1_get_compound_type_mask_inverse(
- const INTERINTER_COMPOUND_DATA *const comp_data,
-#if CONFIG_COMPOUND_SEGMENT
- uint8_t *mask_buffer, int h, int w, int stride,
-#endif
- BLOCK_SIZE sb_type);
-
const uint8_t *av1_get_compound_type_mask(
const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type);
-#if CONFIG_INTERINTRA
+
void av1_build_interintra_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
uint8_t *ypred, uint8_t *upred,
uint8_t *vpred, int ystride, int ustride,
int vstride, BUFFER_SET *ctx,
BLOCK_SIZE bsize);
+
void av1_build_interintra_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
uint8_t *ypred, int ystride,
BUFFER_SET *ctx, BLOCK_SIZE bsize);
+
void av1_build_interintra_predictors_sbc(const AV1_COMMON *cm, MACROBLOCKD *xd,
uint8_t *upred, int ustride,
BUFFER_SET *ctx, int plane,
BLOCK_SIZE bsize);
+
void av1_build_interintra_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
uint8_t *upred, uint8_t *vpred,
int ustride, int vstride,
@@ -621,57 +432,27 @@ void av1_build_intra_predictors_for_interintra(
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
const uint8_t *inter_pred, int inter_stride,
const uint8_t *intra_pred, int intra_stride);
-#endif // CONFIG_INTERINTRA
+
// Encoder only
void av1_build_inter_predictors_for_planes_single_buf(
MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row,
- int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3]);
-void av1_build_wedge_inter_predictor_from_buf(
- MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to,
-#if CONFIG_SUPERTX
- int wedge_offset_x, int wedge_offset_y,
-#endif // CONFIG_SUPERTX
- uint8_t *ext_dst0[3], int ext_dst_stride0[3], uint8_t *ext_dst1[3],
- int ext_dst_stride1[3]);
-
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
-#define ASSIGN_ALIGNED_PTRS(p, a, s) \
- p[0] = a; \
- p[1] = a + s; \
- p[2] = a + 2 * s;
-
-#define ASSIGN_ALIGNED_PTRS_HBD(p, a, s, l) \
- p[0] = CONVERT_TO_BYTEPTR(a); \
- p[1] = CONVERT_TO_BYTEPTR(a + s * l); \
- p[2] = CONVERT_TO_BYTEPTR(a + 2 * s * l);
-
-void alloc_ncobmc_pred_buffer(MACROBLOCKD *const xd);
-void free_ncobmc_pred_buffer(MACROBLOCKD *const xd);
-void set_sb_mi_boundaries(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
- const int mi_row, const int mi_col);
-
-void reset_xd_boundary(MACROBLOCKD *xd, int mi_row, int bh, int mi_col, int bw,
- int mi_rows, int mi_cols);
-
-void get_pred_from_intrpl_buf(MACROBLOCKD *xd, int mi_row, int mi_col,
- BLOCK_SIZE bsize, int plane);
-
-void build_ncobmc_intrpl_pred(const AV1_COMMON *const cm, MACROBLOCKD *xd,
- int plane, int pxl_row, int pxl_col,
- BLOCK_SIZE bsize, uint8_t *preds[][MAX_MB_PLANE],
- int ps[MAX_MB_PLANE], // pred buffer strides
- int mode);
-
-void av1_get_ext_blk_preds(const AV1_COMMON *cm, MACROBLOCKD *xd, int bsize,
- int mi_row, int mi_col,
- uint8_t *dst_buf[][MAX_MB_PLANE],
- int dst_stride[MAX_MB_PLANE]);
-
-void av1_get_ori_blk_pred(const AV1_COMMON *cm, MACROBLOCKD *xd, int bsize,
- int mi_row, int mi_col,
- uint8_t *dst_buf[MAX_MB_PLANE],
- int dst_stride[MAX_MB_PLANE]);
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
+ int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3],
+ int can_use_previous);
+void av1_build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, BLOCK_SIZE bsize,
+ int plane_from, int plane_to,
+ uint8_t *ext_dst0[3],
+ int ext_dst_stride0[3],
+ uint8_t *ext_dst1[3],
+ int ext_dst_stride1[3]);
+
+void av1_jnt_comp_weight_assign(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi,
+ int order_idx, int *fwd_offset, int *bck_offset,
+ int *use_jnt_comp_avg, int is_compound);
+int av1_allow_warp(const MB_MODE_INFO *const mbmi,
+ const WarpTypesAllowed *const warp_types,
+ const WarpedMotionParams *const gm_params,
+ int build_for_obmc, int x_scale, int y_scale,
+ WarpedMotionParams *final_warp_params);
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/aom/av1/common/reconintra.c b/third_party/aom/av1/common/reconintra.c
index c6d57b742..21d1f60b2 100644
--- a/third_party/aom/av1/common/reconintra.c
+++ b/third_party/aom/av1/common/reconintra.c
@@ -11,22 +11,18 @@
#include <math.h>
-#include "./av1_rtcd.h"
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
-#include "aom_ports/system_state.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
-#if CONFIG_HIGHBITDEPTH
#include "aom_dsp/aom_dsp_common.h"
-#endif // CONFIG_HIGHBITDEPTH
#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
#include "aom_ports/aom_once.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/system_state.h"
#include "av1/common/reconintra.h"
#include "av1/common/onyxc_int.h"
-#if CONFIG_CFL
#include "av1/common/cfl.h"
-#endif
enum {
NEED_LEFT = 1 << 1,
@@ -36,17 +32,9 @@ enum {
NEED_BOTTOMLEFT = 1 << 5,
};
-#if CONFIG_INTRA_EDGE
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
-#if CONFIG_INTRA_EDGE_UPSAMPLE
-#define MAX_UPSAMPLE_SZ 12
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
-#endif // CONFIG_INTRA_EDGE
-
-#define INTRA_USES_EXT_TRANSFORMS 1
-#define INTRA_USES_RECT_TRANSFORMS \
- (CONFIG_RECT_TX && (CONFIG_VAR_TX || CONFIG_EXT_TX))
+#define MAX_UPSAMPLE_SZ 16
static const uint8_t extend_modes[INTRA_MODES] = {
NEED_ABOVE | NEED_LEFT, // DC
@@ -54,515 +42,187 @@ static const uint8_t extend_modes[INTRA_MODES] = {
NEED_LEFT, // H
NEED_ABOVE | NEED_ABOVERIGHT, // D45
NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D135
- NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D117
- NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D153
- NEED_LEFT | NEED_BOTTOMLEFT, // D207
- NEED_ABOVE | NEED_ABOVERIGHT, // D63
+ NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D113
+ NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D157
+ NEED_LEFT | NEED_BOTTOMLEFT, // D203
+ NEED_ABOVE | NEED_ABOVERIGHT, // D67
NEED_LEFT | NEED_ABOVE, // SMOOTH
-#if CONFIG_SMOOTH_HV
NEED_LEFT | NEED_ABOVE, // SMOOTH_V
NEED_LEFT | NEED_ABOVE, // SMOOTH_H
-#endif // CONFIG_SMOOTH_HV
- NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // TM
+ NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // PAETH
};
-static const uint16_t orders_128x128[1] = { 0 };
-static const uint16_t orders_128x64[2] = { 0, 1 };
-static const uint16_t orders_64x128[2] = { 0, 1 };
-static const uint16_t orders_64x64[4] = {
- 0, 1, 2, 3,
-};
-static const uint16_t orders_64x32[8] = {
- 0, 2, 1, 3, 4, 6, 5, 7,
+// Tables to store if the top-right reference pixels are available. The flags
+// are represented with bits, packed into 8-bit integers. E.g., for the 32x32
+// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
+// order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
+// i.e. (table[10 / 8] >> (10 % 8)) & 1.
+// . . . .
+// . . . .
+// . . o .
+// . . . .
+static uint8_t has_tr_4x4[128] = {
+ 255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
+ 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
+ 255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
+ 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
+ 255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
+ 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
+ 255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
+ 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
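The addressing described in the comment above can be exercised on its own. This sketch copies the two entries of has_tr_32x32 (defined further down in this hunk) and evaluates the exact expression from the comment for the "o" block at raster index 10:

/* Illustrative only: the bit lookup (table[idx / 8] >> (idx % 8)) & 1. */
#include <stdint.h>
#include <stdio.h>

static int sketch_has_tr_flag(const uint8_t *table, int raster_idx) {
  return (table[raster_idx / 8] >> (raster_idx % 8)) & 1;
}

int main(void) {
  /* Same values as has_tr_32x32 below: { 95, 87 }. */
  static const uint8_t has_tr_32x32_sample[2] = { 95, 87 };
  /* Index 10 -> entry 1, bit 2 of 87 (0b1010111), i.e. the flag is 1. */
  printf("top-right available for block 10: %d\n",
         sketch_has_tr_flag(has_tr_32x32_sample, 10));
  return 0;
}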
-static const uint16_t orders_32x64[8] = {
- 0, 1, 2, 3, 4, 5, 6, 7,
+static uint8_t has_tr_4x8[64] = {
+ 255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
+ 119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
+ 127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
+ 119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
+ 119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
-static const uint16_t orders_32x32[16] = {
- 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15,
+static uint8_t has_tr_8x4[64] = {
+ 255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
+ 127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
+ 255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
+ 127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
-static const uint16_t orders_32x16[32] = {
- 0, 2, 8, 10, 1, 3, 9, 11, 4, 6, 12, 14, 5, 7, 13, 15,
- 16, 18, 24, 26, 17, 19, 25, 27, 20, 22, 28, 30, 21, 23, 29, 31,
+static uint8_t has_tr_8x8[32] = {
+ 255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
+ 255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
-static const uint16_t orders_16x32[32] = {
- 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15,
- 16, 17, 18, 19, 24, 25, 26, 27, 20, 21, 22, 23, 28, 29, 30, 31,
+static uint8_t has_tr_8x16[16] = {
+ 255, 255, 119, 119, 127, 127, 119, 119,
+ 255, 127, 119, 119, 127, 127, 119, 119,
};
-static const uint16_t orders_16x16[64] = {
- 0, 1, 4, 5, 16, 17, 20, 21, 2, 3, 6, 7, 18, 19, 22, 23,
- 8, 9, 12, 13, 24, 25, 28, 29, 10, 11, 14, 15, 26, 27, 30, 31,
- 32, 33, 36, 37, 48, 49, 52, 53, 34, 35, 38, 39, 50, 51, 54, 55,
- 40, 41, 44, 45, 56, 57, 60, 61, 42, 43, 46, 47, 58, 59, 62, 63,
+static uint8_t has_tr_16x8[16] = {
+ 255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
-
-static const uint16_t orders_64x16[16] = {
- 0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15,
+static uint8_t has_tr_16x16[8] = {
+ 255, 85, 119, 85, 127, 85, 119, 85,
};
-static const uint16_t orders_16x64[16] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+static uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
+static uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
+static uint8_t has_tr_32x32[2] = { 95, 87 };
+static uint8_t has_tr_32x64[1] = { 127 };
+static uint8_t has_tr_64x32[1] = { 19 };
+static uint8_t has_tr_64x64[1] = { 7 };
+static uint8_t has_tr_64x128[1] = { 3 };
+static uint8_t has_tr_128x64[1] = { 1 };
+static uint8_t has_tr_128x128[1] = { 1 };
+static uint8_t has_tr_4x16[32] = {
+ 255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
+ 127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
+ 127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
-static const uint16_t orders_32x8[64] = {
- 0, 4, 16, 20, 1, 5, 17, 21, 2, 6, 18, 22, 3, 7, 19, 23,
- 8, 12, 24, 28, 9, 13, 25, 29, 10, 14, 26, 30, 11, 15, 27, 31,
- 32, 36, 48, 52, 33, 37, 49, 53, 34, 38, 50, 54, 35, 39, 51, 55,
- 40, 44, 56, 60, 41, 45, 57, 61, 42, 46, 58, 62, 43, 47, 59, 63,
+static uint8_t has_tr_16x4[32] = {
+ 255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
+ 127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
-static const uint16_t orders_8x32[64] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23,
- 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 48, 49, 50, 51, 52, 53, 54, 55,
- 40, 41, 42, 43, 44, 45, 46, 47, 56, 57, 58, 59, 60, 61, 62, 63,
+static uint8_t has_tr_8x32[8] = {
+ 255, 255, 127, 127, 255, 127, 127, 127,
};
-
-#if CONFIG_EXT_PARTITION
-static const uint16_t orders_16x4[256] = {
- 0, 4, 16, 20, 64, 68, 80, 84, 1, 5, 17, 21, 65, 69, 81,
- 85, 2, 6, 18, 22, 66, 70, 82, 86, 3, 7, 19, 23, 67, 71,
- 83, 87, 8, 12, 24, 28, 72, 76, 88, 92, 9, 13, 25, 29, 73,
- 77, 89, 93, 10, 14, 26, 30, 74, 78, 90, 94, 11, 15, 27, 31,
- 75, 79, 91, 95, 32, 36, 48, 52, 96, 100, 112, 116, 33, 37, 49,
- 53, 97, 101, 113, 117, 34, 38, 50, 54, 98, 102, 114, 118, 35, 39,
- 51, 55, 99, 103, 115, 119, 40, 44, 56, 60, 104, 108, 120, 124, 41,
- 45, 57, 61, 105, 109, 121, 125, 42, 46, 58, 62, 106, 110, 122, 126,
- 43, 47, 59, 63, 107, 111, 123, 127, 128, 132, 144, 148, 192, 196, 208,
- 212, 129, 133, 145, 149, 193, 197, 209, 213, 130, 134, 146, 150, 194, 198,
- 210, 214, 131, 135, 147, 151, 195, 199, 211, 215, 136, 140, 152, 156, 200,
- 204, 216, 220, 137, 141, 153, 157, 201, 205, 217, 221, 138, 142, 154, 158,
- 202, 206, 218, 222, 139, 143, 155, 159, 203, 207, 219, 223, 160, 164, 176,
- 180, 224, 228, 240, 244, 161, 165, 177, 181, 225, 229, 241, 245, 162, 166,
- 178, 182, 226, 230, 242, 246, 163, 167, 179, 183, 227, 231, 243, 247, 168,
- 172, 184, 188, 232, 236, 248, 252, 169, 173, 185, 189, 233, 237, 249, 253,
- 170, 174, 186, 190, 234, 238, 250, 254, 171, 175, 187, 191, 235, 239, 251,
- 255,
+static uint8_t has_tr_32x8[8] = {
+ 15, 0, 5, 0, 7, 0, 5, 0,
};
-static const uint16_t orders_4x16[256] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22,
- 23, 64, 65, 66, 67, 68, 69, 70, 71, 80, 81, 82, 83, 84, 85,
- 86, 87, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28,
- 29, 30, 31, 72, 73, 74, 75, 76, 77, 78, 79, 88, 89, 90, 91,
- 92, 93, 94, 95, 32, 33, 34, 35, 36, 37, 38, 39, 48, 49, 50,
- 51, 52, 53, 54, 55, 96, 97, 98, 99, 100, 101, 102, 103, 112, 113,
- 114, 115, 116, 117, 118, 119, 40, 41, 42, 43, 44, 45, 46, 47, 56,
- 57, 58, 59, 60, 61, 62, 63, 104, 105, 106, 107, 108, 109, 110, 111,
- 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
- 135, 144, 145, 146, 147, 148, 149, 150, 151, 192, 193, 194, 195, 196, 197,
- 198, 199, 208, 209, 210, 211, 212, 213, 214, 215, 136, 137, 138, 139, 140,
- 141, 142, 143, 152, 153, 154, 155, 156, 157, 158, 159, 200, 201, 202, 203,
- 204, 205, 206, 207, 216, 217, 218, 219, 220, 221, 222, 223, 160, 161, 162,
- 163, 164, 165, 166, 167, 176, 177, 178, 179, 180, 181, 182, 183, 224, 225,
- 226, 227, 228, 229, 230, 231, 240, 241, 242, 243, 244, 245, 246, 247, 168,
- 169, 170, 171, 172, 173, 174, 175, 184, 185, 186, 187, 188, 189, 190, 191,
- 232, 233, 234, 235, 236, 237, 238, 239, 248, 249, 250, 251, 252, 253, 254,
- 255,
+static uint8_t has_tr_16x64[2] = { 255, 127 };
+static uint8_t has_tr_64x16[2] = { 3, 1 };
+
+static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
+ // 4X4
+ has_tr_4x4,
+ // 4X8, 8X4, 8X8
+ has_tr_4x8, has_tr_8x4, has_tr_8x8,
+ // 8X16, 16X8, 16X16
+ has_tr_8x16, has_tr_16x8, has_tr_16x16,
+ // 16X32, 32X16, 32X32
+ has_tr_16x32, has_tr_32x16, has_tr_32x32,
+ // 32X64, 64X32, 64X64
+ has_tr_32x64, has_tr_64x32, has_tr_64x64,
+ // 64x128, 128x64, 128x128
+ has_tr_64x128, has_tr_128x64, has_tr_128x128,
+ // 4x16, 16x4, 8x32
+ has_tr_4x16, has_tr_16x4, has_tr_8x32,
+ // 32x8, 16x64, 64x16
+ has_tr_32x8, has_tr_16x64, has_tr_64x16
};
-#endif
-static const uint16_t orders_32x128[4] = {
- 0, 1, 2, 3,
-};
-static const uint16_t orders_128x32[4] = {
- 0, 1, 2, 3,
-};
-
-#if CONFIG_CB4X4 || CONFIG_EXT_PARTITION
-static const uint16_t orders_16x8[128] = {
- 0, 2, 8, 10, 32, 34, 40, 42, 1, 3, 9, 11, 33, 35, 41, 43,
- 4, 6, 12, 14, 36, 38, 44, 46, 5, 7, 13, 15, 37, 39, 45, 47,
- 16, 18, 24, 26, 48, 50, 56, 58, 17, 19, 25, 27, 49, 51, 57, 59,
- 20, 22, 28, 30, 52, 54, 60, 62, 21, 23, 29, 31, 53, 55, 61, 63,
- 64, 66, 72, 74, 96, 98, 104, 106, 65, 67, 73, 75, 97, 99, 105, 107,
- 68, 70, 76, 78, 100, 102, 108, 110, 69, 71, 77, 79, 101, 103, 109, 111,
- 80, 82, 88, 90, 112, 114, 120, 122, 81, 83, 89, 91, 113, 115, 121, 123,
- 84, 86, 92, 94, 116, 118, 124, 126, 85, 87, 93, 95, 117, 119, 125, 127,
-};
-static const uint16_t orders_8x16[128] = {
- 0, 1, 2, 3, 8, 9, 10, 11, 32, 33, 34, 35, 40, 41, 42, 43,
- 4, 5, 6, 7, 12, 13, 14, 15, 36, 37, 38, 39, 44, 45, 46, 47,
- 16, 17, 18, 19, 24, 25, 26, 27, 48, 49, 50, 51, 56, 57, 58, 59,
- 20, 21, 22, 23, 28, 29, 30, 31, 52, 53, 54, 55, 60, 61, 62, 63,
- 64, 65, 66, 67, 72, 73, 74, 75, 96, 97, 98, 99, 104, 105, 106, 107,
- 68, 69, 70, 71, 76, 77, 78, 79, 100, 101, 102, 103, 108, 109, 110, 111,
- 80, 81, 82, 83, 88, 89, 90, 91, 112, 113, 114, 115, 120, 121, 122, 123,
- 84, 85, 86, 87, 92, 93, 94, 95, 116, 117, 118, 119, 124, 125, 126, 127,
+static uint8_t has_tr_vert_8x8[32] = {
+ 255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
+ 255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
-static const uint16_t orders_8x8[256] = {
- 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84,
- 85, 2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83,
- 86, 87, 8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88,
- 89, 92, 93, 10, 11, 14, 15, 26, 27, 30, 31, 74, 75, 78, 79,
- 90, 91, 94, 95, 32, 33, 36, 37, 48, 49, 52, 53, 96, 97, 100,
- 101, 112, 113, 116, 117, 34, 35, 38, 39, 50, 51, 54, 55, 98, 99,
- 102, 103, 114, 115, 118, 119, 40, 41, 44, 45, 56, 57, 60, 61, 104,
- 105, 108, 109, 120, 121, 124, 125, 42, 43, 46, 47, 58, 59, 62, 63,
- 106, 107, 110, 111, 122, 123, 126, 127, 128, 129, 132, 133, 144, 145, 148,
- 149, 192, 193, 196, 197, 208, 209, 212, 213, 130, 131, 134, 135, 146, 147,
- 150, 151, 194, 195, 198, 199, 210, 211, 214, 215, 136, 137, 140, 141, 152,
- 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221, 138, 139, 142, 143,
- 154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223, 160, 161, 164,
- 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245, 162, 163,
- 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247, 168,
- 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253,
- 170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254,
- 255,
+static uint8_t has_tr_vert_16x16[8] = {
+ 255, 0, 119, 0, 127, 0, 119, 0,
};
-
-#if CONFIG_CB4X4 && CONFIG_EXT_PARTITION
-static const uint16_t orders_4x8[512] = {
- 0, 1, 2, 3, 8, 9, 10, 11, 32, 33, 34, 35, 40, 41, 42,
- 43, 128, 129, 130, 131, 136, 137, 138, 139, 160, 161, 162, 163, 168, 169,
- 170, 171, 4, 5, 6, 7, 12, 13, 14, 15, 36, 37, 38, 39, 44,
- 45, 46, 47, 132, 133, 134, 135, 140, 141, 142, 143, 164, 165, 166, 167,
- 172, 173, 174, 175, 16, 17, 18, 19, 24, 25, 26, 27, 48, 49, 50,
- 51, 56, 57, 58, 59, 144, 145, 146, 147, 152, 153, 154, 155, 176, 177,
- 178, 179, 184, 185, 186, 187, 20, 21, 22, 23, 28, 29, 30, 31, 52,
- 53, 54, 55, 60, 61, 62, 63, 148, 149, 150, 151, 156, 157, 158, 159,
- 180, 181, 182, 183, 188, 189, 190, 191, 64, 65, 66, 67, 72, 73, 74,
- 75, 96, 97, 98, 99, 104, 105, 106, 107, 192, 193, 194, 195, 200, 201,
- 202, 203, 224, 225, 226, 227, 232, 233, 234, 235, 68, 69, 70, 71, 76,
- 77, 78, 79, 100, 101, 102, 103, 108, 109, 110, 111, 196, 197, 198, 199,
- 204, 205, 206, 207, 228, 229, 230, 231, 236, 237, 238, 239, 80, 81, 82,
- 83, 88, 89, 90, 91, 112, 113, 114, 115, 120, 121, 122, 123, 208, 209,
- 210, 211, 216, 217, 218, 219, 240, 241, 242, 243, 248, 249, 250, 251, 84,
- 85, 86, 87, 92, 93, 94, 95, 116, 117, 118, 119, 124, 125, 126, 127,
- 212, 213, 214, 215, 220, 221, 222, 223, 244, 245, 246, 247, 252, 253, 254,
- 255, 256, 257, 258, 259, 264, 265, 266, 267, 288, 289, 290, 291, 296, 297,
- 298, 299, 384, 385, 386, 387, 392, 393, 394, 395, 416, 417, 418, 419, 424,
- 425, 426, 427, 260, 261, 262, 263, 268, 269, 270, 271, 292, 293, 294, 295,
- 300, 301, 302, 303, 388, 389, 390, 391, 396, 397, 398, 399, 420, 421, 422,
- 423, 428, 429, 430, 431, 272, 273, 274, 275, 280, 281, 282, 283, 304, 305,
- 306, 307, 312, 313, 314, 315, 400, 401, 402, 403, 408, 409, 410, 411, 432,
- 433, 434, 435, 440, 441, 442, 443, 276, 277, 278, 279, 284, 285, 286, 287,
- 308, 309, 310, 311, 316, 317, 318, 319, 404, 405, 406, 407, 412, 413, 414,
- 415, 436, 437, 438, 439, 444, 445, 446, 447, 320, 321, 322, 323, 328, 329,
- 330, 331, 352, 353, 354, 355, 360, 361, 362, 363, 448, 449, 450, 451, 456,
- 457, 458, 459, 480, 481, 482, 483, 488, 489, 490, 491, 324, 325, 326, 327,
- 332, 333, 334, 335, 356, 357, 358, 359, 364, 365, 366, 367, 452, 453, 454,
- 455, 460, 461, 462, 463, 484, 485, 486, 487, 492, 493, 494, 495, 336, 337,
- 338, 339, 344, 345, 346, 347, 368, 369, 370, 371, 376, 377, 378, 379, 464,
- 465, 466, 467, 472, 473, 474, 475, 496, 497, 498, 499, 504, 505, 506, 507,
- 340, 341, 342, 343, 348, 349, 350, 351, 372, 373, 374, 375, 380, 381, 382,
- 383, 468, 469, 470, 471, 476, 477, 478, 479, 500, 501, 502, 503, 508, 509,
- 510, 511,
-};
-
-static const uint16_t orders_8x4[512] = {
- 0, 2, 8, 10, 32, 34, 40, 42, 128, 130, 136, 138, 160, 162, 168,
- 170, 1, 3, 9, 11, 33, 35, 41, 43, 129, 131, 137, 139, 161, 163,
- 169, 171, 4, 6, 12, 14, 36, 38, 44, 46, 132, 134, 140, 142, 164,
- 166, 172, 174, 5, 7, 13, 15, 37, 39, 45, 47, 133, 135, 141, 143,
- 165, 167, 173, 175, 16, 18, 24, 26, 48, 50, 56, 58, 144, 146, 152,
- 154, 176, 178, 184, 186, 17, 19, 25, 27, 49, 51, 57, 59, 145, 147,
- 153, 155, 177, 179, 185, 187, 20, 22, 28, 30, 52, 54, 60, 62, 148,
- 150, 156, 158, 180, 182, 188, 190, 21, 23, 29, 31, 53, 55, 61, 63,
- 149, 151, 157, 159, 181, 183, 189, 191, 64, 66, 72, 74, 96, 98, 104,
- 106, 192, 194, 200, 202, 224, 226, 232, 234, 65, 67, 73, 75, 97, 99,
- 105, 107, 193, 195, 201, 203, 225, 227, 233, 235, 68, 70, 76, 78, 100,
- 102, 108, 110, 196, 198, 204, 206, 228, 230, 236, 238, 69, 71, 77, 79,
- 101, 103, 109, 111, 197, 199, 205, 207, 229, 231, 237, 239, 80, 82, 88,
- 90, 112, 114, 120, 122, 208, 210, 216, 218, 240, 242, 248, 250, 81, 83,
- 89, 91, 113, 115, 121, 123, 209, 211, 217, 219, 241, 243, 249, 251, 84,
- 86, 92, 94, 116, 118, 124, 126, 212, 214, 220, 222, 244, 246, 252, 254,
- 85, 87, 93, 95, 117, 119, 125, 127, 213, 215, 221, 223, 245, 247, 253,
- 255, 256, 258, 264, 266, 288, 290, 296, 298, 384, 386, 392, 394, 416, 418,
- 424, 426, 257, 259, 265, 267, 289, 291, 297, 299, 385, 387, 393, 395, 417,
- 419, 425, 427, 260, 262, 268, 270, 292, 294, 300, 302, 388, 390, 396, 398,
- 420, 422, 428, 430, 261, 263, 269, 271, 293, 295, 301, 303, 389, 391, 397,
- 399, 421, 423, 429, 431, 272, 274, 280, 282, 304, 306, 312, 314, 400, 402,
- 408, 410, 432, 434, 440, 442, 273, 275, 281, 283, 305, 307, 313, 315, 401,
- 403, 409, 411, 433, 435, 441, 443, 276, 278, 284, 286, 308, 310, 316, 318,
- 404, 406, 412, 414, 436, 438, 444, 446, 277, 279, 285, 287, 309, 311, 317,
- 319, 405, 407, 413, 415, 437, 439, 445, 447, 320, 322, 328, 330, 352, 354,
- 360, 362, 448, 450, 456, 458, 480, 482, 488, 490, 321, 323, 329, 331, 353,
- 355, 361, 363, 449, 451, 457, 459, 481, 483, 489, 491, 324, 326, 332, 334,
- 356, 358, 364, 366, 452, 454, 460, 462, 484, 486, 492, 494, 325, 327, 333,
- 335, 357, 359, 365, 367, 453, 455, 461, 463, 485, 487, 493, 495, 336, 338,
- 344, 346, 368, 370, 376, 378, 464, 466, 472, 474, 496, 498, 504, 506, 337,
- 339, 345, 347, 369, 371, 377, 379, 465, 467, 473, 475, 497, 499, 505, 507,
- 340, 342, 348, 350, 372, 374, 380, 382, 468, 470, 476, 478, 500, 502, 508,
- 510, 341, 343, 349, 351, 373, 375, 381, 383, 469, 471, 477, 479, 501, 503,
- 509, 511,
-};
-
-static const uint16_t orders_4x4[1024] = {
- 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80,
- 81, 84, 85, 256, 257, 260, 261, 272, 273, 276, 277, 320, 321,
- 324, 325, 336, 337, 340, 341, 2, 3, 6, 7, 18, 19, 22,
- 23, 66, 67, 70, 71, 82, 83, 86, 87, 258, 259, 262, 263,
- 274, 275, 278, 279, 322, 323, 326, 327, 338, 339, 342, 343, 8,
- 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88, 89,
- 92, 93, 264, 265, 268, 269, 280, 281, 284, 285, 328, 329, 332,
- 333, 344, 345, 348, 349, 10, 11, 14, 15, 26, 27, 30, 31,
- 74, 75, 78, 79, 90, 91, 94, 95, 266, 267, 270, 271, 282,
- 283, 286, 287, 330, 331, 334, 335, 346, 347, 350, 351, 32, 33,
- 36, 37, 48, 49, 52, 53, 96, 97, 100, 101, 112, 113, 116,
- 117, 288, 289, 292, 293, 304, 305, 308, 309, 352, 353, 356, 357,
- 368, 369, 372, 373, 34, 35, 38, 39, 50, 51, 54, 55, 98,
- 99, 102, 103, 114, 115, 118, 119, 290, 291, 294, 295, 306, 307,
- 310, 311, 354, 355, 358, 359, 370, 371, 374, 375, 40, 41, 44,
- 45, 56, 57, 60, 61, 104, 105, 108, 109, 120, 121, 124, 125,
- 296, 297, 300, 301, 312, 313, 316, 317, 360, 361, 364, 365, 376,
- 377, 380, 381, 42, 43, 46, 47, 58, 59, 62, 63, 106, 107,
- 110, 111, 122, 123, 126, 127, 298, 299, 302, 303, 314, 315, 318,
- 319, 362, 363, 366, 367, 378, 379, 382, 383, 128, 129, 132, 133,
- 144, 145, 148, 149, 192, 193, 196, 197, 208, 209, 212, 213, 384,
- 385, 388, 389, 400, 401, 404, 405, 448, 449, 452, 453, 464, 465,
- 468, 469, 130, 131, 134, 135, 146, 147, 150, 151, 194, 195, 198,
- 199, 210, 211, 214, 215, 386, 387, 390, 391, 402, 403, 406, 407,
- 450, 451, 454, 455, 466, 467, 470, 471, 136, 137, 140, 141, 152,
- 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221, 392, 393,
- 396, 397, 408, 409, 412, 413, 456, 457, 460, 461, 472, 473, 476,
- 477, 138, 139, 142, 143, 154, 155, 158, 159, 202, 203, 206, 207,
- 218, 219, 222, 223, 394, 395, 398, 399, 410, 411, 414, 415, 458,
- 459, 462, 463, 474, 475, 478, 479, 160, 161, 164, 165, 176, 177,
- 180, 181, 224, 225, 228, 229, 240, 241, 244, 245, 416, 417, 420,
- 421, 432, 433, 436, 437, 480, 481, 484, 485, 496, 497, 500, 501,
- 162, 163, 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242,
- 243, 246, 247, 418, 419, 422, 423, 434, 435, 438, 439, 482, 483,
- 486, 487, 498, 499, 502, 503, 168, 169, 172, 173, 184, 185, 188,
- 189, 232, 233, 236, 237, 248, 249, 252, 253, 424, 425, 428, 429,
- 440, 441, 444, 445, 488, 489, 492, 493, 504, 505, 508, 509, 170,
- 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251,
- 254, 255, 426, 427, 430, 431, 442, 443, 446, 447, 490, 491, 494,
- 495, 506, 507, 510, 511, 512, 513, 516, 517, 528, 529, 532, 533,
- 576, 577, 580, 581, 592, 593, 596, 597, 768, 769, 772, 773, 784,
- 785, 788, 789, 832, 833, 836, 837, 848, 849, 852, 853, 514, 515,
- 518, 519, 530, 531, 534, 535, 578, 579, 582, 583, 594, 595, 598,
- 599, 770, 771, 774, 775, 786, 787, 790, 791, 834, 835, 838, 839,
- 850, 851, 854, 855, 520, 521, 524, 525, 536, 537, 540, 541, 584,
- 585, 588, 589, 600, 601, 604, 605, 776, 777, 780, 781, 792, 793,
- 796, 797, 840, 841, 844, 845, 856, 857, 860, 861, 522, 523, 526,
- 527, 538, 539, 542, 543, 586, 587, 590, 591, 602, 603, 606, 607,
- 778, 779, 782, 783, 794, 795, 798, 799, 842, 843, 846, 847, 858,
- 859, 862, 863, 544, 545, 548, 549, 560, 561, 564, 565, 608, 609,
- 612, 613, 624, 625, 628, 629, 800, 801, 804, 805, 816, 817, 820,
- 821, 864, 865, 868, 869, 880, 881, 884, 885, 546, 547, 550, 551,
- 562, 563, 566, 567, 610, 611, 614, 615, 626, 627, 630, 631, 802,
- 803, 806, 807, 818, 819, 822, 823, 866, 867, 870, 871, 882, 883,
- 886, 887, 552, 553, 556, 557, 568, 569, 572, 573, 616, 617, 620,
- 621, 632, 633, 636, 637, 808, 809, 812, 813, 824, 825, 828, 829,
- 872, 873, 876, 877, 888, 889, 892, 893, 554, 555, 558, 559, 570,
- 571, 574, 575, 618, 619, 622, 623, 634, 635, 638, 639, 810, 811,
- 814, 815, 826, 827, 830, 831, 874, 875, 878, 879, 890, 891, 894,
- 895, 640, 641, 644, 645, 656, 657, 660, 661, 704, 705, 708, 709,
- 720, 721, 724, 725, 896, 897, 900, 901, 912, 913, 916, 917, 960,
- 961, 964, 965, 976, 977, 980, 981, 642, 643, 646, 647, 658, 659,
- 662, 663, 706, 707, 710, 711, 722, 723, 726, 727, 898, 899, 902,
- 903, 914, 915, 918, 919, 962, 963, 966, 967, 978, 979, 982, 983,
- 648, 649, 652, 653, 664, 665, 668, 669, 712, 713, 716, 717, 728,
- 729, 732, 733, 904, 905, 908, 909, 920, 921, 924, 925, 968, 969,
- 972, 973, 984, 985, 988, 989, 650, 651, 654, 655, 666, 667, 670,
- 671, 714, 715, 718, 719, 730, 731, 734, 735, 906, 907, 910, 911,
- 922, 923, 926, 927, 970, 971, 974, 975, 986, 987, 990, 991, 672,
- 673, 676, 677, 688, 689, 692, 693, 736, 737, 740, 741, 752, 753,
- 756, 757, 928, 929, 932, 933, 944, 945, 948, 949, 992, 993, 996,
- 997, 1008, 1009, 1012, 1013, 674, 675, 678, 679, 690, 691, 694, 695,
- 738, 739, 742, 743, 754, 755, 758, 759, 930, 931, 934, 935, 946,
- 947, 950, 951, 994, 995, 998, 999, 1010, 1011, 1014, 1015, 680, 681,
- 684, 685, 696, 697, 700, 701, 744, 745, 748, 749, 760, 761, 764,
- 765, 936, 937, 940, 941, 952, 953, 956, 957, 1000, 1001, 1004, 1005,
- 1016, 1017, 1020, 1021, 682, 683, 686, 687, 698, 699, 702, 703, 746,
- 747, 750, 751, 762, 763, 766, 767, 938, 939, 942, 943, 954, 955,
- 958, 959, 1002, 1003, 1006, 1007, 1018, 1019, 1022, 1023,
-};
-#endif
-#endif // CONFIG_CB4X4 || CONFIG_EXT_PARTITION
-
-#if CONFIG_EXT_PARTITION
-/* clang-format off */
-static const uint16_t *const orders[BLOCK_SIZES_ALL] = {
-#if CONFIG_CB4X4
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2
- orders_4x4, orders_4x4, orders_4x4,
-#endif
- // 4X4
- orders_4x4,
- // 4X8, 8X4, 8X8
- orders_4x8, orders_8x4, orders_8x8,
-#else // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 4X4
- orders_8x8,
- // 4X8, 8X4, 8X8
- orders_8x8, orders_8x8, orders_8x8,
-#endif
- // 8X16, 16X8, 16X16
- orders_8x16, orders_16x8, orders_16x16,
- // 16X32, 32X16, 32X32
- orders_16x32, orders_32x16, orders_32x32,
- // 32X64, 64X32, 64X64
- orders_32x64, orders_64x32, orders_64x64,
- // 64x128, 128x64, 128x128
- orders_64x128, orders_128x64, orders_128x128,
- // 4x16, 16x4, 8x32
- orders_4x16, orders_16x4, orders_8x32,
- // 32x8, 16x64, 64x16
- orders_32x8, orders_16x64, orders_64x16,
- // 32x128, 128x32
- orders_32x128, orders_128x32
-};
-/* clang-format on */
-#else
-/* clang-format off */
-static const uint16_t *const orders[BLOCK_SIZES_ALL] = {
-#if CONFIG_CB4X4
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2
- orders_8x8, orders_8x8, orders_8x8,
-#endif
- // 4X4
- orders_8x8,
- // 4X8, 8X4, 8X8
- orders_8x16, orders_16x8, orders_16x16,
-#else // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 4X4
- orders_16x16,
- // 4X8, 8X4, 8X8
- orders_16x16, orders_16x16, orders_16x16,
-#endif
- // 8X16, 16X8, 16X16
- orders_16x32, orders_32x16, orders_32x32,
- // 16X32, 32X16, 32X32
- orders_32x64, orders_64x32, orders_64x64,
- // 32X64, 64X32, 64X64
- orders_64x128, orders_128x64, orders_128x128,
- // 4x16, 16x4, 8x32
- orders_8x32, orders_32x8, orders_16x64,
- // 32x8, 16x64, 64x16
- orders_64x16, orders_32x128, orders_128x32
+static uint8_t has_tr_vert_32x32[2] = { 15, 7 };
+static uint8_t has_tr_vert_64x64[1] = { 3 };
+
+// The _vert_* tables are like the ordinary tables above, but record the
+// availability that results when the square blocks are visited in the order
+// used for PARTITION_VERT_A or PARTITION_VERT_B. This is the same order as
+// normal except on the last split, where we go vertically (TL, BL, TR, BR).
+// We treat the rectangular block as a pair of squares, which means that these
+// tables work correctly for both mixed vertical partition types.
+//
+// There are tables for each of the square sizes. Vertical rectangles (like
+// BLOCK_16X32) use their respective "non-vert" table.
+static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
+ // 4X4
+ NULL,
+ // 4X8, 8X4, 8X8
+ has_tr_4x8, NULL, has_tr_vert_8x8,
+ // 8X16, 16X8, 16X16
+ has_tr_8x16, NULL, has_tr_vert_16x16,
+ // 16X32, 32X16, 32X32
+ has_tr_16x32, NULL, has_tr_vert_32x32,
+ // 32X64, 64X32, 64X64
+ has_tr_32x64, NULL, has_tr_vert_64x64,
+ // 64x128, 128x64, 128x128
+ has_tr_64x128, NULL, has_tr_128x128
};
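A concrete reading of these packed tables: each byte stores the flags of eight block positions within the superblock, least-significant bit first, where a position's index is (blk_row_in_sb << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) + blk_col_in_sb, as computed in has_top_right() below. A minimal standalone check of has_tr_vert_64x64 under that reading, assuming 128x128 superblocks (illustration only, not part of the patch):

#include <assert.h>
#include <stdint.h>

int main(void) {
  const uint8_t has_tr_vert_64x64 = 3;  /* value from the table above */
  /* 64x64 squares inside a 128x128 superblock form a 2x2 grid indexed as
   * row * 2 + col. Under PARTITION_VERT_A/B the squares are visited
   * TL, BL, TR, BR, so when the bottom-left square (index 2) is coded, its
   * top-right neighbour has not been coded yet: bit 2 is 0. */
  const int bottom_left_index = 1 * 2 + 0;
  assert(((has_tr_vert_64x64 >> bottom_left_index) & 1) == 0);
  return 0;
}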
-/* clang-format on */
-#endif // CONFIG_EXT_PARTITION
-#if CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
-static const uint16_t orders_verta_64x64[4] = {
- 0, 2, 1, 2,
-};
-static const uint16_t orders_verta_32x32[16] = {
- 0, 2, 4, 6, 1, 2, 5, 6, 8, 10, 12, 14, 9, 10, 13, 14,
-};
-static const uint16_t orders_verta_16x16[64] = {
- 0, 2, 4, 6, 16, 18, 20, 22, 1, 2, 5, 6, 17, 18, 21, 22,
- 8, 10, 12, 14, 24, 26, 28, 30, 9, 10, 13, 14, 25, 26, 29, 30,
- 32, 34, 36, 38, 48, 50, 52, 54, 33, 34, 37, 38, 49, 50, 53, 54,
- 40, 42, 44, 46, 56, 58, 60, 62, 41, 42, 45, 46, 57, 58, 61, 62,
-};
-#if CONFIG_EXT_PARTITION || CONFIG_CB4X4
-static const uint16_t orders_verta_8x8[256] = {
- 0, 2, 4, 6, 16, 18, 20, 22, 64, 66, 68, 70, 80, 82, 84,
- 86, 1, 2, 5, 6, 17, 18, 21, 22, 65, 66, 69, 70, 81, 82,
- 85, 86, 8, 10, 12, 14, 24, 26, 28, 30, 72, 74, 76, 78, 88,
- 90, 92, 94, 9, 10, 13, 14, 25, 26, 29, 30, 73, 74, 77, 78,
- 89, 90, 93, 94, 32, 34, 36, 38, 48, 50, 52, 54, 96, 98, 100,
- 102, 112, 114, 116, 118, 33, 34, 37, 38, 49, 50, 53, 54, 97, 98,
- 101, 102, 113, 114, 117, 118, 40, 42, 44, 46, 56, 58, 60, 62, 104,
- 106, 108, 110, 120, 122, 124, 126, 41, 42, 45, 46, 57, 58, 61, 62,
- 105, 106, 109, 110, 121, 122, 125, 126, 128, 130, 132, 134, 144, 146, 148,
- 150, 192, 194, 196, 198, 208, 210, 212, 214, 129, 130, 133, 134, 145, 146,
- 149, 150, 193, 194, 197, 198, 209, 210, 213, 214, 136, 138, 140, 142, 152,
- 154, 156, 158, 200, 202, 204, 206, 216, 218, 220, 222, 137, 138, 141, 142,
- 153, 154, 157, 158, 201, 202, 205, 206, 217, 218, 221, 222, 160, 162, 164,
- 166, 176, 178, 180, 182, 224, 226, 228, 230, 240, 242, 244, 246, 161, 162,
- 165, 166, 177, 178, 181, 182, 225, 226, 229, 230, 241, 242, 245, 246, 168,
- 170, 172, 174, 184, 186, 188, 190, 232, 234, 236, 238, 248, 250, 252, 254,
- 169, 170, 173, 174, 185, 186, 189, 190, 233, 234, 237, 238, 249, 250, 253,
- 254,
-};
-#endif // CONFIG_EXT_PARTITION || CONFIG_CB4X4
-
-#if CONFIG_EXT_PARTITION
-/* clang-format off */
-static const uint16_t *const orders_verta[BLOCK_SIZES] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2
- orders_4x4, orders_4x4, orders_4x4,
-#endif
- // 4X4
- orders_verta_8x8,
- // 4X8, 8X4, 8X8
- orders_verta_8x8, orders_verta_8x8, orders_verta_8x8,
- // 8X16, 16X8, 16X16
- orders_8x16, orders_16x8, orders_verta_16x16,
- // 16X32, 32X16, 32X32
- orders_16x32, orders_32x16, orders_verta_32x32,
- // 32X64, 64X32, 64X64
- orders_32x64, orders_64x32, orders_verta_64x64,
- // 64x128, 128x64, 128x128
- orders_64x128, orders_128x64, orders_128x128,
- // Note: We can't get 4:1 shaped blocks from a VERT_A type partition
-};
-/* clang-format on */
-#else
-/* clang-format off */
-static const uint16_t *const orders_verta[BLOCK_SIZES] = {
-#if CONFIG_CB4X4
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2
- orders_verta_8x8, orders_verta_8x8, orders_verta_8x8,
-#endif
- // 4X4
- orders_verta_8x8,
- // 4X8, 8X4, 8X8
- orders_verta_8x8, orders_verta_8x8, orders_verta_16x16,
-#else // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 4X4
- orders_verta_16x16,
- // 4X8, 8X4, 8X8
- orders_verta_16x16, orders_verta_16x16, orders_verta_16x16,
-#endif
- // 8X16, 16X8, 16X16
- orders_16x32, orders_32x16, orders_verta_32x32,
- // 16X32, 32X16, 32X32
- orders_32x64, orders_64x32, orders_verta_64x64,
- // 32X64, 64X32, 64X64
- orders_64x128, orders_128x64, orders_128x128,
- // Note: We can't get 4:1 shaped blocks from a VERT_A type partition
-};
-/* clang-format on */
-#endif // CONFIG_EXT_PARTITION
-#endif // CONFIG_EXT_PARTITION_TYPES
+static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
+ BLOCK_SIZE bsize) {
+ const uint8_t *ret = NULL;
+  // If this is a mixed vertical partition, look up bsize in has_tr_vert_tables.
+ if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
+ assert(bsize < BLOCK_SIZES);
+ ret = has_tr_vert_tables[bsize];
+ } else {
+ ret = has_tr_tables[bsize];
+ }
+ assert(ret);
+ return ret;
+}
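The table lookup performed by has_top_right() and has_bottom_left() below is a plain packed-bit read; a minimal sketch of it in isolation (the helper name is ours):

#include <stdint.h>

/* Return the availability bit stored for block position blk_index in one of
 * the has_tr / has_bl tables above (eight positions per byte, LSB first). */
static int read_packed_flag(const uint8_t *table, int blk_index) {
  const int idx1 = blk_index / 8;  /* which byte            */
  const int idx2 = blk_index % 8;  /* which bit in the byte */
  return (table[idx1] >> idx2) & 1;
}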
static int has_top_right(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
int mi_col, int top_available, int right_available,
-#if CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
- PARTITION_TYPE partition,
-#endif // CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
- TX_SIZE txsz, int row_off, int col_off, int ss_x) {
+ PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
+ int col_off, int ss_x, int ss_y) {
if (!top_available || !right_available) return 0;
-#if !CONFIG_CB4X4
- // TODO(bshacklett, huisu): Currently the RD loop traverses 4X8 blocks in
- // inverted N order while in the bitstream the subblocks are stored in Z
- // order. This discrepancy makes this function incorrect when considering 4X8
- // blocks in the RD loop, so we disable the extended right edge for these
- // blocks. The correct solution is to change the bitstream to store these
- // blocks in inverted N order, and then update this function appropriately.
- if (bsize == BLOCK_4X8 && row_off == 1) return 0;
-#endif
-
const int bw_unit = block_size_wide[bsize] >> tx_size_wide_log2[0];
const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
const int top_right_count_unit = tx_size_wide_unit[txsz];
-#if !CONFIG_CB4X4
- // Special handling for block sizes 4x8 and 4x4.
- if (ss_x == 0 && bw_unit < 2 && col_off == 0) return 1;
-#endif
-
if (row_off > 0) { // Just need to check if enough pixels on the right.
-#if CONFIG_EXT_PARTITION
- if (col_off + top_right_count_unit >=
- (block_size_wide[BLOCK_64X64] >> (tx_size_wide_log2[0] + ss_x)))
- return 0;
-#endif
+ if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
+ // Special case: For 128x128 blocks, the transform unit whose
+ // top-right corner is at the center of the block does in fact have
+ // pixels available at its top-right corner.
+ if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
+ col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) {
+ return 1;
+ }
+ const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
+ const int col_off_64 = col_off % plane_bw_unit_64;
+ return col_off_64 + top_right_count_unit < plane_bw_unit_64;
+ }
return col_off + top_right_count_unit < plane_bw_unit;
} else {
// All top-right pixels are in the block above, which is already available.
if (col_off + top_right_count_unit < plane_bw_unit) return 1;
- const int bw_in_mi_log2 = mi_width_log2_lookup[bsize];
- const int bh_in_mi_log2 = mi_height_log2_lookup[bsize];
- const int sb_mi_size = mi_size_high[cm->sb_size];
+ const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
+ const int bh_in_mi_log2 = mi_size_high_log2[bsize];
+ const int sb_mi_size = mi_size_high[cm->seq_params.sb_size];
const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
@@ -572,32 +232,175 @@ static int has_top_right(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
// Rightmost column of superblock (and not the top row): so top-right pixels
// fall in the right superblock, which is not available yet.
- if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) return 0;
+ if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) {
+ return 0;
+ }
// General case (neither top row nor rightmost column): check if the
// top-right block is coded before the current block.
- const uint16_t *const order =
-#if CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
- (partition == PARTITION_VERT_A) ? orders_verta[bsize] :
-#endif // CONFIG_EXT_PARTITION_TYPES
- orders[bsize];
const int this_blk_index =
((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
blk_col_in_sb + 0;
- const uint16_t this_blk_order = order[this_blk_index];
- const int tr_blk_index =
- ((blk_row_in_sb - 1) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
- blk_col_in_sb + 1;
- const uint16_t tr_blk_order = order[tr_blk_index];
- return tr_blk_order < this_blk_order;
+ const int idx1 = this_blk_index / 8;
+ const int idx2 = this_blk_index % 8;
+ const uint8_t *has_tr_table = get_has_tr_table(partition, bsize);
+ return (has_tr_table[idx1] >> idx2) & 1;
+ }
+}
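To make the 128-wide special case in has_top_right() concrete: for luma (ss_x = ss_y = 0) one mi/transform unit is 4 pixels, so a 64-pel side spans 16 units and a TX_16X16 unit spans 4. A small worked check with those illustrative values (not code from the patch):

#include <assert.h>

int main(void) {
  const int unit_64 = 16;  /* 64 pels in 4-pel units        */
  const int tr_units = 4;  /* TX_16X16 width in 4-pel units */
  /* The transform unit whose top-right corner is the centre of a 128x128
   * block (row_off == 16, col_off == 12) is explicitly available. */
  assert(16 == unit_64 && 12 + tr_units == unit_64);
  /* On any other row, col_off wraps into its 64-pel column; a unit ending
   * flush with that column has no top-right pixels. */
  assert(!((12 % unit_64) + tr_units < unit_64));
  return 0;
}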
+
+// Similar to the has_tr_* tables, but these store whether the bottom-left
+// reference pixels are available.
+static uint8_t has_bl_4x4[128] = {
+ 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1, 84, 85, 85,
+ 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 1, 0, 84, 85, 85, 85, 16, 17,
+ 17, 17, 84, 85, 85, 85, 0, 1, 1, 1, 84, 85, 85, 85, 16, 17, 17, 17, 84,
+ 85, 85, 85, 0, 0, 0, 0, 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
+ 0, 1, 1, 1, 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 1,
+ 0, 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1, 84, 85,
+ 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 0, 0,
+};
+static uint8_t has_bl_4x8[64] = {
+ 16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
+ 16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
+ 16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
+ 16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
+};
+static uint8_t has_bl_8x4[64] = {
+ 254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
+ 254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
+ 254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
+ 254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
+};
+static uint8_t has_bl_8x8[32] = {
+ 84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
+ 84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
+};
+static uint8_t has_bl_8x16[16] = {
+ 16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
+};
+static uint8_t has_bl_16x8[16] = {
+ 254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
+};
+static uint8_t has_bl_16x16[8] = {
+ 84, 16, 84, 0, 84, 16, 84, 0,
+};
+static uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
+static uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
+static uint8_t has_bl_32x32[2] = { 4, 4 };
+static uint8_t has_bl_32x64[1] = { 0 };
+static uint8_t has_bl_64x32[1] = { 34 };
+static uint8_t has_bl_64x64[1] = { 0 };
+static uint8_t has_bl_64x128[1] = { 0 };
+static uint8_t has_bl_128x64[1] = { 0 };
+static uint8_t has_bl_128x128[1] = { 0 };
+static uint8_t has_bl_4x16[32] = {
+ 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
+ 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
+};
+static uint8_t has_bl_16x4[32] = {
+ 254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
+ 254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
+};
+static uint8_t has_bl_8x32[8] = {
+ 0, 1, 0, 0, 0, 1, 0, 0,
+};
+static uint8_t has_bl_32x8[8] = {
+ 238, 78, 238, 14, 238, 78, 238, 14,
+};
+static uint8_t has_bl_16x64[2] = { 0, 0 };
+static uint8_t has_bl_64x16[2] = { 42, 42 };
+
+static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
+ // 4X4
+ has_bl_4x4,
+ // 4X8, 8X4, 8X8
+ has_bl_4x8, has_bl_8x4, has_bl_8x8,
+ // 8X16, 16X8, 16X16
+ has_bl_8x16, has_bl_16x8, has_bl_16x16,
+ // 16X32, 32X16, 32X32
+ has_bl_16x32, has_bl_32x16, has_bl_32x32,
+ // 32X64, 64X32, 64X64
+ has_bl_32x64, has_bl_64x32, has_bl_64x64,
+ // 64x128, 128x64, 128x128
+ has_bl_64x128, has_bl_128x64, has_bl_128x128,
+ // 4x16, 16x4, 8x32
+ has_bl_4x16, has_bl_16x4, has_bl_8x32,
+ // 32x8, 16x64, 64x16
+ has_bl_32x8, has_bl_16x64, has_bl_64x16
+};
+
+static uint8_t has_bl_vert_8x8[32] = {
+ 254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
+ 254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
+};
+static uint8_t has_bl_vert_16x16[8] = {
+ 254, 16, 254, 0, 254, 16, 254, 0,
+};
+static uint8_t has_bl_vert_32x32[2] = { 14, 14 };
+static uint8_t has_bl_vert_64x64[1] = { 2 };
+
+// The _vert_* tables are like the ordinary tables above, but record the
+// availability that results when the square blocks are visited in the order
+// used for PARTITION_VERT_A or PARTITION_VERT_B. This is the same order as
+// normal except on the last split, where we go vertically (TL, BL, TR, BR).
+// We treat the rectangular block as a pair of squares, which means that these
+// tables work correctly for both mixed vertical partition types.
+//
+// There are tables for each of the square sizes. Vertical rectangles (like
+// BLOCK_16X32) use their respective "non-vert" table.
+static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
+ // 4X4
+ NULL,
+ // 4X8, 8X4, 8X8
+ has_bl_4x8, NULL, has_bl_vert_8x8,
+ // 8X16, 16X8, 16X16
+ has_bl_8x16, NULL, has_bl_vert_16x16,
+ // 16X32, 32X16, 32X32
+ has_bl_16x32, NULL, has_bl_vert_32x32,
+ // 32X64, 64X32, 64X64
+ has_bl_32x64, NULL, has_bl_vert_64x64,
+ // 64x128, 128x64, 128x128
+ has_bl_64x128, NULL, has_bl_128x128
+};
+
+static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
+ BLOCK_SIZE bsize) {
+ const uint8_t *ret = NULL;
+  // If this is a mixed vertical partition, look up bsize in has_bl_vert_tables.
+ if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
+ assert(bsize < BLOCK_SIZES);
+ ret = has_bl_vert_tables[bsize];
+ } else {
+ ret = has_bl_tables[bsize];
}
+ assert(ret);
+ return ret;
}
static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
int mi_col, int bottom_available, int left_available,
- TX_SIZE txsz, int row_off, int col_off, int ss_y) {
+ PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
+ int col_off, int ss_x, int ss_y) {
if (!bottom_available || !left_available) return 0;
+  // Special case for 128x* blocks, when col_off is half the block width.
+  // This is needed because 128x* superblocks are divided into 64x* blocks in
+  // raster order.
+ if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
+ const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
+ const int col_off_64 = col_off % plane_bw_unit_64;
+ if (col_off_64 == 0) {
+ // We are at the left edge of top-right or bottom-right 64x* block.
+ const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
+ const int row_off_64 = row_off % plane_bh_unit_64;
+ const int plane_bh_unit =
+ AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
+ // Check if all bottom-left pixels are in the left 64x* block (which is
+ // already coded).
+ return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
+ }
+ }
+
if (col_off > 0) {
// Bottom-left pixels are in the bottom-left block, which is not available.
return 0;
@@ -606,17 +409,12 @@ static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
const int bottom_left_count_unit = tx_size_high_unit[txsz];
-#if !CONFIG_CB4X4
- // Special handling for block sizes 8x4 and 4x4.
- if (ss_y == 0 && bh_unit < 2 && row_off == 0) return 1;
-#endif
-
// All bottom-left pixels are in the left block, which is already available.
if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;
- const int bw_in_mi_log2 = mi_width_log2_lookup[bsize];
- const int bh_in_mi_log2 = mi_height_log2_lookup[bsize];
- const int sb_mi_size = mi_size_high[cm->sb_size];
+ const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
+ const int bh_in_mi_log2 = mi_size_high_log2[bsize];
+ const int sb_mi_size = mi_size_high[cm->seq_params.sb_size];
const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
@@ -629,8 +427,7 @@ static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
tx_size_wide_log2[0]) >>
ss_y;
const int row_off_in_sb = blk_start_row_off + row_off;
- const int sb_height_unit =
- sb_mi_size << (MI_SIZE_LOG2 - tx_size_wide_log2[0]) >> ss_y;
+ const int sb_height_unit = sb_mi_size >> ss_y;
return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
}
@@ -640,16 +437,13 @@ static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
// General case (neither leftmost column nor bottom row): check if the
// bottom-left block is coded before the current block.
- const uint16_t *const order = orders[bsize];
const int this_blk_index =
((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
blk_col_in_sb + 0;
- const uint16_t this_blk_order = order[this_blk_index];
- const int bl_blk_index =
- ((blk_row_in_sb + 1) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
- blk_col_in_sb - 1;
- const uint16_t bl_blk_order = order[bl_blk_index];
- return bl_blk_order < this_blk_order;
+ const int idx1 = this_blk_index / 8;
+ const int idx2 = this_blk_index % 8;
+ const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
+ return (has_bl_table[idx1] >> idx2) & 1;
}
}
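The matching 128-wide special case in has_bottom_left() can be checked the same way (again luma units of 4 pixels and TX_16X16, so tx_size_high_unit is 4; illustrative only):

#include <assert.h>

int main(void) {
  const int unit_64 = 16;  /* 64 pels in 4-pel units         */
  const int bl_units = 4;  /* TX_16X16 height in 4-pel units */
  /* col_off == 16 is the left edge of the right 64-pel half of a 128-wide
   * block; the bottom-left pixels come from the already coded left 64-pel
   * block as long as they do not run past its bottom edge. */
  assert((8 % unit_64) + bl_units < unit_64);     /* row_off ==  8: available     */
  assert(!((12 % unit_64) + bl_units < unit_64)); /* row_off == 12: not available */
  return 0;
}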
@@ -659,20 +453,15 @@ typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
-#if CONFIG_HIGHBITDEPTH
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd);
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
-#endif // CONFIG_HIGHBITDEPTH
-static void av1_init_intra_predictors_internal(void) {
-#if CONFIG_EXT_INTRA
+static void init_intra_predictors_internal(void) {
assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
-#endif // CONFIG_EXT_INTRA
-#if CONFIG_TX64X64
#define INIT_RECTANGULAR(p, type) \
p[TX_4X8] = aom_##type##_predictor_4x8; \
p[TX_8X4] = aom_##type##_predictor_8x4; \
@@ -681,132 +470,53 @@ static void av1_init_intra_predictors_internal(void) {
p[TX_16X32] = aom_##type##_predictor_16x32; \
p[TX_32X16] = aom_##type##_predictor_32x16; \
p[TX_32X64] = aom_##type##_predictor_32x64; \
- p[TX_64X32] = aom_##type##_predictor_64x32;
-#else
-#define INIT_RECTANGULAR(p, type) \
- p[TX_4X8] = aom_##type##_predictor_4x8; \
- p[TX_8X4] = aom_##type##_predictor_8x4; \
- p[TX_8X16] = aom_##type##_predictor_8x16; \
- p[TX_16X8] = aom_##type##_predictor_16x8; \
- p[TX_16X32] = aom_##type##_predictor_16x32; \
- p[TX_32X16] = aom_##type##_predictor_32x16;
-#endif // CONFIG_TX64X64
+ p[TX_64X32] = aom_##type##_predictor_64x32; \
+ p[TX_4X16] = aom_##type##_predictor_4x16; \
+ p[TX_16X4] = aom_##type##_predictor_16x4; \
+ p[TX_8X32] = aom_##type##_predictor_8x32; \
+ p[TX_32X8] = aom_##type##_predictor_32x8; \
+ p[TX_16X64] = aom_##type##_predictor_16x64; \
+ p[TX_64X16] = aom_##type##_predictor_64x16;
-#if CONFIG_TX64X64
#define INIT_NO_4X4(p, type) \
p[TX_8X8] = aom_##type##_predictor_8x8; \
p[TX_16X16] = aom_##type##_predictor_16x16; \
p[TX_32X32] = aom_##type##_predictor_32x32; \
p[TX_64X64] = aom_##type##_predictor_64x64; \
INIT_RECTANGULAR(p, type)
-#else
-#define INIT_NO_4X4(p, type) \
- p[TX_8X8] = aom_##type##_predictor_8x8; \
- p[TX_16X16] = aom_##type##_predictor_16x16; \
- p[TX_32X32] = aom_##type##_predictor_32x32; \
- INIT_RECTANGULAR(p, type)
-#endif // CONFIG_TX64X64
-#if CONFIG_CHROMA_2X2
#define INIT_ALL_SIZES(p, type) \
- p[TX_2X2] = aom_##type##_predictor_2x2; \
p[TX_4X4] = aom_##type##_predictor_4x4; \
INIT_NO_4X4(p, type)
-#else
-#define INIT_ALL_SIZES(p, type) \
- p[TX_4X4] = aom_##type##_predictor_4x4; \
- INIT_NO_4X4(p, type)
-#endif
INIT_ALL_SIZES(pred[V_PRED], v);
INIT_ALL_SIZES(pred[H_PRED], h);
- INIT_ALL_SIZES(pred[D207_PRED], d207e);
- INIT_ALL_SIZES(pred[D45_PRED], d45e);
- INIT_ALL_SIZES(pred[D63_PRED], d63e);
- INIT_ALL_SIZES(pred[D117_PRED], d117);
- INIT_ALL_SIZES(pred[D135_PRED], d135);
- INIT_ALL_SIZES(pred[D153_PRED], d153);
-
- INIT_ALL_SIZES(pred[TM_PRED], paeth);
+ INIT_ALL_SIZES(pred[PAETH_PRED], paeth);
INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth);
-#if CONFIG_SMOOTH_HV
INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v);
INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h);
-#endif // CONFIG_SMOOTH_HV
-
INIT_ALL_SIZES(dc_pred[0][0], dc_128);
INIT_ALL_SIZES(dc_pred[0][1], dc_top);
INIT_ALL_SIZES(dc_pred[1][0], dc_left);
INIT_ALL_SIZES(dc_pred[1][1], dc);
-#if CONFIG_HIGHBITDEPTH
INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
- INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207e);
- INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45e);
- INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63e);
- INIT_ALL_SIZES(pred_high[D117_PRED], highbd_d117);
- INIT_ALL_SIZES(pred_high[D135_PRED], highbd_d135);
- INIT_ALL_SIZES(pred_high[D153_PRED], highbd_d153);
-
- INIT_ALL_SIZES(pred_high[TM_PRED], highbd_paeth);
+ INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth);
INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth);
-#if CONFIG_SMOOTH_HV
INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v);
INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h);
-#endif // CONFIG_SMOOTH_HV
-
INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
-#endif // CONFIG_HIGHBITDEPTH
-
#undef intra_pred_allsizes
}
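For reference, a hand-expanded fragment of what INIT_ALL_SIZES(pred[V_PRED], v) produces with the macros above (all 19 transform sizes are covered; only a few are shown here, as an illustration rather than additional code):

  pred[V_PRED][TX_4X4]   = aom_v_predictor_4x4;
  pred[V_PRED][TX_8X8]   = aom_v_predictor_8x8;
  pred[V_PRED][TX_4X8]   = aom_v_predictor_4x8;
  pred[V_PRED][TX_64X16] = aom_v_predictor_64x16;
  /* ...and so on for every size, and likewise for each prediction mode. */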
-#if CONFIG_EXT_INTRA
-#if CONFIG_INTRA_INTERP
-static int intra_subpel_interp(int base, int shift, const uint8_t *ref,
- int ref_start_idx, int ref_end_idx,
- INTRA_FILTER filter_type) {
- int val, k, idx, filter_idx = 0;
- const int16_t *filter = NULL;
-
- if (filter_type == INTRA_FILTER_LINEAR) {
- val = ref[base] * (256 - shift) + ref[base + 1] * shift;
- val = ROUND_POWER_OF_TWO(val, 8);
- } else {
- filter_idx = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
- filter = av1_intra_filter_kernels[filter_type][filter_idx];
-
- if (filter_idx < (1 << SUBPEL_BITS)) {
- val = 0;
- for (k = 0; k < SUBPEL_TAPS; ++k) {
- idx = base + 1 - (SUBPEL_TAPS / 2) + k;
- idx = AOMMAX(AOMMIN(idx, ref_end_idx), ref_start_idx);
- val += ref[idx] * filter[k];
- }
- val = ROUND_POWER_OF_TWO(val, FILTER_BITS);
- } else {
- val = ref[base + 1];
- }
- }
-
- return val;
-}
-#endif // CONFIG_INTRA_INTERP
-
// Directional prediction, zone 1: 0 < angle < 90
-static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint8_t *above, const uint8_t *left,
-#if CONFIG_INTRA_INTERP
- INTRA_FILTER filter_type,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- int upsample_above,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- int dx, int dy) {
+void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+ const uint8_t *above, const uint8_t *left,
+ int upsample_above, int dx, int dy) {
int r, c, x, base, shift, val;
(void)left;
@@ -814,16 +524,13 @@ static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
assert(dy == 1);
assert(dx > 0);
-#if !CONFIG_INTRA_EDGE_UPSAMPLE
- const int upsample_above = 0;
-#endif // !CONFIG_INTRA_EDGE_UPSAMPLE
const int max_base_x = ((bw + bh) - 1) << upsample_above;
- const int frac_bits = 8 - upsample_above;
+ const int frac_bits = 6 - upsample_above;
const int base_inc = 1 << upsample_above;
x = dx;
for (r = 0; r < bh; ++r, dst += stride, x += dx) {
base = x >> frac_bits;
- shift = (x << upsample_above) & 0xFF;
+ shift = ((x << upsample_above) & 0x3F) >> 1;
if (base >= max_base_x) {
for (int i = r; i < bh; ++i) {
@@ -835,14 +542,8 @@ static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
for (c = 0; c < bw; ++c, base += base_inc) {
if (base < max_base_x) {
-#if CONFIG_INTRA_INTERP
- val = intra_subpel_interp(base, shift, above, 0, bw + bh - 1,
- filter_type);
-#else // CONFIG_INTRA_INTERP
- val = above[base] * (256 - shift) + above[base + 1] * shift;
- val = ROUND_POWER_OF_TWO(val, 8);
-#endif // CONFIG_INTRA_INTERP
- dst[c] = clip_pixel(val);
+ val = above[base] * (32 - shift) + above[base + 1] * shift;
+ dst[c] = ROUND_POWER_OF_TWO(val, 5);
} else {
dst[c] = above[max_base_x];
}
@@ -851,68 +552,44 @@ static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
}
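The new fixed-point layout of the directional predictors is easiest to see with one pixel worked through: positions advance in 1/64-pel steps (frac_bits = 6 when upsample_above = 0), the 6-bit fraction is halved to a 5-bit weight in 0..31, and the two-tap blend is rounded down by 5 bits. A standalone version of that inner computation, with names of our choosing:

#include <stdint.h>

static uint8_t blend_2tap(const uint8_t *above, int x /* 1/64-pel position */) {
  const int base = x >> 6;            /* integer sample index */
  const int shift = (x & 0x3F) >> 1;  /* 5-bit weight, 0..31  */
  const int val = above[base] * (32 - shift) + above[base + 1] * shift;
  return (uint8_t)((val + 16) >> 5);  /* ROUND_POWER_OF_TWO(val, 5) */
}

For example, x == 96 (one and a half samples along the row) gives base == 1 and shift == 16, i.e. the average of above[1] and above[2]. With upsample_above = 1 the reference array holds twice as many samples, so frac_bits drops to 5 and base advances by 2 per column, exactly as in the code above.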
// Directional prediction, zone 2: 90 < angle < 180
-static void dr_prediction_z2(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint8_t *above, const uint8_t *left,
-#if CONFIG_INTRA_INTERP
- INTRA_FILTER filter_type,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- int upsample_above, int upsample_left,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- int dx, int dy) {
+void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+ const uint8_t *above, const uint8_t *left,
+ int upsample_above, int upsample_left, int dx,
+ int dy) {
int r, c, x, y, shift1, shift2, val, base1, base2;
assert(dx > 0);
assert(dy > 0);
-#if !CONFIG_INTRA_EDGE_UPSAMPLE
- const int upsample_above = 0;
- const int upsample_left = 0;
-#endif // !CONFIG_INTRA_EDGE_UPSAMPLE
const int min_base_x = -(1 << upsample_above);
- const int frac_bits_x = 8 - upsample_above;
- const int frac_bits_y = 8 - upsample_left;
+ const int frac_bits_x = 6 - upsample_above;
+ const int frac_bits_y = 6 - upsample_left;
const int base_inc_x = 1 << upsample_above;
x = -dx;
for (r = 0; r < bh; ++r, x -= dx, dst += stride) {
base1 = x >> frac_bits_x;
- y = (r << 8) - dy;
+ y = (r << 6) - dy;
for (c = 0; c < bw; ++c, base1 += base_inc_x, y -= dy) {
if (base1 >= min_base_x) {
- shift1 = (x * (1 << upsample_above)) & 0xFF;
-#if CONFIG_INTRA_INTERP
- val =
- intra_subpel_interp(base1, shift1, above, -1, bw - 1, filter_type);
-#else
- val = above[base1] * (256 - shift1) + above[base1 + 1] * shift1;
- val = ROUND_POWER_OF_TWO(val, 8);
-#endif // CONFIG_INTRA_INTERP
+ shift1 = ((x * (1 << upsample_above)) & 0x3F) >> 1;
+ val = above[base1] * (32 - shift1) + above[base1 + 1] * shift1;
+ val = ROUND_POWER_OF_TWO(val, 5);
} else {
base2 = y >> frac_bits_y;
assert(base2 >= -(1 << upsample_left));
- shift2 = (y * (1 << upsample_left)) & 0xFF;
-#if CONFIG_INTRA_INTERP
- val = intra_subpel_interp(base2, shift2, left, -1, bh - 1, filter_type);
-#else
- val = left[base2] * (256 - shift2) + left[base2 + 1] * shift2;
- val = ROUND_POWER_OF_TWO(val, 8);
-#endif // CONFIG_INTRA_INTERP
+ shift2 = ((y * (1 << upsample_left)) & 0x3F) >> 1;
+ val = left[base2] * (32 - shift2) + left[base2 + 1] * shift2;
+ val = ROUND_POWER_OF_TWO(val, 5);
}
- dst[c] = clip_pixel(val);
+ dst[c] = val;
}
}
}
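One way to see why the explicit clip_pixel() / clip_pixel_highbd() calls of the old code could be dropped from these blends: the two weights always sum to 32, so the rounded result never exceeds the larger of the two input samples. A quick standalone check (illustrative, not part of the patch):

#include <assert.h>

int main(void) {
  const int max = (1 << 10) - 1;  /* worst case for, e.g., 10-bit samples */
  for (int shift = 0; shift <= 31; ++shift) {
    const int val = max * (32 - shift) + max * shift;  /* both taps maximal */
    assert(((val + 16) >> 5) <= max);                  /* stays in range    */
  }
  return 0;
}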
// Directional prediction, zone 3: 180 < angle < 270
-static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint8_t *above, const uint8_t *left,
-#if CONFIG_INTRA_INTERP
- INTRA_FILTER filter_type,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- int upsample_left,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- int dx, int dy) {
+void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
+ const uint8_t *above, const uint8_t *left,
+ int upsample_left, int dx, int dy) {
int r, c, y, base, shift, val;
(void)above;
@@ -921,27 +598,18 @@ static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
assert(dx == 1);
assert(dy > 0);
-#if !CONFIG_INTRA_EDGE_UPSAMPLE
- const int upsample_left = 0;
-#endif // !CONFIG_INTRA_EDGE_UPSAMPLE
const int max_base_y = (bw + bh - 1) << upsample_left;
- const int frac_bits = 8 - upsample_left;
+ const int frac_bits = 6 - upsample_left;
const int base_inc = 1 << upsample_left;
y = dy;
for (c = 0; c < bw; ++c, y += dy) {
base = y >> frac_bits;
- shift = (y << upsample_left) & 0xFF;
+ shift = ((y << upsample_left) & 0x3F) >> 1;
for (r = 0; r < bh; ++r, base += base_inc) {
if (base < max_base_y) {
-#if CONFIG_INTRA_INTERP
- val =
- intra_subpel_interp(base, shift, left, 0, bw + bh - 1, filter_type);
-#else // CONFIG_INTRA_INTERP
- val = left[base] * (256 - shift) + left[base + 1] * shift;
- val = ROUND_POWER_OF_TWO(val, 8);
-#endif // CONFIG_INTRA_INTERP
- dst[r * stride + c] = clip_pixel(val);
+ val = left[base] * (32 - shift) + left[base + 1] * shift;
+        dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
} else {
for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
break;
@@ -950,78 +618,24 @@ static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
}
}
-// Get the shift (up-scaled by 256) in X w.r.t a unit change in Y.
-// If angle > 0 && angle < 90, dx = -((int)(256 / t));
-// If angle > 90 && angle < 180, dx = (int)(256 / t);
-// If angle > 180 && angle < 270, dx = 1;
-static INLINE int get_dx(int angle) {
- if (angle > 0 && angle < 90) {
- return dr_intra_derivative[angle];
- } else if (angle > 90 && angle < 180) {
- return dr_intra_derivative[180 - angle];
- } else {
- // In this case, we are not really going to use dx. We may return any value.
- return 1;
- }
-}
-
-// Get the shift (up-scaled by 256) in Y w.r.t a unit change in X.
-// If angle > 0 && angle < 90, dy = 1;
-// If angle > 90 && angle < 180, dy = (int)(256 * t);
-// If angle > 180 && angle < 270, dy = -((int)(256 * t));
-static INLINE int get_dy(int angle) {
- if (angle > 90 && angle < 180) {
- return dr_intra_derivative[angle - 90];
- } else if (angle > 180 && angle < 270) {
- return dr_intra_derivative[270 - angle];
- } else {
- // In this case, we are not really going to use dy. We may return any value.
- return 1;
- }
-}
-
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
const uint8_t *above, const uint8_t *left,
-#if CONFIG_INTRA_INTERP
- INTRA_FILTER filter_type,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- int upsample_above, int upsample_left,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- int angle) {
- const int dx = get_dx(angle);
- const int dy = get_dy(angle);
+ int upsample_above, int upsample_left, int angle) {
+ const int dx = av1_get_dx(angle);
+ const int dy = av1_get_dy(angle);
const int bw = tx_size_wide[tx_size];
const int bh = tx_size_high[tx_size];
assert(angle > 0 && angle < 270);
if (angle > 0 && angle < 90) {
- dr_prediction_z1(dst, stride, bw, bh, above, left,
-#if CONFIG_INTRA_INTERP
- filter_type,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- upsample_above,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- dx, dy);
+ av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
+ dy);
} else if (angle > 90 && angle < 180) {
- dr_prediction_z2(dst, stride, bw, bh, above, left,
-#if CONFIG_INTRA_INTERP
- filter_type,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- upsample_above, upsample_left,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- dx, dy);
+ av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
+ upsample_left, dx, dy);
} else if (angle > 180 && angle < 270) {
- dr_prediction_z3(dst, stride, bw, bh, above, left,
-#if CONFIG_INTRA_INTERP
- filter_type,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- upsample_left,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- dx, dy);
+ av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
+ dy);
} else if (angle == 90) {
pred[V_PRED][tx_size](dst, stride, above, left);
} else if (angle == 180) {
@@ -1029,66 +643,26 @@ static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
}
}
-#if CONFIG_HIGHBITDEPTH
-#if CONFIG_INTRA_INTERP
-static int highbd_intra_subpel_interp(int base, int shift, const uint16_t *ref,
- int ref_start_idx, int ref_end_idx,
- INTRA_FILTER filter_type) {
- int val, k, idx, filter_idx = 0;
- const int16_t *filter = NULL;
-
- if (filter_type == INTRA_FILTER_LINEAR) {
- val = ref[base] * (256 - shift) + ref[base + 1] * shift;
- val = ROUND_POWER_OF_TWO(val, 8);
- } else {
- filter_idx = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
- filter = av1_intra_filter_kernels[filter_type][filter_idx];
-
- if (filter_idx < (1 << SUBPEL_BITS)) {
- val = 0;
- for (k = 0; k < SUBPEL_TAPS; ++k) {
- idx = base + 1 - (SUBPEL_TAPS / 2) + k;
- idx = AOMMAX(AOMMIN(idx, ref_end_idx), ref_start_idx);
- val += ref[idx] * filter[k];
- }
- val = ROUND_POWER_OF_TWO(val, FILTER_BITS);
- } else {
- val = ref[base + 1];
- }
- }
-
- return val;
-}
-#endif // CONFIG_INTRA_INTERP
-
// Directional prediction, zone 1: 0 < angle < 90
-static void highbd_dr_prediction_z1(uint16_t *dst, ptrdiff_t stride, int bw,
- int bh, const uint16_t *above,
- const uint16_t *left,
-#if CONFIG_INTRA_INTERP
- INTRA_FILTER filter_type,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- int upsample_above,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- int dx, int dy, int bd) {
+void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
+ int bh, const uint16_t *above,
+ const uint16_t *left, int upsample_above,
+ int dx, int dy, int bd) {
int r, c, x, base, shift, val;
(void)left;
(void)dy;
+ (void)bd;
assert(dy == 1);
assert(dx > 0);
-#if !CONFIG_INTRA_EDGE_UPSAMPLE
- const int upsample_above = 0;
-#endif // !CONFIG_INTRA_EDGE_UPSAMPLE
const int max_base_x = ((bw + bh) - 1) << upsample_above;
- const int frac_bits = 8 - upsample_above;
+ const int frac_bits = 6 - upsample_above;
const int base_inc = 1 << upsample_above;
x = dx;
for (r = 0; r < bh; ++r, dst += stride, x += dx) {
base = x >> frac_bits;
- shift = (x << upsample_above) & 0xFF;
+ shift = ((x << upsample_above) & 0x3F) >> 1;
if (base >= max_base_x) {
for (int i = r; i < bh; ++i) {
@@ -1100,14 +674,8 @@ static void highbd_dr_prediction_z1(uint16_t *dst, ptrdiff_t stride, int bw,
for (c = 0; c < bw; ++c, base += base_inc) {
if (base < max_base_x) {
-#if CONFIG_INTRA_INTERP
- val = highbd_intra_subpel_interp(base, shift, above, 0, bw + bh - 1,
- filter_type);
-#else
- val = above[base] * (256 - shift) + above[base + 1] * shift;
- val = ROUND_POWER_OF_TWO(val, 8);
-#endif // CONFIG_INTRA_INTERP
- dst[c] = clip_pixel_highbd(val, bd);
+ val = above[base] * (32 - shift) + above[base + 1] * shift;
+ dst[c] = ROUND_POWER_OF_TWO(val, 5);
} else {
dst[c] = above[max_base_x];
}
@@ -1116,100 +684,67 @@ static void highbd_dr_prediction_z1(uint16_t *dst, ptrdiff_t stride, int bw,
}
// Directional prediction, zone 2: 90 < angle < 180
-static void highbd_dr_prediction_z2(uint16_t *dst, ptrdiff_t stride, int bw,
- int bh, const uint16_t *above,
- const uint16_t *left,
-#if CONFIG_INTRA_INTERP
- INTRA_FILTER filter_type,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- int upsample_above, int upsample_left,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- int dx, int dy, int bd) {
+void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
+ int bh, const uint16_t *above,
+ const uint16_t *left, int upsample_above,
+ int upsample_left, int dx, int dy, int bd) {
int r, c, x, y, shift, val, base;
+ (void)bd;
assert(dx > 0);
assert(dy > 0);
-#if !CONFIG_INTRA_EDGE_UPSAMPLE
- const int upsample_above = 0;
- const int upsample_left = 0;
-#endif // !CONFIG_INTRA_EDGE_UPSAMPLE
const int min_base_x = -(1 << upsample_above);
- const int frac_bits_x = 8 - upsample_above;
- const int frac_bits_y = 8 - upsample_left;
+ const int frac_bits_x = 6 - upsample_above;
+ const int frac_bits_y = 6 - upsample_left;
for (r = 0; r < bh; ++r) {
for (c = 0; c < bw; ++c) {
y = r + 1;
- x = (c << 8) - y * dx;
+ x = (c << 6) - y * dx;
base = x >> frac_bits_x;
if (base >= min_base_x) {
- shift = (x * (1 << upsample_above)) & 0xFF;
-#if CONFIG_INTRA_INTERP
- val = highbd_intra_subpel_interp(base, shift, above, -1, bw - 1,
- filter_type);
-#else
- val = above[base] * (256 - shift) + above[base + 1] * shift;
- val = ROUND_POWER_OF_TWO(val, 8);
-#endif // CONFIG_INTRA_INTERP
+ shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
+ val = above[base] * (32 - shift) + above[base + 1] * shift;
+ val = ROUND_POWER_OF_TWO(val, 5);
} else {
x = c + 1;
- y = (r << 8) - x * dy;
+ y = (r << 6) - x * dy;
base = y >> frac_bits_y;
- shift = (y * (1 << upsample_left)) & 0xFF;
-#if CONFIG_INTRA_INTERP
- val = highbd_intra_subpel_interp(base, shift, left, -1, bh - 1,
- filter_type);
-#else
- val = left[base] * (256 - shift) + left[base + 1] * shift;
- val = ROUND_POWER_OF_TWO(val, 8);
-#endif // CONFIG_INTRA_INTERP
+ shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
+ val = left[base] * (32 - shift) + left[base + 1] * shift;
+ val = ROUND_POWER_OF_TWO(val, 5);
}
- dst[c] = clip_pixel_highbd(val, bd);
+ dst[c] = val;
}
dst += stride;
}
}
// Directional prediction, zone 3: 180 < angle < 270
-static void highbd_dr_prediction_z3(uint16_t *dst, ptrdiff_t stride, int bw,
- int bh, const uint16_t *above,
- const uint16_t *left,
-#if CONFIG_INTRA_INTERP
- INTRA_FILTER filter_type,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- int upsample_left,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- int dx, int dy, int bd) {
+void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
+ int bh, const uint16_t *above,
+ const uint16_t *left, int upsample_left,
+ int dx, int dy, int bd) {
int r, c, y, base, shift, val;
(void)above;
(void)dx;
+ (void)bd;
assert(dx == 1);
assert(dy > 0);
-#if !CONFIG_INTRA_EDGE_UPSAMPLE
- const int upsample_left = 0;
-#endif // !CONFIG_INTRA_EDGE_UPSAMPLE
const int max_base_y = (bw + bh - 1) << upsample_left;
- const int frac_bits = 8 - upsample_left;
+ const int frac_bits = 6 - upsample_left;
const int base_inc = 1 << upsample_left;
y = dy;
for (c = 0; c < bw; ++c, y += dy) {
base = y >> frac_bits;
- shift = (y << upsample_left) & 0xFF;
+ shift = ((y << upsample_left) & 0x3F) >> 1;
for (r = 0; r < bh; ++r, base += base_inc) {
if (base < max_base_y) {
-#if CONFIG_INTRA_INTERP
- val = highbd_intra_subpel_interp(base, shift, left, 0, bw + bh - 1,
- filter_type);
-#else
- val = left[base] * (256 - shift) + left[base + 1] * shift;
- val = ROUND_POWER_OF_TWO(val, 8);
-#endif // CONFIG_INTRA_INTERP
- dst[r * stride + c] = clip_pixel_highbd(val, bd);
+ val = left[base] * (32 - shift) + left[base + 1] * shift;
+ dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
} else {
for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
break;
@@ -1220,1002 +755,253 @@ static void highbd_dr_prediction_z3(uint16_t *dst, ptrdiff_t stride, int bw,
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
TX_SIZE tx_size, const uint16_t *above,
- const uint16_t *left,
-#if CONFIG_INTRA_INTERP
- INTRA_FILTER filter,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- int upsample_above, int upsample_left,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- int angle, int bd) {
- const int dx = get_dx(angle);
- const int dy = get_dy(angle);
+ const uint16_t *left, int upsample_above,
+ int upsample_left, int angle, int bd) {
+ const int dx = av1_get_dx(angle);
+ const int dy = av1_get_dy(angle);
const int bw = tx_size_wide[tx_size];
const int bh = tx_size_high[tx_size];
assert(angle > 0 && angle < 270);
if (angle > 0 && angle < 90) {
- highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
-#if CONFIG_INTRA_INTERP
- filter,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- upsample_above,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- dx, dy, bd);
+ av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
+ upsample_above, dx, dy, bd);
} else if (angle > 90 && angle < 180) {
- highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
-#if CONFIG_INTRA_INTERP
- filter,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- upsample_above, upsample_left,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- dx, dy, bd);
+ av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
+ upsample_above, upsample_left, dx, dy, bd);
} else if (angle > 180 && angle < 270) {
- highbd_dr_prediction_z3(dst, stride, bw, bh, above, left,
-#if CONFIG_INTRA_INTERP
- filter,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- upsample_left,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- dx, dy, bd);
+ av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
+ dx, dy, bd);
} else if (angle == 90) {
pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
} else if (angle == 180) {
pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
}
}
-#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_EXT_INTRA
-#if CONFIG_FILTER_INTRA
-#if USE_3TAP_INTRA_FILTER
-static int filter_intra_taps_3[TX_SIZES_ALL][FILTER_INTRA_MODES][3] = {
-#if CONFIG_CHROMA_2X2
- {
- { 697, 836, -509 },
- { 993, 513, -482 },
- { 381, 984, -341 },
- { 642, 1169, -787 },
- { 590, 553, -119 },
- { 762, 385, -123 },
- { 358, 687, -21 },
- { 411, 1083, -470 },
- { 912, 814, -702 },
- { 883, 902, -761 },
- },
-#endif
+DECLARE_ALIGNED(16, const int8_t,
+ av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
{
- { 697, 836, -509 },
- { 993, 513, -482 },
- { 381, 984, -341 },
- { 642, 1169, -787 },
- { 590, 553, -119 },
- { 762, 385, -123 },
- { 358, 687, -21 },
- { 411, 1083, -470 },
- { 912, 814, -702 },
- { 883, 902, -761 },
+ { -6, 10, 0, 0, 0, 12, 0, 0 },
+ { -5, 2, 10, 0, 0, 9, 0, 0 },
+ { -3, 1, 1, 10, 0, 7, 0, 0 },
+ { -3, 1, 1, 2, 10, 5, 0, 0 },
+ { -4, 6, 0, 0, 0, 2, 12, 0 },
+ { -3, 2, 6, 0, 0, 2, 9, 0 },
+ { -3, 2, 2, 6, 0, 2, 7, 0 },
+ { -3, 1, 2, 2, 6, 3, 5, 0 },
},
{
- { 659, 816, -451 },
- { 980, 625, -581 },
- { 558, 962, -496 },
- { 681, 888, -545 },
- { 591, 613, 180 },
- { 778, 399, -153 },
- { 495, 641, -112 },
- { 671, 937, -584 },
- { 745, 940, -661 },
- { 839, 911, -726 },
+ { -10, 16, 0, 0, 0, 10, 0, 0 },
+ { -6, 0, 16, 0, 0, 6, 0, 0 },
+ { -4, 0, 0, 16, 0, 4, 0, 0 },
+ { -2, 0, 0, 0, 16, 2, 0, 0 },
+ { -10, 16, 0, 0, 0, 0, 10, 0 },
+ { -6, 0, 16, 0, 0, 0, 6, 0 },
+ { -4, 0, 0, 16, 0, 0, 4, 0 },
+ { -2, 0, 0, 0, 16, 0, 2, 0 },
},
{
- { 539, 927, -442 },
- { 1003, 714, -693 },
- { 349, 1271, -596 },
- { 820, 764, -560 },
- { 524, 816, -316 },
- { 780, 681, -437 },
- { 586, 795, -357 },
- { 551, 1135, -663 },
- { 593, 1061, -630 },
- { 974, 970, -920 },
+ { -8, 8, 0, 0, 0, 16, 0, 0 },
+ { -8, 0, 8, 0, 0, 16, 0, 0 },
+ { -8, 0, 0, 8, 0, 16, 0, 0 },
+ { -8, 0, 0, 0, 8, 16, 0, 0 },
+ { -4, 4, 0, 0, 0, 0, 16, 0 },
+ { -4, 0, 4, 0, 0, 0, 16, 0 },
+ { -4, 0, 0, 4, 0, 0, 16, 0 },
+ { -4, 0, 0, 0, 4, 0, 16, 0 },
},
{
- { 595, 919, -490 },
- { 945, 668, -579 },
- { 495, 962, -433 },
- { 385, 1551, -912 },
- { 455, 554, 15 },
- { 852, 478, -306 },
- { 177, 760, -87 },
- { -65, 1611, -522 },
- { 815, 894, -685 },
- { 846, 1010, -832 },
+ { -2, 8, 0, 0, 0, 10, 0, 0 },
+ { -1, 3, 8, 0, 0, 6, 0, 0 },
+ { -1, 2, 3, 8, 0, 4, 0, 0 },
+ { 0, 1, 2, 3, 8, 2, 0, 0 },
+ { -1, 4, 0, 0, 0, 3, 10, 0 },
+ { -1, 3, 4, 0, 0, 4, 6, 0 },
+ { -1, 2, 3, 4, 0, 4, 4, 0 },
+ { -1, 2, 2, 3, 4, 3, 3, 0 },
},
-#if CONFIG_TX64X64
{
- { 595, 919, -490 },
- { 945, 668, -579 },
- { 495, 962, -433 },
- { 385, 1551, -912 },
- { 455, 554, 15 },
- { 852, 478, -306 },
- { 177, 760, -87 },
- { -65, 1611, -522 },
- { 815, 894, -685 },
- { 846, 1010, -832 },
+ { -12, 14, 0, 0, 0, 14, 0, 0 },
+ { -10, 0, 14, 0, 0, 12, 0, 0 },
+ { -9, 0, 0, 14, 0, 11, 0, 0 },
+ { -8, 0, 0, 0, 14, 10, 0, 0 },
+ { -10, 12, 0, 0, 0, 0, 14, 0 },
+ { -9, 1, 12, 0, 0, 0, 12, 0 },
+ { -8, 0, 0, 12, 0, 1, 11, 0 },
+ { -7, 0, 0, 1, 12, 1, 9, 0 },
},
-#endif // CONFIG_TX64X64
- {
- { 697, 836, -509 },
- { 993, 513, -482 },
- { 381, 984, -341 },
- { 642, 1169, -787 },
- { 590, 553, -119 },
- { 762, 385, -123 },
- { 358, 687, -21 },
- { 411, 1083, -470 },
- { 912, 814, -702 },
- { 883, 902, -761 },
- },
- {
- { 697, 836, -509 },
- { 993, 513, -482 },
- { 381, 984, -341 },
- { 642, 1169, -787 },
- { 590, 553, -119 },
- { 762, 385, -123 },
- { 358, 687, -21 },
- { 411, 1083, -470 },
- { 912, 814, -702 },
- { 883, 902, -761 },
- },
- {
- { 659, 816, -451 },
- { 980, 625, -581 },
- { 558, 962, -496 },
- { 681, 888, -545 },
- { 591, 613, 180 },
- { 778, 399, -153 },
- { 495, 641, -112 },
- { 671, 937, -584 },
- { 745, 940, -661 },
- { 839, 911, -726 },
- },
- {
- { 659, 816, -451 },
- { 980, 625, -581 },
- { 558, 962, -496 },
- { 681, 888, -545 },
- { 591, 613, 180 },
- { 778, 399, -153 },
- { 495, 641, -112 },
- { 671, 937, -584 },
- { 745, 940, -661 },
- { 839, 911, -726 },
- },
- {
- { 539, 927, -442 },
- { 1003, 714, -693 },
- { 349, 1271, -596 },
- { 820, 764, -560 },
- { 524, 816, -316 },
- { 780, 681, -437 },
- { 586, 795, -357 },
- { 551, 1135, -663 },
- { 593, 1061, -630 },
- { 974, 970, -920 },
- },
- {
- { 539, 927, -442 },
- { 1003, 714, -693 },
- { 349, 1271, -596 },
- { 820, 764, -560 },
- { 524, 816, -316 },
- { 780, 681, -437 },
- { 586, 795, -357 },
- { 551, 1135, -663 },
- { 593, 1061, -630 },
- { 974, 970, -920 },
- },
- {
- { 697, 836, -509 },
- { 993, 513, -482 },
- { 381, 984, -341 },
- { 642, 1169, -787 },
- { 590, 553, -119 },
- { 762, 385, -123 },
- { 358, 687, -21 },
- { 411, 1083, -470 },
- { 912, 814, -702 },
- { 883, 902, -761 },
- },
- {
- { 697, 836, -509 },
- { 993, 513, -482 },
- { 381, 984, -341 },
- { 642, 1169, -787 },
- { 590, 553, -119 },
- { 762, 385, -123 },
- { 358, 687, -21 },
- { 411, 1083, -470 },
- { 912, 814, -702 },
- { 883, 902, -761 },
- },
- {
- { 659, 816, -451 },
- { 980, 625, -581 },
- { 558, 962, -496 },
- { 681, 888, -545 },
- { 591, 613, 180 },
- { 778, 399, -153 },
- { 495, 641, -112 },
- { 671, 937, -584 },
- { 745, 940, -661 },
- { 839, 911, -726 },
- },
- {
- { 659, 816, -451 },
- { 980, 625, -581 },
- { 558, 962, -496 },
- { 681, 888, -545 },
- { 591, 613, 180 },
- { 778, 399, -153 },
- { 495, 641, -112 },
- { 671, 937, -584 },
- { 745, 940, -661 },
- { 839, 911, -726 },
- }
};
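The new av1_filter_intra_taps table replaces the old per-transform-size 3- and 4-tap sets with a single 8x8 set per filter-intra mode. In the data above every 8-entry row sums to 16 and the final entry is always zero, which is consistent with each predicted pixel of a small patch being a 7-tap weighted sum of neighbouring reference samples, scaled down by 4 bits. A sketch of that weighted sum under those assumptions; the sample layout p[0..6] and the rounding are inferred for illustration, not taken verbatim from the patch:

#include <stdint.h>

/* One predicted pixel: p[0..6] are seven previously reconstructed neighbour
 * samples and taps is one 8-entry row of av1_filter_intra_taps (the eighth
 * entry is treated as padding). Rows sum to 16, so the signed result is
 * divided by 16 with rounding and clamped to the 8-bit pixel range. */
static uint8_t filter_intra_pixel(const uint8_t p[7], const int8_t taps[8]) {
  int sum = 0;
  for (int i = 0; i < 7; ++i) sum += taps[i] * p[i];
  sum = (sum + 8) >> 4;
  if (sum < 0) sum = 0;
  if (sum > 255) sum = 255;
  return (uint8_t)sum;
}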
-#else
-static int filter_intra_taps_4[TX_SIZES_ALL][FILTER_INTRA_MODES][4] = {
-#if CONFIG_CHROMA_2X2
- {
- { 735, 881, -537, -54 },
- { 1005, 519, -488, -11 },
- { 383, 990, -343, -6 },
- { 442, 805, -542, 319 },
- { 658, 616, -133, -116 },
- { 875, 442, -141, -151 },
- { 386, 741, -23, -80 },
- { 390, 1027, -446, 51 },
- { 679, 606, -523, 262 },
- { 903, 922, -778, -23 },
- },
-#endif
- {
- { 735, 881, -537, -54 },
- { 1005, 519, -488, -11 },
- { 383, 990, -343, -6 },
- { 442, 805, -542, 319 },
- { 658, 616, -133, -116 },
- { 875, 442, -141, -151 },
- { 386, 741, -23, -80 },
- { 390, 1027, -446, 51 },
- { 679, 606, -523, 262 },
- { 903, 922, -778, -23 },
- },
- {
- { 648, 803, -444, 16 },
- { 972, 620, -576, 7 },
- { 561, 967, -499, -5 },
- { 585, 762, -468, 144 },
- { 596, 619, -182, -9 },
- { 895, 459, -176, -153 },
- { 557, 722, -126, -129 },
- { 601, 839, -523, 105 },
- { 562, 709, -499, 251 },
- { 803, 872, -695, 43 },
- },
- {
- { 423, 728, -347, 111 },
- { 963, 685, -665, 23 },
- { 281, 1024, -480, 216 },
- { 640, 596, -437, 78 },
- { 429, 669, -259, 99 },
- { 740, 646, -415, 23 },
- { 568, 771, -346, 40 },
- { 404, 833, -486, 209 },
- { 398, 712, -423, 307 },
- { 939, 935, -887, 17 },
- },
- {
- { 477, 737, -393, 150 },
- { 881, 630, -546, 67 },
- { 506, 984, -443, -20 },
- { 114, 459, -270, 528 },
- { 433, 528, 14, 3 },
- { 837, 470, -301, -30 },
- { 181, 777, 89, -107 },
- { -29, 716, -232, 259 },
- { 589, 646, -495, 255 },
- { 740, 884, -728, 77 },
- },
-#if CONFIG_TX64X64
- {
- { 477, 737, -393, 150 },
- { 881, 630, -546, 67 },
- { 506, 984, -443, -20 },
- { 114, 459, -270, 528 },
- { 433, 528, 14, 3 },
- { 837, 470, -301, -30 },
- { 181, 777, 89, -107 },
- { -29, 716, -232, 259 },
- { 589, 646, -495, 255 },
- { 740, 884, -728, 77 },
- },
-#endif // CONFIG_TX64X64
- {
- { 735, 881, -537, -54 },
- { 1005, 519, -488, -11 },
- { 383, 990, -343, -6 },
- { 442, 805, -542, 319 },
- { 658, 616, -133, -116 },
- { 875, 442, -141, -151 },
- { 386, 741, -23, -80 },
- { 390, 1027, -446, 51 },
- { 679, 606, -523, 262 },
- { 903, 922, -778, -23 },
- },
- {
- { 735, 881, -537, -54 },
- { 1005, 519, -488, -11 },
- { 383, 990, -343, -6 },
- { 442, 805, -542, 319 },
- { 658, 616, -133, -116 },
- { 875, 442, -141, -151 },
- { 386, 741, -23, -80 },
- { 390, 1027, -446, 51 },
- { 679, 606, -523, 262 },
- { 903, 922, -778, -23 },
- },
- {
- { 648, 803, -444, 16 },
- { 972, 620, -576, 7 },
- { 561, 967, -499, -5 },
- { 585, 762, -468, 144 },
- { 596, 619, -182, -9 },
- { 895, 459, -176, -153 },
- { 557, 722, -126, -129 },
- { 601, 839, -523, 105 },
- { 562, 709, -499, 251 },
- { 803, 872, -695, 43 },
- },
- {
- { 648, 803, -444, 16 },
- { 972, 620, -576, 7 },
- { 561, 967, -499, -5 },
- { 585, 762, -468, 144 },
- { 596, 619, -182, -9 },
- { 895, 459, -176, -153 },
- { 557, 722, -126, -129 },
- { 601, 839, -523, 105 },
- { 562, 709, -499, 251 },
- { 803, 872, -695, 43 },
- },
- {
- { 423, 728, -347, 111 },
- { 963, 685, -665, 23 },
- { 281, 1024, -480, 216 },
- { 640, 596, -437, 78 },
- { 429, 669, -259, 99 },
- { 740, 646, -415, 23 },
- { 568, 771, -346, 40 },
- { 404, 833, -486, 209 },
- { 398, 712, -423, 307 },
- { 939, 935, -887, 17 },
- },
- {
- { 423, 728, -347, 111 },
- { 963, 685, -665, 23 },
- { 281, 1024, -480, 216 },
- { 640, 596, -437, 78 },
- { 429, 669, -259, 99 },
- { 740, 646, -415, 23 },
- { 568, 771, -346, 40 },
- { 404, 833, -486, 209 },
- { 398, 712, -423, 307 },
- { 939, 935, -887, 17 },
- },
- {
- { 735, 881, -537, -54 },
- { 1005, 519, -488, -11 },
- { 383, 990, -343, -6 },
- { 442, 805, -542, 319 },
- { 658, 616, -133, -116 },
- { 875, 442, -141, -151 },
- { 386, 741, -23, -80 },
- { 390, 1027, -446, 51 },
- { 679, 606, -523, 262 },
- { 903, 922, -778, -23 },
- },
- {
- { 735, 881, -537, -54 },
- { 1005, 519, -488, -11 },
- { 383, 990, -343, -6 },
- { 442, 805, -542, 319 },
- { 658, 616, -133, -116 },
- { 875, 442, -141, -151 },
- { 386, 741, -23, -80 },
- { 390, 1027, -446, 51 },
- { 679, 606, -523, 262 },
- { 903, 922, -778, -23 },
- },
- {
- { 648, 803, -444, 16 },
- { 972, 620, -576, 7 },
- { 561, 967, -499, -5 },
- { 585, 762, -468, 144 },
- { 596, 619, -182, -9 },
- { 895, 459, -176, -153 },
- { 557, 722, -126, -129 },
- { 601, 839, -523, 105 },
- { 562, 709, -499, 251 },
- { 803, 872, -695, 43 },
- },
- {
- { 648, 803, -444, 16 },
- { 972, 620, -576, 7 },
- { 561, 967, -499, -5 },
- { 585, 762, -468, 144 },
- { 596, 619, -182, -9 },
- { 895, 459, -176, -153 },
- { 557, 722, -126, -129 },
- { 601, 839, -523, 105 },
- { 562, 709, -499, 251 },
- { 803, 872, -695, 43 },
- }
-};
-#endif
-
-#if USE_3TAP_INTRA_FILTER
-static void filter_intra_predictors_3tap(uint8_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint8_t *above,
- const uint8_t *left, int mode) {
- int r, c;
- int mean, ipred;
-#if CONFIG_TX64X64
- int buffer[65][65];
-#else
- int buffer[33][33];
-#endif // CONFIG_TX64X64
- const int c0 = filter_intra_taps_3[tx_size][mode][0];
- const int c1 = filter_intra_taps_3[tx_size][mode][1];
- const int c2 = filter_intra_taps_3[tx_size][mode][2];
- const int bw = tx_size_wide[tx_size];
- const int bh = tx_size_high[tx_size];
-
- mean = 0;
- for (r = 0; r < bh; ++r) {
- mean += (int)left[r];
- }
- for (c = 0; c < bw; ++c) {
- mean += (int)above[c];
- }
- mean = (mean + ((bw + bh) >> 1)) / (bw + bh);
-
- for (r = 0; r < bh; ++r) buffer[r + 1][0] = (int)left[r] - mean;
-
- for (c = 0; c < bw + 1; ++c) buffer[0][c] = (int)above[c - 1] - mean;
- for (r = 1; r < bh + 1; ++r)
- for (c = 1; c < bw + 1; ++c) {
- ipred = c0 * buffer[r - 1][c] + c1 * buffer[r][c - 1] +
- c2 * buffer[r - 1][c - 1];
- buffer[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
- buffer[r][c] = clip_pixel(buffer[r][c] + mean) - mean;
- }
-
- for (r = 0; r < bh; ++r) {
- for (c = 0; c < bw; ++c) {
- dst[c] = clip_pixel(buffer[r + 1][c + 1] + mean);
- }
- dst += stride;
- }
-}
-#else
-static void filter_intra_predictors_4tap(uint8_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint8_t *above,
- const uint8_t *left, int mode) {
+void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
+ TX_SIZE tx_size, const uint8_t *above,
+ const uint8_t *left, int mode) {
int r, c;
- int mean, ipred;
-#if CONFIG_TX64X64
- int buffer[65][129];
-#else
- int buffer[33][65];
-#endif // CONFIG_TX64X64
- const int c0 = filter_intra_taps_4[tx_size][mode][0];
- const int c1 = filter_intra_taps_4[tx_size][mode][1];
- const int c2 = filter_intra_taps_4[tx_size][mode][2];
- const int c3 = filter_intra_taps_4[tx_size][mode][3];
+ uint8_t buffer[33][33];
const int bw = tx_size_wide[tx_size];
const int bh = tx_size_high[tx_size];
- mean = 0;
- for (r = 0; r < bh; ++r) {
- mean += (int)left[r];
- }
- for (c = 0; c < bw; ++c) {
- mean += (int)above[c];
- }
- mean = (mean + ((bw + bh) >> 1)) / (bw + bh);
-
- for (r = 0; r < bh; ++r) buffer[r + 1][0] = (int)left[r] - mean;
-
- for (c = 0; c < 2 * bw + 1; ++c) buffer[0][c] = (int)above[c - 1] - mean;
-
- for (r = 1; r < bh + 1; ++r)
- for (c = 1; c < 2 * bw + 1 - r; ++c) {
- ipred = c0 * buffer[r - 1][c] + c1 * buffer[r][c - 1] +
- c2 * buffer[r - 1][c - 1] + c3 * buffer[r - 1][c + 1];
- buffer[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
- buffer[r][c] = clip_pixel(buffer[r][c] + mean) - mean;
+ assert(bw <= 32 && bh <= 32);
+
+ // The initialization is just for silencing Jenkins static analysis warnings
+ for (r = 0; r < bh + 1; ++r)
+ memset(buffer[r], 0, (bw + 1) * sizeof(buffer[0][0]));
+
+ for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
+ memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
+
+ for (r = 1; r < bh + 1; r += 2)
+ for (c = 1; c < bw + 1; c += 4) {
+ const uint8_t p0 = buffer[r - 1][c - 1];
+ const uint8_t p1 = buffer[r - 1][c];
+ const uint8_t p2 = buffer[r - 1][c + 1];
+ const uint8_t p3 = buffer[r - 1][c + 2];
+ const uint8_t p4 = buffer[r - 1][c + 3];
+ const uint8_t p5 = buffer[r][c - 1];
+ const uint8_t p6 = buffer[r + 1][c - 1];
+ for (int k = 0; k < 8; ++k) {
+ int r_offset = k >> 2;
+ int c_offset = k & 0x03;
+ buffer[r + r_offset][c + c_offset] =
+ clip_pixel(ROUND_POWER_OF_TWO_SIGNED(
+ av1_filter_intra_taps[mode][k][0] * p0 +
+ av1_filter_intra_taps[mode][k][1] * p1 +
+ av1_filter_intra_taps[mode][k][2] * p2 +
+ av1_filter_intra_taps[mode][k][3] * p3 +
+ av1_filter_intra_taps[mode][k][4] * p4 +
+ av1_filter_intra_taps[mode][k][5] * p5 +
+ av1_filter_intra_taps[mode][k][6] * p6,
+ FILTER_INTRA_SCALE_BITS));
+ }
}
for (r = 0; r < bh; ++r) {
- for (c = 0; c < bw; ++c) {
- dst[c] = clip_pixel(buffer[r + 1][c + 1] + mean);
- }
+ memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
dst += stride;
}
}
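
A minimal standalone sketch of how one 4-wide by 2-high output unit is evaluated by the new 7-tap filter-intra predictor above. The tap values here are placeholders chosen only so that each row sums to 16 (1 << FILTER_INTRA_SCALE_BITS); the real table is av1_filter_intra_taps, which is not reproduced here. The two helpers mirror the ROUND_POWER_OF_TWO_SIGNED and clip_pixel behaviour the code above assumes.

  /* Sketch only: placeholder taps, helpers mirroring the assumed aom macros. */
  #include <stdint.h>
  #include <stdio.h>

  #define SCALE_BITS 4 /* FILTER_INTRA_SCALE_BITS: taps are in units of 1/16 */

  static int round_pow2_signed(int v, int n) { /* round half away from zero */
    const int r = (1 << n) >> 1;
    return v < 0 ? -((-v + r) >> n) : (v + r) >> n;
  }
  static uint8_t clip_u8(int v) { return v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v; }

  int main(void) {
    /* p0..p6: top-left, four top neighbours, left of row 0, left of row 1. */
    const int p[7] = { 110, 112, 115, 117, 120, 108, 106 };
    /* Placeholder taps for the 8 outputs of one 4x2 unit; each row sums to 16. */
    static const int taps[8][7] = {
      { -6, 10, 0, 0, 0, 12, 0 }, { -5, 2, 10, 0, 0, 9, 0 },
      { -3, 1, 1, 10, 0, 7, 0 },  { -3, 1, 1, 2, 10, 5, 0 },
      { -4, 6, 0, 0, 0, 2, 12 },  { -3, 2, 6, 0, 0, 2, 9 },
      { -3, 2, 2, 6, 0, 2, 7 },   { -3, 1, 2, 2, 6, 3, 5 },
    };
    for (int k = 0; k < 8; ++k) {
      int acc = 0;
      for (int t = 0; t < 7; ++t) acc += taps[k][t] * p[t];
      /* k >> 2 selects the output row, k & 3 the output column, as above. */
      printf("out[%d][%d] = %u\n", k >> 2, k & 3,
             clip_u8(round_pow2_signed(acc, SCALE_BITS)));
    }
    return 0;
  }
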
-#endif
-
-void av1_dc_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
- const uint8_t *above, const uint8_t *left) {
-#if USE_3TAP_INTRA_FILTER
- filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_DC_PRED);
-#else
- filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_DC_PRED);
-#endif
-}
-void av1_v_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
- const uint8_t *above, const uint8_t *left) {
-#if USE_3TAP_INTRA_FILTER
- filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_V_PRED);
-#else
- filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_V_PRED);
-#endif
-}
-
-void av1_h_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
- const uint8_t *above, const uint8_t *left) {
-#if USE_3TAP_INTRA_FILTER
- filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_H_PRED);
-#else
- filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_H_PRED);
-#endif
-}
-
-void av1_d45_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
- const uint8_t *above, const uint8_t *left) {
-#if USE_3TAP_INTRA_FILTER
- filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_D45_PRED);
-#else
- filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_D45_PRED);
-#endif
-}
-
-void av1_d135_filter_predictor_c(uint8_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint8_t *above,
- const uint8_t *left) {
-#if USE_3TAP_INTRA_FILTER
- filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_D135_PRED);
-#else
- filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_D135_PRED);
-#endif
-}
-
-void av1_d117_filter_predictor_c(uint8_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint8_t *above,
- const uint8_t *left) {
-#if USE_3TAP_INTRA_FILTER
- filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_D117_PRED);
-#else
- filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_D117_PRED);
-#endif
-}
-
-void av1_d153_filter_predictor_c(uint8_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint8_t *above,
- const uint8_t *left) {
-#if USE_3TAP_INTRA_FILTER
- filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_D153_PRED);
-#else
- filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_D153_PRED);
-#endif
-}
-
-void av1_d207_filter_predictor_c(uint8_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint8_t *above,
- const uint8_t *left) {
-#if USE_3TAP_INTRA_FILTER
- filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_D207_PRED);
-#else
- filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_D207_PRED);
-#endif
-}
-
-void av1_d63_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
- const uint8_t *above, const uint8_t *left) {
-#if USE_3TAP_INTRA_FILTER
- filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_D63_PRED);
-#else
- filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_D63_PRED);
-#endif
-}
-
-void av1_tm_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
- const uint8_t *above, const uint8_t *left) {
-#if USE_3TAP_INTRA_FILTER
- filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_TM_PRED);
-#else
- filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_TM_PRED);
-#endif
-}
-
-static void filter_intra_predictors(FILTER_INTRA_MODE mode, uint8_t *dst,
- ptrdiff_t stride, TX_SIZE tx_size,
- const uint8_t *above, const uint8_t *left) {
- switch (mode) {
- case FILTER_DC_PRED:
- av1_dc_filter_predictor(dst, stride, tx_size, above, left);
- break;
- case FILTER_V_PRED:
- av1_v_filter_predictor(dst, stride, tx_size, above, left);
- break;
- case FILTER_H_PRED:
- av1_h_filter_predictor(dst, stride, tx_size, above, left);
- break;
- case FILTER_D45_PRED:
- av1_d45_filter_predictor(dst, stride, tx_size, above, left);
- break;
- case FILTER_D135_PRED:
- av1_d135_filter_predictor(dst, stride, tx_size, above, left);
- break;
- case FILTER_D117_PRED:
- av1_d117_filter_predictor(dst, stride, tx_size, above, left);
- break;
- case FILTER_D153_PRED:
- av1_d153_filter_predictor(dst, stride, tx_size, above, left);
- break;
- case FILTER_D207_PRED:
- av1_d207_filter_predictor(dst, stride, tx_size, above, left);
- break;
- case FILTER_D63_PRED:
- av1_d63_filter_predictor(dst, stride, tx_size, above, left);
- break;
- case FILTER_TM_PRED:
- av1_tm_filter_predictor(dst, stride, tx_size, above, left);
- break;
- default: assert(0);
- }
-}
-#if CONFIG_HIGHBITDEPTH
-#if USE_3TAP_INTRA_FILTER
-static void highbd_filter_intra_predictors_3tap(uint16_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size,
- const uint16_t *above,
- const uint16_t *left, int mode,
- int bd) {
+static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
+ TX_SIZE tx_size,
+ const uint16_t *above,
+ const uint16_t *left, int mode,
+ int bd) {
int r, c;
- int mean, ipred;
-#if CONFIG_TX64X64
- int preds[65][65];
-#else
- int preds[33][33];
-#endif // CONFIG_TX64X64
- const int c0 = filter_intra_taps_3[tx_size][mode][0];
- const int c1 = filter_intra_taps_3[tx_size][mode][1];
- const int c2 = filter_intra_taps_3[tx_size][mode][2];
+ uint16_t buffer[33][33];
const int bw = tx_size_wide[tx_size];
const int bh = tx_size_high[tx_size];
- mean = 0;
- for (r = 0; r < bh; ++r) {
- mean += (int)left[r];
- }
- for (c = 0; c < bw; ++c) {
- mean += (int)above[c];
- }
- mean = (mean + ((bw + bh) >> 1)) / (bw + bh);
-
- for (r = 0; r < bh; ++r) preds[r + 1][0] = (int)left[r] - mean;
-
- for (c = 0; c < bw + 1; ++c) preds[0][c] = (int)above[c - 1] - mean;
-
- for (r = 1; r < bh + 1; ++r)
- for (c = 1; c < bw + 1; ++c) {
- ipred = c0 * preds[r - 1][c] + c1 * preds[r][c - 1] +
- c2 * preds[r - 1][c - 1];
- preds[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
- preds[r][c] = clip_pixel_highbd(preds[r][c] + mean, bd) - mean;
+ assert(bw <= 32 && bh <= 32);
+
+ // The initialization is just for silencing Jenkins static analysis warnings
+ for (r = 0; r < bh + 1; ++r)
+ memset(buffer[r], 0, (bw + 1) * sizeof(buffer[0][0]));
+
+ for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
+ memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
+
+ for (r = 1; r < bh + 1; r += 2)
+ for (c = 1; c < bw + 1; c += 4) {
+ const uint16_t p0 = buffer[r - 1][c - 1];
+ const uint16_t p1 = buffer[r - 1][c];
+ const uint16_t p2 = buffer[r - 1][c + 1];
+ const uint16_t p3 = buffer[r - 1][c + 2];
+ const uint16_t p4 = buffer[r - 1][c + 3];
+ const uint16_t p5 = buffer[r][c - 1];
+ const uint16_t p6 = buffer[r + 1][c - 1];
+ for (int k = 0; k < 8; ++k) {
+ int r_offset = k >> 2;
+ int c_offset = k & 0x03;
+ buffer[r + r_offset][c + c_offset] =
+ clip_pixel_highbd(ROUND_POWER_OF_TWO_SIGNED(
+ av1_filter_intra_taps[mode][k][0] * p0 +
+ av1_filter_intra_taps[mode][k][1] * p1 +
+ av1_filter_intra_taps[mode][k][2] * p2 +
+ av1_filter_intra_taps[mode][k][3] * p3 +
+ av1_filter_intra_taps[mode][k][4] * p4 +
+ av1_filter_intra_taps[mode][k][5] * p5 +
+ av1_filter_intra_taps[mode][k][6] * p6,
+ FILTER_INTRA_SCALE_BITS),
+ bd);
+ }
}
for (r = 0; r < bh; ++r) {
- for (c = 0; c < bw; ++c) {
- dst[c] = clip_pixel_highbd(preds[r + 1][c + 1] + mean, bd);
- }
+ memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0]));
dst += stride;
}
}
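
The high-bit-depth variant differs from the 8-bit path above only in the final clamp. A small sketch of that clamp, assuming the usual clip_pixel_highbd semantics of restricting a value to [0, (1 << bd) - 1]:

  #include <stdint.h>

  /* Assumed semantics of clip_pixel_highbd: clamp to the bd-bit pixel range. */
  static uint16_t clip_pixel_highbd_sketch(int val, int bd) {
    const int max = (1 << bd) - 1; /* 255, 1023 or 4095 for bd = 8, 10, 12 */
    return (uint16_t)(val < 0 ? 0 : val > max ? max : val);
  }
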
-#else
-static void highbd_filter_intra_predictors_4tap(uint16_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size,
- const uint16_t *above,
- const uint16_t *left, int mode,
- int bd) {
- int r, c;
- int mean, ipred;
-#if CONFIG_TX64X64
- int preds[65][129];
-#else
- int preds[33][65];
-#endif // CONFIG_TX64X64
- const int c0 = filter_intra_taps_4[tx_size][mode][0];
- const int c1 = filter_intra_taps_4[tx_size][mode][1];
- const int c2 = filter_intra_taps_4[tx_size][mode][2];
- const int c3 = filter_intra_taps_4[tx_size][mode][3];
- const int bw = tx_size_wide[tx_size];
- const int bh = tx_size_high[tx_size];
- mean = 0;
- for (r = 0; r < bh; ++r) {
- mean += (int)left[r];
- }
- for (c = 0; c < bw; ++c) {
- mean += (int)above[c];
- }
- mean = (mean + ((bw + bh) >> 1)) / (bw + bh);
-
- for (r = 0; r < bh; ++r) preds[r + 1][0] = (int)left[r] - mean;
-
- for (c = 0; c < 2 * bw + 1; ++c) preds[0][c] = (int)above[c - 1] - mean;
-
- for (r = 1; r < bh + 1; ++r)
- for (c = 1; c < 2 * bw + 1 - r; ++c) {
- ipred = c0 * preds[r - 1][c] + c1 * preds[r][c - 1] +
- c2 * preds[r - 1][c - 1] + c3 * preds[r - 1][c + 1];
- preds[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
- preds[r][c] = clip_pixel_highbd(preds[r][c] + mean, bd) - mean;
- }
+static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
+ if (plane == 0) {
+ const PREDICTION_MODE mode = mbmi->mode;
+ return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
+ mode == SMOOTH_H_PRED);
+ } else {
+ // uv_mode is not set for inter blocks, so need to explicitly
+ // detect that case.
+ if (is_inter_block(mbmi)) return 0;
- for (r = 0; r < bh; ++r) {
- for (c = 0; c < bw; ++c) {
- dst[c] = clip_pixel_highbd(preds[r + 1][c + 1] + mean, bd);
- }
- dst += stride;
+ const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
+ return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
+ uv_mode == UV_SMOOTH_H_PRED);
}
}
-#endif
-void av1_highbd_dc_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint16_t *above,
- const uint16_t *left, int bd) {
-#if USE_3TAP_INTRA_FILTER
- highbd_filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_DC_PRED, bd);
-#else
- highbd_filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_DC_PRED, bd);
-#endif
-}
-
-void av1_highbd_v_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint16_t *above,
- const uint16_t *left, int bd) {
-#if USE_3TAP_INTRA_FILTER
- highbd_filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_V_PRED, bd);
-#else
- highbd_filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_V_PRED, bd);
-#endif
-}
-
-void av1_highbd_h_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint16_t *above,
- const uint16_t *left, int bd) {
-#if USE_3TAP_INTRA_FILTER
- highbd_filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_H_PRED, bd);
-#else
- highbd_filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_H_PRED, bd);
-#endif
-}
-
-void av1_highbd_d45_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint16_t *above,
- const uint16_t *left, int bd) {
-#if USE_3TAP_INTRA_FILTER
- highbd_filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_D45_PRED, bd);
-#else
- highbd_filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_D45_PRED, bd);
-#endif
-}
-
-void av1_highbd_d135_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint16_t *above,
- const uint16_t *left, int bd) {
-#if USE_3TAP_INTRA_FILTER
- highbd_filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_D135_PRED, bd);
-#else
- highbd_filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_D135_PRED, bd);
-#endif
-}
-
-void av1_highbd_d117_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint16_t *above,
- const uint16_t *left, int bd) {
-#if USE_3TAP_INTRA_FILTER
- highbd_filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_D117_PRED, bd);
-#else
- highbd_filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_D117_PRED, bd);
-#endif
-}
-
-void av1_highbd_d153_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint16_t *above,
- const uint16_t *left, int bd) {
-#if USE_3TAP_INTRA_FILTER
- highbd_filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_D153_PRED, bd);
-#else
- highbd_filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_D153_PRED, bd);
-#endif
-}
-
-void av1_highbd_d207_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint16_t *above,
- const uint16_t *left, int bd) {
-#if USE_3TAP_INTRA_FILTER
- highbd_filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_D207_PRED, bd);
-#else
- highbd_filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_D207_PRED, bd);
-#endif
-}
-
-void av1_highbd_d63_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint16_t *above,
- const uint16_t *left, int bd) {
-#if USE_3TAP_INTRA_FILTER
- highbd_filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_D63_PRED, bd);
-#else
- highbd_filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_D63_PRED, bd);
-#endif
-}
+static int get_filt_type(const MACROBLOCKD *xd, int plane) {
+ int ab_sm, le_sm;
-void av1_highbd_tm_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint16_t *above,
- const uint16_t *left, int bd) {
-#if USE_3TAP_INTRA_FILTER
- highbd_filter_intra_predictors_3tap(dst, stride, tx_size, above, left,
- FILTER_TM_PRED, bd);
-#else
- highbd_filter_intra_predictors_4tap(dst, stride, tx_size, above, left,
- FILTER_TM_PRED, bd);
-#endif
-}
-
-static void highbd_filter_intra_predictors(FILTER_INTRA_MODE mode,
- uint16_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- switch (mode) {
- case FILTER_DC_PRED:
- av1_highbd_dc_filter_predictor(dst, stride, tx_size, above, left, bd);
- break;
- case FILTER_V_PRED:
- av1_highbd_v_filter_predictor(dst, stride, tx_size, above, left, bd);
- break;
- case FILTER_H_PRED:
- av1_highbd_h_filter_predictor(dst, stride, tx_size, above, left, bd);
- break;
- case FILTER_D45_PRED:
- av1_highbd_d45_filter_predictor(dst, stride, tx_size, above, left, bd);
- break;
- case FILTER_D135_PRED:
- av1_highbd_d135_filter_predictor(dst, stride, tx_size, above, left, bd);
- break;
- case FILTER_D117_PRED:
- av1_highbd_d117_filter_predictor(dst, stride, tx_size, above, left, bd);
- break;
- case FILTER_D153_PRED:
- av1_highbd_d153_filter_predictor(dst, stride, tx_size, above, left, bd);
- break;
- case FILTER_D207_PRED:
- av1_highbd_d207_filter_predictor(dst, stride, tx_size, above, left, bd);
- break;
- case FILTER_D63_PRED:
- av1_highbd_d63_filter_predictor(dst, stride, tx_size, above, left, bd);
- break;
- case FILTER_TM_PRED:
- av1_highbd_tm_filter_predictor(dst, stride, tx_size, above, left, bd);
- break;
- default: assert(0);
+ if (plane == 0) {
+ const MB_MODE_INFO *ab = xd->above_mbmi;
+ const MB_MODE_INFO *le = xd->left_mbmi;
+ ab_sm = ab ? is_smooth(ab, plane) : 0;
+ le_sm = le ? is_smooth(le, plane) : 0;
+ } else {
+ const MB_MODE_INFO *ab = xd->chroma_above_mbmi;
+ const MB_MODE_INFO *le = xd->chroma_left_mbmi;
+ ab_sm = ab ? is_smooth(ab, plane) : 0;
+ le_sm = le ? is_smooth(le, plane) : 0;
}
+
+ return (ab_sm || le_sm) ? 1 : 0;
}
-#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_FILTER_INTRA
-#if CONFIG_INTRA_EDGE
-static int intra_edge_filter_strength(int bsz, int delta) {
+static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
const int d = abs(delta);
int strength = 0;
- switch (bsz) {
- case 4:
- if (d < 56) {
- strength = 0;
- } else if (d < 90) {
- strength = 1;
- }
- break;
- case 8:
- if (d < 8) {
- strength = 0;
- } else if (d < 32) {
- strength = 1;
- } else if (d < 90) {
- strength = 3;
- }
- break;
- case 16:
- if (d < 4) {
- strength = 0;
- } else if (d < 16) {
- strength = 1;
- } else if (d < 90) {
- strength = 3;
- }
- break;
- case 32:
- if (d < 16) {
- strength = 2;
- } else if (d < 90) {
- strength = 3;
- }
- break;
- default: strength = 0; break;
+ const int blk_wh = bs0 + bs1;
+ if (type == 0) {
+ if (blk_wh <= 8) {
+ if (d >= 56) strength = 1;
+ } else if (blk_wh <= 12) {
+ if (d >= 40) strength = 1;
+ } else if (blk_wh <= 16) {
+ if (d >= 40) strength = 1;
+ } else if (blk_wh <= 24) {
+ if (d >= 8) strength = 1;
+ if (d >= 16) strength = 2;
+ if (d >= 32) strength = 3;
+ } else if (blk_wh <= 32) {
+ if (d >= 1) strength = 1;
+ if (d >= 4) strength = 2;
+ if (d >= 32) strength = 3;
+ } else {
+ if (d >= 1) strength = 3;
+ }
+ } else {
+ if (blk_wh <= 8) {
+ if (d >= 40) strength = 1;
+ if (d >= 64) strength = 2;
+ } else if (blk_wh <= 16) {
+ if (d >= 20) strength = 1;
+ if (d >= 48) strength = 2;
+ } else if (blk_wh <= 24) {
+ if (d >= 4) strength = 3;
+ } else {
+ if (d >= 1) strength = 3;
+ }
}
-
return strength;
}
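
Worked example of the thresholds above: a 16x16 luma block with non-smooth neighbours (type 0, blk_wh = 32) gets strength 2 for |delta| = 6 and strength 3 for |delta| = 32, while a 4x4 block (blk_wh = 8) is only filtered once |delta| reaches 56; with a smooth neighbour (type 1) the same 4x4 block is already filtered at |delta| = 40.
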
@@ -2229,7 +1015,7 @@ void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
uint8_t edge[129];
memcpy(edge, p, sz * sizeof(*p));
- for (int i = 1; i < sz - 1; i++) {
+ for (int i = 1; i < sz; i++) {
int s = 0;
for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
int k = i - 2 + j;
@@ -2242,7 +1028,16 @@ void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
}
}
-#if CONFIG_HIGHBITDEPTH
+static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
+ const int kernel[3] = { 5, 6, 5 };
+
+ int s = (p_left[0] * kernel[0]) + (p_above[-1] * kernel[1]) +
+ (p_above[0] * kernel[2]);
+ s = (s + 8) >> 4;
+ p_above[-1] = s;
+ p_left[-1] = s;
+}
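
The corner smoothing above is a (5, 6, 5)/16 blend of the first left pixel, the above-left pixel and the first above pixel. For example, with p_left[0] = 100, p_above[-1] = 120 and p_above[0] = 140, s = (500 + 720 + 700 + 8) >> 4 = 120, which replaces both copies of the above-left sample.
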
+
void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
if (!strength) return;
@@ -2253,7 +1048,7 @@ void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
uint16_t edge[129];
memcpy(edge, p, sz * sizeof(*p));
- for (int i = 1; i < sz - 1; i++) {
+ for (int i = 1; i < sz; i++) {
int s = 0;
for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
int k = i - 2 + j;
@@ -2265,12 +1060,22 @@ void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
p[i] = s;
}
}
-#endif // CONFIG_HIGHBITDEPTH
-#if CONFIG_INTRA_EDGE_UPSAMPLE
-static int use_intra_edge_upsample(int bsz, int delta) {
+static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
+ const int kernel[3] = { 5, 6, 5 };
+
+ int s = (p_left[0] * kernel[0]) + (p_above[-1] * kernel[1]) +
+ (p_above[0] * kernel[2]);
+ s = (s + 8) >> 4;
+ p_above[-1] = s;
+ p_left[-1] = s;
+}
+
+static int use_intra_edge_upsample(int bs0, int bs1, int delta, int type) {
const int d = abs(delta);
- return (bsz == 4 && d > 0 && d < 56);
+ const int blk_wh = bs0 + bs1;
+ if (d <= 0 || d >= 40) return 0;
+ return type ? (blk_wh <= 8) : (blk_wh <= 16);
}
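
When use_intra_edge_upsample returns true, the reference edge is doubled in length before directional prediction (av1_upsample_intra_edge_c below). A self-contained sketch of that 2x upsampling follows; the (-1, 9, 9, -1)/16 half-sample kernel used here is an assumption for illustration, not copied from the patch:

  #include <stdint.h>
  #include <stdio.h>

  static uint8_t clip_u8(int v) { return v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v; }

  /* Assumed kernel; in[-1] .. in[n + 1] must be readable (caller pads the edge),
   * out receives 2 * n samples. */
  static void upsample_edge_sketch(const uint8_t *in, int n, uint8_t *out) {
    for (int i = 0; i < n; ++i) {
      out[2 * i] = in[i]; /* integer positions are copied unchanged */
      const int half = -in[i - 1] + 9 * in[i] + 9 * in[i + 1] - in[i + 2];
      out[2 * i + 1] = clip_u8((half + 8) >> 4); /* interpolated half position */
    }
  }

  int main(void) {
    const uint8_t padded[8] = { 10, 10, 20, 40, 80, 120, 120, 120 };
    uint8_t out[8];
    upsample_edge_sketch(padded + 1, 4, out); /* the edge is padded[1..4] */
    for (int i = 0; i < 8; ++i) printf("%d ", out[i]);
    printf("\n");
    return 0;
  }
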
void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
@@ -2296,7 +1101,6 @@ void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
}
}
-#if CONFIG_HIGHBITDEPTH
void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
// interpolate half-sample positions
assert(sz <= MAX_UPSAMPLE_SZ);
@@ -2320,16 +1124,13 @@ void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
p[2 * i] = in[i + 2];
}
}
-#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
-
-#endif // CONFIG_INTRA_EDGE
-#if CONFIG_HIGHBITDEPTH
static void build_intra_predictors_high(
const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8,
- int dst_stride, PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px,
- int n_topright_px, int n_left_px, int n_bottomleft_px, int plane) {
+ int dst_stride, PREDICTION_MODE mode, int angle_delta,
+ FILTER_INTRA_MODE filter_intra_mode, TX_SIZE tx_size,
+ int disable_edge_filter, int n_top_px, int n_topright_px, int n_left_px,
+ int n_bottomleft_px, int plane) {
int i;
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
@@ -2339,36 +1140,25 @@ static void build_intra_predictors_high(
uint16_t *const left_col = left_data + 16;
const int txwpx = tx_size_wide[tx_size];
const int txhpx = tx_size_high[tx_size];
-#if !INTRA_USES_RECT_TRANSFORMS
- assert(txwpx == txhpx);
-#endif // !INTRA_USES_RECT_TRANSFORMS
int need_left = extend_modes[mode] & NEED_LEFT;
int need_above = extend_modes[mode] & NEED_ABOVE;
int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
const uint16_t *above_ref = ref - ref_stride;
-#if CONFIG_EXT_INTRA
+ const uint16_t *left_ref = ref - 1;
int p_angle = 0;
- const int is_dr_mode = av1_is_directional_mode(mode, xd->mi[0]->mbmi.sb_type);
-#endif // CONFIG_EXT_INTRA
-#if CONFIG_FILTER_INTRA
- const FILTER_INTRA_MODE_INFO *filter_intra_mode_info =
- &xd->mi[0]->mbmi.filter_intra_mode_info;
- const FILTER_INTRA_MODE filter_intra_mode =
- filter_intra_mode_info->filter_intra_mode[plane != 0];
-#endif // CONFIG_FILTER_INTRA
+ const int is_dr_mode = av1_is_directional_mode(mode);
+ const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
int base = 128 << (xd->bd - 8);
+ // The default values if ref pixels are not available:
// base-1 base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
// base+1 A B .. Y Z
// base+1 C D .. W X
// base+1 E F .. U V
// base+1 G H .. S T T T T T
- aom_memset16(left_data, base + 1, sizeof(left_data) / sizeof(*left_data));
-#if CONFIG_EXT_INTRA
if (is_dr_mode) {
- p_angle = mode_to_angle_map[mode] +
- xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP;
+ p_angle = mode_to_angle_map[mode] + angle_delta;
if (p_angle <= 90)
need_above = 1, need_left = 0, need_above_left = 1;
else if (p_angle < 180)
@@ -2376,29 +1166,20 @@ static void build_intra_predictors_high(
else
need_above = 0, need_left = 1, need_above_left = 1;
}
-#endif // CONFIG_EXT_INTRA
-#if CONFIG_FILTER_INTRA
- if (filter_intra_mode_info->use_filter_intra_mode[plane != 0])
- need_left = need_above = need_above_left = 1;
-#endif // CONFIG_FILTER_INTRA
+ if (use_filter_intra) need_left = need_above = need_above_left = 1;
- (void)plane;
assert(n_top_px >= 0);
assert(n_topright_px >= 0);
assert(n_left_px >= 0);
assert(n_bottomleft_px >= 0);
if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
-#if CONFIG_INTRA_EDGE
int val;
if (need_left) {
val = (n_top_px > 0) ? above_ref[0] : base + 1;
} else {
- val = (n_left_px > 0) ? ref[-1] : base - 1;
+ val = (n_left_px > 0) ? left_ref[0] : base - 1;
}
-#else
- const int val = need_left ? base + 1 : base - 1;
-#endif // CONFIG_INTRA_EDGE
for (i = 0; i < txhpx; ++i) {
aom_memset16(dst, val, txwpx);
dst += dst_stride;
@@ -2408,56 +1189,34 @@ static void build_intra_predictors_high(
// NEED_LEFT
if (need_left) {
-#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA
int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
-#if CONFIG_FILTER_INTRA
- if (filter_intra_mode_info->use_filter_intra_mode[plane != 0])
- need_bottom = 0;
-#endif // CONFIG_FILTER_INTRA
-#if CONFIG_EXT_INTRA
+ if (use_filter_intra) need_bottom = 0;
if (is_dr_mode) need_bottom = p_angle > 180;
-#endif // CONFIG_EXT_INTRA
-#else
- const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
-#endif // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA
const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
i = 0;
if (n_left_px > 0) {
- for (; i < n_left_px; i++) left_col[i] = ref[i * ref_stride - 1];
+ for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
if (need_bottom && n_bottomleft_px > 0) {
assert(i == txhpx);
for (; i < txhpx + n_bottomleft_px; i++)
- left_col[i] = ref[i * ref_stride - 1];
+ left_col[i] = left_ref[i * ref_stride];
}
if (i < num_left_pixels_needed)
aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
} else {
-#if CONFIG_INTRA_EDGE
if (n_top_px > 0) {
aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
} else {
-#endif // CONFIG_INTRA_EDGE
aom_memset16(left_col, base + 1, num_left_pixels_needed);
-#if CONFIG_INTRA_EDGE
}
-#endif // CONFIG_INTRA_EDGE
}
}
// NEED_ABOVE
if (need_above) {
-#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA
int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
-#if CONFIG_FILTER_INTRA
- if (filter_intra_mode_info->use_filter_intra_mode[plane != 0])
- need_right = 1;
-#endif // CONFIG_FILTER_INTRA
-#if CONFIG_EXT_INTRA
+ if (use_filter_intra) need_right = 0;
if (is_dr_mode) need_right = p_angle < 90;
-#endif // CONFIG_EXT_INTRA
-#else
- const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
-#endif // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA
const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
if (n_top_px > 0) {
memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
@@ -2472,92 +1231,75 @@ static void build_intra_predictors_high(
aom_memset16(&above_row[i], above_row[i - 1],
num_top_pixels_needed - i);
} else {
-#if CONFIG_INTRA_EDGE
if (n_left_px > 0) {
- aom_memset16(above_row, ref[-1], num_top_pixels_needed);
+ aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
} else {
-#endif // CONFIG_INTRA_EDGE
aom_memset16(above_row, base - 1, num_top_pixels_needed);
-#if CONFIG_INTRA_EDGE
}
-#endif // CONFIG_INTRA_EDGE
}
}
if (need_above_left) {
-#if CONFIG_INTRA_EDGE
if (n_top_px > 0 && n_left_px > 0) {
above_row[-1] = above_ref[-1];
} else if (n_top_px > 0) {
above_row[-1] = above_ref[0];
} else if (n_left_px > 0) {
- above_row[-1] = ref[-1];
+ above_row[-1] = left_ref[0];
} else {
above_row[-1] = base;
}
-#else
- above_row[-1] =
- n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1;
-#endif // CONFIG_INTRA_EDGE
left_col[-1] = above_row[-1];
}
-#if CONFIG_FILTER_INTRA
- if (filter_intra_mode_info->use_filter_intra_mode[plane != 0]) {
- highbd_filter_intra_predictors(filter_intra_mode, dst, dst_stride, tx_size,
- above_row, left_col, xd->bd);
+ if (use_filter_intra) {
+ highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
+ filter_intra_mode, xd->bd);
return;
}
-#endif // CONFIG_FILTER_INTRA
-#if CONFIG_EXT_INTRA
if (is_dr_mode) {
-#if CONFIG_INTRA_INTERP
- INTRA_FILTER filter = INTRA_FILTER_LINEAR;
- if (plane == 0 && av1_is_intra_filter_switchable(p_angle))
- filter = xd->mi[0]->mbmi.intra_filter;
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE
- const int need_right = p_angle < 90;
- const int need_bottom = p_angle > 180;
- if (p_angle != 90 && p_angle != 180) {
- const int ab_le = need_above_left ? 1 : 0;
- if (need_above && n_top_px > 0) {
- const int strength = intra_edge_filter_strength(txwpx, p_angle - 90);
- const int n_px = n_top_px + ab_le + (need_right ? n_topright_px : 0);
- av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
+ int upsample_above = 0;
+ int upsample_left = 0;
+ if (!disable_edge_filter) {
+ const int need_right = p_angle < 90;
+ const int need_bottom = p_angle > 180;
+ const int filt_type = get_filt_type(xd, plane);
+ if (p_angle != 90 && p_angle != 180) {
+ const int ab_le = need_above_left ? 1 : 0;
+ if (need_above && need_left && (txwpx + txhpx >= 24)) {
+ filter_intra_edge_corner_high(above_row, left_col);
+ }
+ if (need_above && n_top_px > 0) {
+ const int strength =
+ intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
+ const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
+ av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
+ }
+ if (need_left && n_left_px > 0) {
+ const int strength = intra_edge_filter_strength(
+ txhpx, txwpx, p_angle - 180, filt_type);
+ const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
+ av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
+ }
}
- if (need_left && n_left_px > 0) {
- const int strength = intra_edge_filter_strength(txhpx, p_angle - 180);
- const int n_px =
- n_left_px + ab_le + (need_bottom ? n_bottomleft_px : 0);
- av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
+ upsample_above =
+ use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
+ if (need_above && upsample_above) {
+ const int n_px = txwpx + (need_right ? txhpx : 0);
+ av1_upsample_intra_edge_high(above_row, n_px, xd->bd);
+ }
+ upsample_left =
+ use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
+ if (need_left && upsample_left) {
+ const int n_px = txhpx + (need_bottom ? txwpx : 0);
+ av1_upsample_intra_edge_high(left_col, n_px, xd->bd);
}
}
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- const int upsample_above = use_intra_edge_upsample(txwpx, p_angle - 90);
- if (need_above && upsample_above) {
- const int n_px = txwpx + (need_right ? txhpx : 0);
- av1_upsample_intra_edge_high(above_row, n_px, xd->bd);
- }
- const int upsample_left = use_intra_edge_upsample(txhpx, p_angle - 180);
- if (need_left && upsample_left) {
- const int n_px = txhpx + (need_bottom ? txwpx : 0);
- av1_upsample_intra_edge_high(left_col, n_px, xd->bd);
- }
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
-#endif // CONFIG_INTRA_EDGE
highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
-#if CONFIG_INTRA_INTERP
- filter,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- upsample_above, upsample_left,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- p_angle, xd->bd);
+ upsample_above, upsample_left, p_angle, xd->bd);
return;
}
-#endif // CONFIG_EXT_INTRA
// predict
if (mode == DC_PRED) {
@@ -2567,52 +1309,41 @@ static void build_intra_predictors_high(
pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, xd->bd);
}
}
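
When a neighbouring row or column is unavailable, the DC dispatch at the end of build_intra_predictors_high falls back to averaging only the edges that exist. A minimal 8-bit sketch of that behaviour (an assumption about the usual DC fallback, not code from this patch):

  #include <stdint.h>
  #include <string.h>

  /* Average whichever edges are available, or use mid-grey when neither is. */
  static void dc_pred_sketch(uint8_t *dst, int stride, int w, int h,
                             const uint8_t *above, const uint8_t *left,
                             int have_above, int have_left) {
    int sum = 0, count = 0, dc = 128; /* 128 = mid-grey for 8-bit content */
    if (have_above) { for (int i = 0; i < w; ++i) sum += above[i]; count += w; }
    if (have_left)  { for (int i = 0; i < h; ++i) sum += left[i];  count += h; }
    if (count) dc = (sum + (count >> 1)) / count; /* rounded average */
    for (int r = 0; r < h; ++r, dst += stride) memset(dst, dc, w);
  }
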
-#endif // CONFIG_HIGHBITDEPTH
static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
int ref_stride, uint8_t *dst, int dst_stride,
- PREDICTION_MODE mode, TX_SIZE tx_size,
+ PREDICTION_MODE mode, int angle_delta,
+ FILTER_INTRA_MODE filter_intra_mode,
+ TX_SIZE tx_size, int disable_edge_filter,
int n_top_px, int n_topright_px,
int n_left_px, int n_bottomleft_px,
int plane) {
int i;
const uint8_t *above_ref = ref - ref_stride;
+ const uint8_t *left_ref = ref - 1;
DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
uint8_t *const above_row = above_data + 16;
uint8_t *const left_col = left_data + 16;
const int txwpx = tx_size_wide[tx_size];
const int txhpx = tx_size_high[tx_size];
-#if !INTRA_USES_RECT_TRANSFORMS
- assert(txwpx == txhpx);
-#endif // !INTRA_USES_RECT_TRANSFORMS
int need_left = extend_modes[mode] & NEED_LEFT;
int need_above = extend_modes[mode] & NEED_ABOVE;
int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
-#if CONFIG_EXT_INTRA
int p_angle = 0;
- const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- const int is_dr_mode = av1_is_directional_mode(mode, mbmi->sb_type);
-#endif // CONFIG_EXT_INTRA
-#if CONFIG_FILTER_INTRA
- const FILTER_INTRA_MODE_INFO *filter_intra_mode_info =
- &xd->mi[0]->mbmi.filter_intra_mode_info;
- const FILTER_INTRA_MODE filter_intra_mode =
- filter_intra_mode_info->filter_intra_mode[plane != 0];
-#endif // CONFIG_FILTER_INTRA
+ const int is_dr_mode = av1_is_directional_mode(mode);
+ const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
+ // The default values if ref pixels are not available:
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
// 129 C D .. W X
// 129 E F .. U V
// 129 G H .. S T T T T T
// ..
- memset(left_data, 129, sizeof(left_data));
-#if CONFIG_EXT_INTRA
if (is_dr_mode) {
- p_angle = mode_to_angle_map[mode] +
- xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP;
+ p_angle = mode_to_angle_map[mode] + angle_delta;
if (p_angle <= 90)
need_above = 1, need_left = 0, need_above_left = 1;
else if (p_angle < 180)
@@ -2620,30 +1351,20 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
else
need_above = 0, need_left = 1, need_above_left = 1;
}
-#endif // CONFIG_EXT_INTRA
-#if CONFIG_FILTER_INTRA
- if (filter_intra_mode_info->use_filter_intra_mode[plane != 0])
- need_left = need_above = need_above_left = 1;
-#endif // CONFIG_FILTER_INTRA
-
- (void)xd;
- (void)plane;
+ if (use_filter_intra) need_left = need_above = need_above_left = 1;
+
assert(n_top_px >= 0);
assert(n_topright_px >= 0);
assert(n_left_px >= 0);
assert(n_bottomleft_px >= 0);
if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
-#if CONFIG_INTRA_EDGE
int val;
if (need_left) {
val = (n_top_px > 0) ? above_ref[0] : 129;
} else {
- val = (n_left_px > 0) ? ref[-1] : 127;
+ val = (n_left_px > 0) ? left_ref[0] : 127;
}
-#else
- const int val = need_left ? 129 : 127;
-#endif // CONFIG_INTRA_EDGE
for (i = 0; i < txhpx; ++i) {
memset(dst, val, txwpx);
dst += dst_stride;
@@ -2653,56 +1374,34 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
// NEED_LEFT
if (need_left) {
-#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA
int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
-#if CONFIG_FILTER_INTRA
- if (filter_intra_mode_info->use_filter_intra_mode[plane != 0])
- need_bottom = 0;
-#endif // CONFIG_FILTER_INTRA
-#if CONFIG_EXT_INTRA
+ if (use_filter_intra) need_bottom = 0;
if (is_dr_mode) need_bottom = p_angle > 180;
-#endif // CONFIG_EXT_INTRA
-#else
- const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
-#endif // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA
const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
i = 0;
if (n_left_px > 0) {
- for (; i < n_left_px; i++) left_col[i] = ref[i * ref_stride - 1];
+ for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
if (need_bottom && n_bottomleft_px > 0) {
assert(i == txhpx);
for (; i < txhpx + n_bottomleft_px; i++)
- left_col[i] = ref[i * ref_stride - 1];
+ left_col[i] = left_ref[i * ref_stride];
}
if (i < num_left_pixels_needed)
memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
} else {
-#if CONFIG_INTRA_EDGE
if (n_top_px > 0) {
memset(left_col, above_ref[0], num_left_pixels_needed);
} else {
-#endif // CONFIG_INTRA_EDGE
memset(left_col, 129, num_left_pixels_needed);
-#if CONFIG_INTRA_EDGE
}
-#endif // CONFIG_INTRA_EDGE
}
}
// NEED_ABOVE
if (need_above) {
-#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA
int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
-#if CONFIG_FILTER_INTRA
- if (filter_intra_mode_info->use_filter_intra_mode[plane != 0])
- need_right = 1;
-#endif // CONFIG_FILTER_INTRA
-#if CONFIG_EXT_INTRA
+ if (use_filter_intra) need_right = 0;
if (is_dr_mode) need_right = p_angle < 90;
-#endif // CONFIG_EXT_INTRA
-#else
- const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
-#endif // CONFIG_EXT_INTRA || CONFIG_FITLER_INTRA
const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
if (n_top_px > 0) {
memcpy(above_row, above_ref, n_top_px);
@@ -2715,91 +1414,75 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
if (i < num_top_pixels_needed)
memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
} else {
-#if CONFIG_INTRA_EDGE
if (n_left_px > 0) {
- memset(above_row, ref[-1], num_top_pixels_needed);
+ memset(above_row, left_ref[0], num_top_pixels_needed);
} else {
-#endif // CONFIG_INTRA_EDGE
memset(above_row, 127, num_top_pixels_needed);
-#if CONFIG_INTRA_EDGE
}
-#endif // CONFIG_INTRA_EDGE
}
}
if (need_above_left) {
-#if CONFIG_INTRA_EDGE
if (n_top_px > 0 && n_left_px > 0) {
above_row[-1] = above_ref[-1];
} else if (n_top_px > 0) {
above_row[-1] = above_ref[0];
} else if (n_left_px > 0) {
- above_row[-1] = ref[-1];
+ above_row[-1] = left_ref[0];
} else {
above_row[-1] = 128;
}
-#else
- above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : 129) : 127;
-#endif // CONFIG_INTRA_EDGE
left_col[-1] = above_row[-1];
}
-#if CONFIG_FILTER_INTRA
- if (filter_intra_mode_info->use_filter_intra_mode[plane != 0]) {
- filter_intra_predictors(filter_intra_mode, dst, dst_stride, tx_size,
- above_row, left_col);
+ if (use_filter_intra) {
+ av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
+ filter_intra_mode);
return;
}
-#endif // CONFIG_FILTER_INTRA
-#if CONFIG_EXT_INTRA
if (is_dr_mode) {
-#if CONFIG_INTRA_INTERP
- INTRA_FILTER filter = INTRA_FILTER_LINEAR;
- if (plane == 0 && av1_is_intra_filter_switchable(p_angle))
- filter = xd->mi[0]->mbmi.intra_filter;
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE
- const int need_right = p_angle < 90;
- const int need_bottom = p_angle > 180;
- if (p_angle != 90 && p_angle != 180) {
- const int ab_le = need_above_left ? 1 : 0;
- if (need_above && n_top_px > 0) {
- const int strength = intra_edge_filter_strength(txwpx, p_angle - 90);
- const int n_px = n_top_px + ab_le + (need_right ? n_topright_px : 0);
- av1_filter_intra_edge(above_row - ab_le, n_px, strength);
+ int upsample_above = 0;
+ int upsample_left = 0;
+ if (!disable_edge_filter) {
+ const int need_right = p_angle < 90;
+ const int need_bottom = p_angle > 180;
+ const int filt_type = get_filt_type(xd, plane);
+ if (p_angle != 90 && p_angle != 180) {
+ const int ab_le = need_above_left ? 1 : 0;
+ if (need_above && need_left && (txwpx + txhpx >= 24)) {
+ filter_intra_edge_corner(above_row, left_col);
+ }
+ if (need_above && n_top_px > 0) {
+ const int strength =
+ intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
+ const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
+ av1_filter_intra_edge(above_row - ab_le, n_px, strength);
+ }
+ if (need_left && n_left_px > 0) {
+ const int strength = intra_edge_filter_strength(
+ txhpx, txwpx, p_angle - 180, filt_type);
+ const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
+ av1_filter_intra_edge(left_col - ab_le, n_px, strength);
+ }
}
- if (need_left && n_left_px > 0) {
- const int strength = intra_edge_filter_strength(txhpx, p_angle - 180);
- const int n_px =
- n_left_px + ab_le + (need_bottom ? n_bottomleft_px : 0);
- av1_filter_intra_edge(left_col - ab_le, n_px, strength);
+ upsample_above =
+ use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
+ if (need_above && upsample_above) {
+ const int n_px = txwpx + (need_right ? txhpx : 0);
+ av1_upsample_intra_edge(above_row, n_px);
+ }
+ upsample_left =
+ use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
+ if (need_left && upsample_left) {
+ const int n_px = txhpx + (need_bottom ? txwpx : 0);
+ av1_upsample_intra_edge(left_col, n_px);
}
}
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- const int upsample_above = use_intra_edge_upsample(txwpx, p_angle - 90);
- if (need_above && upsample_above) {
- const int n_px = txwpx + (need_right ? txhpx : 0);
- av1_upsample_intra_edge(above_row, n_px);
- }
- const int upsample_left = use_intra_edge_upsample(txhpx, p_angle - 180);
- if (need_left && upsample_left) {
- const int n_px = txhpx + (need_bottom ? txwpx : 0);
- av1_upsample_intra_edge(left_col, n_px);
- }
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
-#endif // CONFIG_INTRA_EDGE
- dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
-#if CONFIG_INTRA_INTERP
- filter,
-#endif // CONFIG_INTRA_INTERP
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- upsample_above, upsample_left,
-#endif // CONFIG_INTRA_EDGE_UPSAMPLE
- p_angle);
+ dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
+ upsample_left, p_angle);
return;
}
-#endif // CONFIG_EXT_INTRA
// predict
if (mode == DC_PRED) {
@@ -2810,41 +1493,54 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
}
}
-static void predict_intra_block_helper(const AV1_COMMON *cm,
- const MACROBLOCKD *xd, int wpx, int hpx,
- TX_SIZE tx_size, PREDICTION_MODE mode,
- const uint8_t *ref, int ref_stride,
- uint8_t *dst, int dst_stride,
- int col_off, int row_off, int plane) {
- BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+void av1_predict_intra_block(
+ const AV1_COMMON *cm, const MACROBLOCKD *xd, int wpx, int hpx,
+ TX_SIZE tx_size, PREDICTION_MODE mode, int angle_delta, int use_palette,
+ FILTER_INTRA_MODE filter_intra_mode, const uint8_t *ref, int ref_stride,
+ uint8_t *dst, int dst_stride, int col_off, int row_off, int plane) {
+ const MB_MODE_INFO *const mbmi = xd->mi[0];
+ const int txwpx = tx_size_wide[tx_size];
+ const int txhpx = tx_size_high[tx_size];
+ const int x = col_off << tx_size_wide_log2[0];
+ const int y = row_off << tx_size_high_log2[0];
+
+ if (use_palette) {
+ int r, c;
+ const uint8_t *const map = xd->plane[plane != 0].color_index_map +
+ xd->color_index_map_offset[plane != 0];
+ const uint16_t *const palette =
+ mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
+ for (r = 0; r < txhpx; ++r) {
+ for (c = 0; c < txwpx; ++c) {
+ dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
+ }
+ }
+ } else {
+ for (r = 0; r < txhpx; ++r) {
+ for (c = 0; c < txwpx; ++c) {
+ dst[r * dst_stride + c] =
+ (uint8_t)palette[map[(r + y) * wpx + c + x]];
+ }
+ }
+ }
+ return;
+ }
+
+ BLOCK_SIZE bsize = mbmi->sb_type;
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int txw = tx_size_wide_unit[tx_size];
-#if CONFIG_CB4X4 && CONFIG_CHROMA_SUB8X8
+ const int txh = tx_size_high_unit[tx_size];
const int have_top = row_off || (pd->subsampling_y ? xd->chroma_up_available
: xd->up_available);
const int have_left =
col_off ||
(pd->subsampling_x ? xd->chroma_left_available : xd->left_available);
-#else
- const int have_top = row_off || xd->up_available;
- const int have_left = col_off || xd->left_available;
-#endif
- const int x = col_off << tx_size_wide_log2[0];
- const int y = row_off << tx_size_high_log2[0];
const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
- const int txwpx = tx_size_wide[tx_size];
- const int txhpx = tx_size_high[tx_size];
-#if !INTRA_USES_RECT_TRANSFORMS
- assert(txwpx == txhpx);
-#endif // !INTRA_USES_RECT_TRANSFORMS
-#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2 && !CONFIG_CHROMA_SUB8X8
- const int xr_chr_offset = (pd->subsampling_x && bsize < BLOCK_8X8) ? 2 : 0;
- const int yd_chr_offset = (pd->subsampling_y && bsize < BLOCK_8X8) ? 2 : 0;
-#else
const int xr_chr_offset = 0;
const int yd_chr_offset = 0;
-#endif
// Distance between the right edge of this prediction block to
// the frame right edge
@@ -2854,69 +1550,39 @@ static void predict_intra_block_helper(const AV1_COMMON *cm,
// the frame bottom edge
const int yd = (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) +
(hpx - y - txhpx) - yd_chr_offset;
- const int right_available = mi_col + ((col_off + txw) << pd->subsampling_x >>
- (MI_SIZE_LOG2 - tx_size_wide_log2[0])) <
- xd->tile.mi_col_end;
- const int bottom_available = (yd > 0);
-#if CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
- const PARTITION_TYPE partition = xd->mi[0]->mbmi.partition;
-#endif
+ const int right_available =
+ mi_col + ((col_off + txw) << pd->subsampling_x) < xd->tile.mi_col_end;
+ const int bottom_available =
+ (yd > 0) &&
+ (mi_row + ((row_off + txh) << pd->subsampling_y) < xd->tile.mi_row_end);
+
+ const PARTITION_TYPE partition = mbmi->partition;
-#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
// force 4x4 chroma component block size.
bsize = scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y);
-#endif
- const int have_top_right =
- has_top_right(cm, bsize, mi_row, mi_col, have_top, right_available,
-#if CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
- partition,
-#endif // CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
- tx_size, row_off, col_off, pd->subsampling_x);
- const int have_bottom_left =
- has_bottom_left(cm, bsize, mi_row, mi_col, bottom_available, have_left,
- tx_size, row_off, col_off, pd->subsampling_y);
- if (xd->mi[0]->mbmi.palette_mode_info.palette_size[plane != 0] > 0) {
- const int stride = wpx;
- int r, c;
- const uint8_t *const map = xd->plane[plane != 0].color_index_map;
- uint16_t *palette = xd->mi[0]->mbmi.palette_mode_info.palette_colors +
- plane * PALETTE_MAX_SIZE;
+ const int have_top_right = has_top_right(
+ cm, bsize, mi_row, mi_col, have_top, right_available, partition, tx_size,
+ row_off, col_off, pd->subsampling_x, pd->subsampling_y);
+ const int have_bottom_left = has_bottom_left(
+ cm, bsize, mi_row, mi_col, bottom_available, have_left, partition,
+ tx_size, row_off, col_off, pd->subsampling_x, pd->subsampling_y);
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
- for (r = 0; r < txhpx; ++r) {
- for (c = 0; c < txwpx; ++c) {
- dst16[r * dst_stride + c] = palette[map[(r + y) * stride + c + x]];
- }
- }
- } else {
-#endif // CONFIG_HIGHBITDEPTH
- for (r = 0; r < txhpx; ++r) {
- for (c = 0; c < txwpx; ++c) {
- dst[r * dst_stride + c] =
- (uint8_t)palette[map[(r + y) * stride + c + x]];
- }
- }
-#if CONFIG_HIGHBITDEPTH
- }
-#endif // CONFIG_HIGHBITDEPTH
- return;
- }
-
-#if CONFIG_HIGHBITDEPTH
+ const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
build_intra_predictors_high(
- xd, ref, ref_stride, dst, dst_stride, mode, tx_size,
+ xd, ref, ref_stride, dst, dst_stride, mode, angle_delta,
+ filter_intra_mode, tx_size, disable_edge_filter,
have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
have_top_right ? AOMMIN(txwpx, xr) : 0,
have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
have_bottom_left ? AOMMIN(txhpx, yd) : 0, plane);
return;
}
-#endif
- build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size,
+
+ build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode,
+ angle_delta, filter_intra_mode, tx_size,
+ disable_edge_filter,
have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
have_top_right ? AOMMIN(txwpx, xr) : 0,
have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
@@ -2924,278 +1590,56 @@ static void predict_intra_block_helper(const AV1_COMMON *cm,
}
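
The use_palette branch at the top of av1_predict_intra_block above reduces to a per-pixel table lookup; a minimal 8-bit sketch:

  #include <stdint.h>

  /* Each output pixel is the palette entry selected by its colour index,
   * as the 8-bit case of the palette branch above does. */
  static void palette_predict_sketch(uint8_t *dst, int dst_stride,
                                     const uint8_t *index_map, int map_stride,
                                     const uint16_t *palette, int w, int h) {
    for (int r = 0; r < h; ++r)
      for (int c = 0; c < w; ++c)
        dst[r * dst_stride + c] =
            (uint8_t)palette[index_map[r * map_stride + c]];
  }
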
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int plane, int block_idx, int blk_col,
- int blk_row, TX_SIZE tx_size) {
- const MODE_INFO *mi = xd->mi[0];
- const MB_MODE_INFO *const mbmi = &mi->mbmi;
+ int plane, int blk_col, int blk_row,
+ TX_SIZE tx_size) {
+ const MB_MODE_INFO *const mbmi = xd->mi[0];
struct macroblockd_plane *const pd = &xd->plane[plane];
const int dst_stride = pd->dst.stride;
uint8_t *dst =
&pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
- const int block_raster_idx =
- av1_block_index_to_raster_order(tx_size, block_idx);
- const PREDICTION_MODE mode = (plane == AOM_PLANE_Y)
- ? get_y_mode(mi, block_raster_idx)
- : get_uv_mode(mbmi->uv_mode);
-#if CONFIG_CFL
+ const PREDICTION_MODE mode =
+ (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
+ const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
+ const FILTER_INTRA_MODE filter_intra_mode =
+ (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
+ ? mbmi->filter_intra_mode_info.filter_intra_mode
+ : FILTER_INTRA_MODES;
+ const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
+
if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
- if (plane == AOM_PLANE_U && blk_col == 0 && blk_row == 0) {
- // Avoid computing the CfL parameters twice, if they have already been
- // computed in cfl_rd_pick_alpha.
- if (!xd->cfl->are_parameters_computed)
- cfl_compute_parameters(xd, tx_size);
+#if CONFIG_DEBUG
+ assert(is_cfl_allowed(xd));
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(
+ mbmi->sb_type, pd->subsampling_x, pd->subsampling_y);
+ (void)plane_bsize;
+ assert(plane_bsize < BLOCK_SIZES_ALL);
+ if (!xd->lossless[mbmi->segment_id]) {
+ assert(blk_col == 0);
+ assert(blk_row == 0);
+ assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
+ assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
}
- cfl_predict_block(xd, dst, dst_stride, blk_row, blk_col, tx_size, plane);
- return;
- }
#endif
-
- av1_predict_intra_block(cm, xd, pd->width, pd->height,
- txsize_to_bsize[tx_size], mode, dst, dst_stride, dst,
- dst_stride, blk_col, blk_row, plane);
-}
-
-#if INTRA_USES_EXT_TRANSFORMS
-// Copy the given row of dst into the equivalent row of ref, saving
-// the overwritten data to tmp. Returns zero if no copy happened (so
-// no restore is needed)
-//
-// Note that ref_row and dst_row follow the usual hibd convention
-// where you convert to a uint16_t* with CONVERT_TO_SHORTPTR(). tmp
-// does not follow that convention: it's a genuine pointer which is
-// correctly aligned and sized for either 8 or 16 bit data.
-//
-// matching_strides is a boolean flag which should be nonzero if ref
-// and dst have the same stride.
-static int overwrite_ref_row(int matching_strides, int buf_flags,
- int block_width, const uint8_t *dst_row,
- uint8_t *ref_row, uint8_t *tmp_row) {
- if (ref_row == dst_row && matching_strides) return 0;
-
- int row_bytes = block_width;
-
-#if CONFIG_HIGHBITDEPTH
- if (buf_flags & YV12_FLAG_HIGHBITDEPTH) {
- row_bytes *= 2;
- ref_row = (uint8_t *)CONVERT_TO_SHORTPTR(ref_row);
- dst_row = (const uint8_t *)CONVERT_TO_SHORTPTR(dst_row);
- }
-#else
- (void)buf_flags;
-#endif // CONFIG_HIGHBITDEPTH
-
- memcpy(tmp_row, ref_row, row_bytes);
- memcpy(ref_row, dst_row, row_bytes);
- return 1;
-}
-
-static void restore_ref_row(int buf_flags, int block_width,
- const uint8_t *tmp_row, uint8_t *ref_row) {
- int row_bytes = block_width;
-#if CONFIG_HIGHBITDEPTH
- if (buf_flags & YV12_FLAG_HIGHBITDEPTH) {
- row_bytes *= 2;
- ref_row = (uint8_t *)CONVERT_TO_SHORTPTR(ref_row);
- }
-#else
- (void)buf_flags;
-#endif // CONFIG_HIGHBITDEPTH
-
- memcpy(ref_row, tmp_row, row_bytes);
-}
-
-// The column equivalent of overwrite_ref_row. ref_row and dst_row
-// point at the relevant column of the first row of the block.
-static int overwrite_ref_col(int buf_flags, int block_height,
- const uint8_t *dst_row, int dst_stride,
- uint8_t *ref_row, int ref_stride,
- uint8_t *tmp_row) {
- if (ref_row == dst_row && ref_stride == dst_stride) return 0;
-
-#if CONFIG_HIGHBITDEPTH
- if (buf_flags & YV12_FLAG_HIGHBITDEPTH) {
- uint16_t *tmp_16 = (uint16_t *)tmp_row;
- uint16_t *ref_16 = CONVERT_TO_SHORTPTR(ref_row);
- const uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst_row);
-
- for (int i = 0; i < block_height; ++i) {
- tmp_16[i] = ref_16[i * ref_stride];
- ref_16[i * ref_stride] = dst_16[i * dst_stride];
- }
- } else {
-#endif // CONFIG_HIGHBITDEPTH
- for (int i = 0; i < block_height; ++i) {
- tmp_row[i] = ref_row[i * ref_stride];
- ref_row[i * ref_stride] = dst_row[i * dst_stride];
- }
-#if CONFIG_HIGHBITDEPTH
- }
-#else
- (void)buf_flags;
-#endif // CONFIG_HIGHBITDEPTH
- return 1;
-}
-
-static void restore_ref_col(int buf_flags, int block_height,
- const uint8_t *tmp_row, uint8_t *ref_row,
- int ref_stride) {
-#if CONFIG_HIGHBITDEPTH
- if (buf_flags & YV12_FLAG_HIGHBITDEPTH) {
- const uint16_t *tmp_16 = (const uint16_t *)tmp_row;
- uint16_t *ref_16 = CONVERT_TO_SHORTPTR(ref_row);
-
- for (int i = 0; i < block_height; ++i) {
- ref_16[i * ref_stride] = tmp_16[i];
- }
- } else {
-#endif // CONFIG_HIGHBITDEPTH
- for (int i = 0; i < block_height; ++i) {
- ref_row[i * ref_stride] = tmp_row[i];
+ CFL_CTX *const cfl = &xd->cfl;
+ CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
+ if (cfl->dc_pred_is_cached[pred_plane] == 0) {
+ av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
+ angle_delta, use_palette, filter_intra_mode, dst,
+ dst_stride, dst, dst_stride, blk_col, blk_row,
+ plane);
+ if (cfl->use_dc_pred_cache) {
+ cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
+ cfl->dc_pred_is_cached[pred_plane] = 1;
+ }
+ } else {
+ cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
}
-#if CONFIG_HIGHBITDEPTH
- }
-#else
- (void)buf_flags;
-#endif // CONFIG_HIGHBITDEPTH
-}
-#endif // #if INTRA_USES_EXT_TRANSFORMS
-
-void av1_predict_intra_block(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- int wpx, int hpx, BLOCK_SIZE bsize,
- PREDICTION_MODE mode, const uint8_t *ref,
- int ref_stride, uint8_t *dst, int dst_stride,
- int col_off, int row_off, int plane) {
- const int block_width = block_size_wide[bsize];
- const int block_height = block_size_high[bsize];
-#if INTRA_USES_RECT_TRANSFORMS
- const TX_SIZE tx_size = max_txsize_rect_lookup[bsize];
- assert(tx_size < TX_SIZES_ALL);
-#else
- const TX_SIZE tx_size = max_txsize_lookup[bsize];
- assert(tx_size < TX_SIZES);
-#endif // INTRA_USES_RECT_TRANSFORMS
-
- // Start by running the helper to predict either the entire block
- // (if the block is square or the same size as tx_size) or the top
- // or left of the block if it's tall and thin or short and wide.
- predict_intra_block_helper(cm, xd, wpx, hpx, tx_size, mode, ref, ref_stride,
- dst, dst_stride, col_off, row_off, plane);
-
-// If we're not using extended transforms, this function should
-// always be called with a square block.
-#if !INTRA_USES_EXT_TRANSFORMS
- assert(block_width == block_height);
-#endif // !INTRA_USES_EXT_TRANSFORMS
-
- // If the block is square, we're done.
- if (block_width == block_height) return;
-
-#if INTRA_USES_EXT_TRANSFORMS
-// If we're using rectangular transforms, we might be done even
-// though the block isn't square.
-#if INTRA_USES_RECT_TRANSFORMS
- if (block_width == tx_size_wide[tx_size] &&
- block_height == tx_size_high[tx_size])
+ cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
return;
-
- // A block should only fail to have a matching transform if it's
- // large and rectangular (such large transform sizes aren't
- // available).
- assert(block_width >= 32 && block_height >= 32);
-#endif // INTRA_USES_RECT_TRANSFORMS
-
- assert((block_width == wpx && block_height == hpx) ||
- (block_width == (wpx >> 1) && block_height == hpx) ||
- (block_width == wpx && block_height == (hpx >> 1)));
-
-// The tmp buffer needs to be big enough to hold MAX_SB_SIZE samples
-// from the image. If CONFIG_HIGHBITDEPTH is enabled, it also needs
-// to be big enough and correctly aligned to hold 16-bit entries.
-#if CONFIG_HIGHBITDEPTH
- uint16_t tmp_buf[MAX_SB_SIZE];
-#else
- uint8_t tmp_buf[MAX_SB_SIZE];
-#endif // CONFIG_HIGHBITDEPTH
- uint8_t *tmp = (uint8_t *)tmp_buf;
-
- if (block_width < block_height) {
- // The block is tall and thin. We've already done the top part,
- // and need to repeat the prediction down the rest of the block.
-
- const int tx_height = tx_size_high[tx_size];
- const int tx_height_off = tx_height >> tx_size_wide_log2[0];
- assert(tx_height_off << tx_size_wide_log2[0] == tx_height);
-
- int next_row_off = row_off + tx_height_off;
- int next_row_idx = tx_height;
-
- while (next_row_idx < block_height) {
- const int last_row_idx = next_row_idx - 1;
-
- // Cast away the const to make a mutable pointer to the last
- // row of ref. This will be snapshotted and restored later.
- uint8_t *last_ref_row = (uint8_t *)ref + last_row_idx * ref_stride;
- uint8_t *last_dst_row = dst + last_row_idx * dst_stride;
-
- const int needs_restore =
- overwrite_ref_row(ref_stride == dst_stride, xd->cur_buf->flags,
- block_width, last_dst_row, last_ref_row, tmp);
-
- const uint8_t *next_ref_row = ref + next_row_idx * ref_stride;
- uint8_t *next_dst_row = dst + next_row_idx * dst_stride;
-
- predict_intra_block_helper(cm, xd, wpx, hpx, tx_size, mode, next_ref_row,
- ref_stride, next_dst_row, dst_stride, col_off,
- next_row_off, plane);
-
- if (needs_restore)
- restore_ref_row(xd->cur_buf->flags, block_width, tmp, last_ref_row);
-
- next_row_idx += tx_height;
- next_row_off += tx_height_off;
- }
- } else {
- // The block is short and wide. We've already done the left part,
- // and need to repeat the prediction to the right.
-
- const int tx_width = tx_size_wide[tx_size];
- const int tx_width_off = tx_width >> tx_size_wide_log2[0];
- assert(tx_width_off << tx_size_wide_log2[0] == tx_width);
-
- int next_col_off = col_off + tx_width_off;
- int next_col_idx = tx_width;
-
- while (next_col_idx < block_width) {
- const int last_col_idx = next_col_idx - 1;
-
- // Cast away the const to make a mutable pointer to ref,
- // starting at the last column written. This will be
- // snapshotted and restored later.
- uint8_t *last_ref_col = (uint8_t *)ref + last_col_idx;
- uint8_t *last_dst_col = dst + last_col_idx;
-
- const int needs_restore =
- overwrite_ref_col(xd->cur_buf->flags, block_height, last_dst_col,
- dst_stride, last_ref_col, ref_stride, tmp);
-
- const uint8_t *next_ref_col = ref + next_col_idx;
- uint8_t *next_dst_col = dst + next_col_idx;
-
- predict_intra_block_helper(cm, xd, wpx, hpx, tx_size, mode, next_ref_col,
- ref_stride, next_dst_col, dst_stride,
- next_col_off, row_off, plane);
-
- if (needs_restore)
- restore_ref_col(xd->cur_buf->flags, block_height, tmp, last_ref_col,
- ref_stride);
-
- next_col_idx += tx_width;
- next_col_off += tx_width_off;
- }
}
-#endif // INTRA_USES_EXT_TRANSFORMS
+ av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
+ angle_delta, use_palette, filter_intra_mode, dst,
+ dst_stride, dst, dst_stride, blk_col, blk_row, plane);
}
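Aside (not part of the patch): the rewritten facade above handles the chroma-from-luma (CfL) planes with a compute-once cache. On the first call for a plane type it runs av1_predict_intra_block() to form the DC prediction and, when use_dc_pred_cache is set, stores it with cfl_store_dc_pred(); later calls reload it with cfl_load_dc_pred() instead of re-predicting, and cfl_predict_block() then applies the chroma-from-luma adjustment. Non-CfL blocks fall through to the plain av1_predict_intra_block() call at the end of the function.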
-void av1_init_intra_predictors(void) {
- once(av1_init_intra_predictors_internal);
-}
+void av1_init_intra_predictors(void) { once(init_intra_predictors_internal); }
diff --git a/third_party/aom/av1/common/reconintra.h b/third_party/aom/av1/common/reconintra.h
index 42797e310..a7d9e8b79 100644
--- a/third_party/aom/av1/common/reconintra.h
+++ b/third_party/aom/av1/common/reconintra.h
@@ -22,15 +22,16 @@ extern "C" {
void av1_init_intra_predictors(void);
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int plane, int block_idx, int blk_col,
- int blk_row, TX_SIZE tx_size);
+ int plane, int blk_col, int blk_row,
+ TX_SIZE tx_size);
void av1_predict_intra_block(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- int bw, int bh, BLOCK_SIZE bsize,
- PREDICTION_MODE mode, const uint8_t *ref,
- int ref_stride, uint8_t *dst, int dst_stride,
- int aoff, int loff, int plane);
+ int bw, int bh, TX_SIZE tx_size,
+ PREDICTION_MODE mode, int angle_delta,
+ int use_palette,
+ FILTER_INTRA_MODE filter_intra_mode,
+ const uint8_t *ref, int ref_stride, uint8_t *dst,
+ int dst_stride, int aoff, int loff, int plane);
-#if CONFIG_INTERINTRA
// Mapping of interintra to intra mode for use in the intra component
static const PREDICTION_MODE interintra_to_intra_mode[INTERINTRA_MODES] = {
DC_PRED, V_PRED, H_PRED, SMOOTH_PRED
@@ -41,44 +42,67 @@ static const INTERINTRA_MODE intra_to_interintra_mode[INTRA_MODES] = {
II_DC_PRED, II_V_PRED, II_H_PRED, II_V_PRED, II_SMOOTH_PRED, II_V_PRED,
II_H_PRED, II_H_PRED, II_V_PRED, II_SMOOTH_PRED, II_SMOOTH_PRED
};
-#endif // CONFIG_INTERINTRA
-
-#if CONFIG_FILTER_INTRA
-#define FILTER_INTRA_PREC_BITS 10
-#endif // CONFIG_FILTER_INTRA
-
-#define CONFIG_INTRA_EDGE_UPSAMPLE CONFIG_INTRA_EDGE
-#define CONFIG_USE_ANGLE_DELTA_SUB8X8 0
-
-#if CONFIG_EXT_INTRA
-static INLINE int av1_is_directional_mode(PREDICTION_MODE mode,
- BLOCK_SIZE bsize) {
-#if CONFIG_INTRA_EDGE_UPSAMPLE
- (void)bsize;
- return mode >= V_PRED && mode <= D63_PRED;
-#else
- return mode >= V_PRED && mode <= D63_PRED && bsize >= BLOCK_8X8;
-#endif
+
+#define FILTER_INTRA_SCALE_BITS 4
+
+static INLINE int av1_is_directional_mode(PREDICTION_MODE mode) {
+ return mode >= V_PRED && mode <= D67_PRED;
}
static INLINE int av1_use_angle_delta(BLOCK_SIZE bsize) {
- (void)bsize;
-#if CONFIG_USE_ANGLE_DELTA_SUB8X8
- return 1;
-#else
return bsize >= BLOCK_8X8;
-#endif
}
-#endif // CONFIG_EXT_INTRA
-#if CONFIG_INTRABC
-static INLINE int av1_allow_intrabc(BLOCK_SIZE bsize,
- const AV1_COMMON *const cm) {
- return (bsize >= BLOCK_8X8 || bsize == BLOCK_4X4) &&
- cm->allow_screen_content_tools;
+static INLINE int av1_allow_intrabc(const AV1_COMMON *const cm) {
+ return frame_is_intra_only(cm) && cm->allow_screen_content_tools &&
+ cm->allow_intrabc;
+}
+
+static INLINE int av1_filter_intra_allowed_bsize(const AV1_COMMON *const cm,
+ BLOCK_SIZE bs) {
+ if (!cm->seq_params.enable_filter_intra || bs == BLOCK_INVALID) return 0;
+
+ return block_size_wide[bs] <= 32 && block_size_high[bs] <= 32;
}
-#endif // CONFIG_INTRABC
+static INLINE int av1_filter_intra_allowed(const AV1_COMMON *const cm,
+ const MB_MODE_INFO *mbmi) {
+ return mbmi->mode == DC_PRED &&
+ mbmi->palette_mode_info.palette_size[0] == 0 &&
+ av1_filter_intra_allowed_bsize(cm, mbmi->sb_type);
+}
+
+extern const int8_t av1_filter_intra_taps[FILTER_INTRA_MODES][8][8];
+
+// Get the shift (up-scaled by 256) in X w.r.t a unit change in Y.
+// If angle > 0 && angle < 90, dx = -((int)(256 / t));
+// If angle > 90 && angle < 180, dx = (int)(256 / t);
+// If angle > 180 && angle < 270, dx = 1;
+static INLINE int av1_get_dx(int angle) {
+ if (angle > 0 && angle < 90) {
+ return dr_intra_derivative[angle];
+ } else if (angle > 90 && angle < 180) {
+ return dr_intra_derivative[180 - angle];
+ } else {
+ // In this case, we are not really going to use dx. We may return any value.
+ return 1;
+ }
+}
+
+// Get the shift (up-scaled by 256) in Y w.r.t a unit change in X.
+// If angle > 0 && angle < 90, dy = 1;
+// If angle > 90 && angle < 180, dy = (int)(256 * t);
+// If angle > 180 && angle < 270, dy = -((int)(256 * t));
+static INLINE int av1_get_dy(int angle) {
+ if (angle > 90 && angle < 180) {
+ return dr_intra_derivative[angle - 90];
+ } else if (angle > 180 && angle < 270) {
+ return dr_intra_derivative[270 - angle];
+ } else {
+ // In this case, we are not really going to use dy. We may return any value.
+ return 1;
+ }
+}
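Aside (not part of the patch): av1_get_dx() and av1_get_dy() only differ in how they fold the angle into the (0, 90) index range covered by dr_intra_derivative[]. A small standalone sketch of that index mapping, using hypothetical helper names (they do not exist in libaom):

    #include <assert.h>

    // Fold a directional-prediction angle into the (0, 90) index range of
    // dr_intra_derivative[], mirroring av1_get_dx()/av1_get_dy() above.
    // Returns -1 where the corresponding derivative is unused.
    static int dr_table_index_for_dx(int angle) {
      if (angle > 0 && angle < 90) return angle;
      if (angle > 90 && angle < 180) return 180 - angle;
      return -1;  // dx is not used for angles in (180, 270)
    }

    static int dr_table_index_for_dy(int angle) {
      if (angle > 90 && angle < 180) return angle - 90;
      if (angle > 180 && angle < 270) return 270 - angle;
      return -1;  // dy is not used for angles in (0, 90)
    }

    int main(void) {
      assert(dr_table_index_for_dx(135) == 45);  // reflected as 180 - angle
      assert(dr_table_index_for_dy(135) == 45);  // angle - 90
      assert(dr_table_index_for_dy(225) == 45);  // reflected as 270 - angle
      return 0;
    }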
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/resize.c b/third_party/aom/av1/common/resize.c
index b0f303e35..17e6823b1 100644
--- a/third_party/aom/av1/common/resize.c
+++ b/third_party/aom/av1/common/resize.c
@@ -16,30 +16,18 @@
#include <stdlib.h>
#include <string.h>
-#include "./aom_config.h"
-#if CONFIG_HIGHBITDEPTH
+#include "config/aom_config.h"
+
#include "aom_dsp/aom_dsp_common.h"
-#endif // CONFIG_HIGHBITDEPTH
#include "aom_ports/mem.h"
#include "aom_scale/aom_scale.h"
#include "av1/common/common.h"
#include "av1/common/resize.h"
-#include "./aom_scale_rtcd.h"
-
-#define FILTER_BITS 7
-
-#define INTERP_TAPS 8
-#define SUBPEL_BITS_RS 6
-#define SUBPEL_MASK_RS ((1 << SUBPEL_BITS_RS) - 1)
-#define INTERP_PRECISION_BITS 16
-#define SUBPEL_INTERP_EXTRA_BITS (INTERP_PRECISION_BITS - SUBPEL_BITS_RS)
-#define SUBPEL_INTERP_EXTRA_OFF (1 << (SUBPEL_INTERP_EXTRA_BITS - 1))
-
-typedef int16_t interp_kernel[INTERP_TAPS];
+#include "config/aom_scale_rtcd.h"
// Filters for interpolation (0.5-band) - note this also filters integer pels.
-static const interp_kernel filteredinterp_filters500[(1 << SUBPEL_BITS_RS)] = {
+static const InterpKernel filteredinterp_filters500[(1 << RS_SUBPEL_BITS)] = {
{ -3, 0, 35, 64, 35, 0, -3, 0 }, { -3, 0, 34, 64, 36, 0, -3, 0 },
{ -3, -1, 34, 64, 36, 1, -3, 0 }, { -3, -1, 33, 64, 37, 1, -3, 0 },
{ -3, -1, 32, 64, 38, 1, -3, 0 }, { -3, -1, 31, 64, 39, 1, -3, 0 },
@@ -75,7 +63,7 @@ static const interp_kernel filteredinterp_filters500[(1 << SUBPEL_BITS_RS)] = {
};
// Filters for interpolation (0.625-band) - note this also filters integer pels.
-static const interp_kernel filteredinterp_filters625[(1 << SUBPEL_BITS_RS)] = {
+static const InterpKernel filteredinterp_filters625[(1 << RS_SUBPEL_BITS)] = {
{ -1, -8, 33, 80, 33, -8, -1, 0 }, { -1, -8, 31, 80, 34, -8, -1, 1 },
{ -1, -8, 30, 80, 35, -8, -1, 1 }, { -1, -8, 29, 80, 36, -7, -2, 1 },
{ -1, -8, 28, 80, 37, -7, -2, 1 }, { -1, -8, 27, 80, 38, -7, -2, 1 },
@@ -111,7 +99,7 @@ static const interp_kernel filteredinterp_filters625[(1 << SUBPEL_BITS_RS)] = {
};
// Filters for interpolation (0.75-band) - note this also filters integer pels.
-static const interp_kernel filteredinterp_filters750[(1 << SUBPEL_BITS_RS)] = {
+static const InterpKernel filteredinterp_filters750[(1 << RS_SUBPEL_BITS)] = {
{ 2, -11, 25, 96, 25, -11, 2, 0 }, { 2, -11, 24, 96, 26, -11, 2, 0 },
{ 2, -11, 22, 96, 28, -11, 2, 0 }, { 2, -10, 21, 96, 29, -12, 2, 0 },
{ 2, -10, 19, 96, 31, -12, 2, 0 }, { 2, -10, 18, 95, 32, -11, 2, 0 },
@@ -147,7 +135,7 @@ static const interp_kernel filteredinterp_filters750[(1 << SUBPEL_BITS_RS)] = {
};
// Filters for interpolation (0.875-band) - note this also filters integer pels.
-static const interp_kernel filteredinterp_filters875[(1 << SUBPEL_BITS_RS)] = {
+static const InterpKernel filteredinterp_filters875[(1 << RS_SUBPEL_BITS)] = {
{ 3, -8, 13, 112, 13, -8, 3, 0 }, { 2, -7, 12, 112, 15, -8, 3, -1 },
{ 3, -7, 10, 112, 17, -9, 3, -1 }, { 2, -6, 8, 112, 19, -9, 3, -1 },
{ 2, -6, 7, 112, 21, -10, 3, -1 }, { 2, -5, 6, 111, 22, -10, 3, -1 },
@@ -183,7 +171,7 @@ static const interp_kernel filteredinterp_filters875[(1 << SUBPEL_BITS_RS)] = {
};
// Filters for interpolation (full-band) - no filtering for integer pixels
-static const interp_kernel filteredinterp_filters1000[(1 << SUBPEL_BITS_RS)] = {
+static const InterpKernel filteredinterp_filters1000[(1 << RS_SUBPEL_BITS)] = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -1, 128, 2, -1, 0, 0 },
{ 0, 1, -3, 127, 4, -2, 1, 0 }, { 0, 1, -4, 127, 6, -3, 1, 0 },
{ 0, 2, -6, 126, 8, -3, 1, 0 }, { 0, 2, -7, 125, 11, -4, 1, 0 },
@@ -218,153 +206,116 @@ static const interp_kernel filteredinterp_filters1000[(1 << SUBPEL_BITS_RS)] = {
{ 0, 1, -2, 4, 127, -3, 1, 0 }, { 0, 0, -1, 2, 128, -1, 0, 0 },
};
-#if CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
-#define INTERP_SIMPLE_TAPS 4
-static const int16_t filter_simple[(1
- << SUBPEL_BITS_RS)][INTERP_SIMPLE_TAPS] = {
-#if INTERP_SIMPLE_TAPS == 2
- { 128, 0 }, { 126, 2 }, { 124, 4 }, { 122, 6 }, { 120, 8 }, { 118, 10 },
- { 116, 12 }, { 114, 14 }, { 112, 16 }, { 110, 18 }, { 108, 20 }, { 106, 22 },
- { 104, 24 }, { 102, 26 }, { 100, 28 }, { 98, 30 }, { 96, 32 }, { 94, 34 },
- { 92, 36 }, { 90, 38 }, { 88, 40 }, { 86, 42 }, { 84, 44 }, { 82, 46 },
- { 80, 48 }, { 78, 50 }, { 76, 52 }, { 74, 54 }, { 72, 56 }, { 70, 58 },
- { 68, 60 }, { 66, 62 }, { 64, 64 }, { 62, 66 }, { 60, 68 }, { 58, 70 },
- { 56, 72 }, { 54, 74 }, { 52, 76 }, { 50, 78 }, { 48, 80 }, { 46, 82 },
- { 44, 84 }, { 42, 86 }, { 40, 88 }, { 38, 90 }, { 36, 92 }, { 34, 94 },
- { 32, 96 }, { 30, 98 }, { 28, 100 }, { 26, 102 }, { 24, 104 }, { 22, 106 },
- { 20, 108 }, { 18, 110 }, { 16, 112 }, { 14, 114 }, { 12, 116 }, { 10, 118 },
- { 8, 120 }, { 6, 122 }, { 4, 124 }, { 2, 126 },
-#elif INTERP_SIMPLE_TAPS == 4
- { 0, 128, 0, 0 }, { -1, 128, 2, -1 }, { -2, 127, 4, -1 },
- { -3, 126, 7, -2 }, { -4, 125, 9, -2 }, { -5, 125, 11, -3 },
- { -6, 124, 13, -3 }, { -7, 123, 16, -4 }, { -7, 122, 18, -5 },
- { -8, 121, 20, -5 }, { -9, 120, 23, -6 }, { -9, 118, 25, -6 },
- { -10, 117, 28, -7 }, { -11, 116, 30, -7 }, { -11, 114, 33, -8 },
- { -12, 113, 35, -8 }, { -12, 111, 38, -9 }, { -13, 109, 41, -9 },
- { -13, 108, 43, -10 }, { -13, 106, 45, -10 }, { -13, 104, 48, -11 },
- { -14, 102, 51, -11 }, { -14, 100, 53, -11 }, { -14, 98, 56, -12 },
- { -14, 96, 58, -12 }, { -14, 94, 61, -13 }, { -15, 92, 64, -13 },
- { -15, 90, 66, -13 }, { -15, 87, 69, -13 }, { -14, 85, 71, -14 },
- { -14, 83, 73, -14 }, { -14, 80, 76, -14 }, { -14, 78, 78, -14 },
- { -14, 76, 80, -14 }, { -14, 73, 83, -14 }, { -14, 71, 85, -14 },
- { -13, 69, 87, -15 }, { -13, 66, 90, -15 }, { -13, 64, 92, -15 },
- { -13, 61, 94, -14 }, { -12, 58, 96, -14 }, { -12, 56, 98, -14 },
- { -11, 53, 100, -14 }, { -11, 51, 102, -14 }, { -11, 48, 104, -13 },
- { -10, 45, 106, -13 }, { -10, 43, 108, -13 }, { -9, 41, 109, -13 },
- { -9, 38, 111, -12 }, { -8, 35, 113, -12 }, { -8, 33, 114, -11 },
- { -7, 30, 116, -11 }, { -7, 28, 117, -10 }, { -6, 25, 118, -9 },
- { -6, 23, 120, -9 }, { -5, 20, 121, -8 }, { -5, 18, 122, -7 },
- { -4, 16, 123, -7 }, { -3, 13, 124, -6 }, { -3, 11, 125, -5 },
- { -2, 9, 125, -4 }, { -2, 7, 126, -3 }, { -1, 4, 127, -2 },
- { -1, 2, 128, -1 },
-#elif INTERP_SIMPLE_TAPS == 6
- { 0, 0, 128, 0, 0, 0 }, { 0, -1, 128, 2, -1, 0 },
- { 1, -3, 127, 4, -2, 1 }, { 1, -4, 127, 6, -3, 1 },
- { 2, -6, 126, 8, -3, 1 }, { 2, -7, 125, 11, -4, 1 },
- { 2, -9, 125, 13, -5, 2 }, { 3, -10, 124, 15, -6, 2 },
- { 3, -11, 123, 18, -7, 2 }, { 3, -12, 122, 20, -8, 3 },
- { 4, -13, 121, 22, -9, 3 }, { 4, -14, 119, 25, -9, 3 },
- { 4, -15, 118, 27, -10, 4 }, { 4, -16, 117, 30, -11, 4 },
- { 5, -17, 116, 32, -12, 4 }, { 5, -17, 114, 35, -13, 4 },
- { 5, -18, 112, 37, -13, 5 }, { 5, -19, 111, 40, -14, 5 },
- { 6, -19, 109, 42, -15, 5 }, { 6, -20, 107, 45, -15, 5 },
- { 6, -20, 105, 48, -16, 5 }, { 6, -21, 103, 51, -17, 6 },
- { 6, -21, 101, 53, -17, 6 }, { 6, -21, 99, 56, -18, 6 },
- { 7, -22, 97, 58, -18, 6 }, { 7, -22, 95, 61, -19, 6 },
- { 7, -22, 93, 63, -19, 6 }, { 7, -22, 91, 66, -20, 6 },
- { 7, -22, 88, 69, -20, 6 }, { 7, -22, 86, 71, -21, 7 },
- { 7, -22, 83, 74, -21, 7 }, { 7, -22, 81, 76, -21, 7 },
- { 7, -22, 79, 79, -22, 7 }, { 7, -21, 76, 81, -22, 7 },
- { 7, -21, 74, 83, -22, 7 }, { 7, -21, 71, 86, -22, 7 },
- { 6, -20, 69, 88, -22, 7 }, { 6, -20, 66, 91, -22, 7 },
- { 6, -19, 63, 93, -22, 7 }, { 6, -19, 61, 95, -22, 7 },
- { 6, -18, 58, 97, -22, 7 }, { 6, -18, 56, 99, -21, 6 },
- { 6, -17, 53, 101, -21, 6 }, { 6, -17, 51, 103, -21, 6 },
- { 5, -16, 48, 105, -20, 6 }, { 5, -15, 45, 107, -20, 6 },
- { 5, -15, 42, 109, -19, 6 }, { 5, -14, 40, 111, -19, 5 },
- { 5, -13, 37, 112, -18, 5 }, { 4, -13, 35, 114, -17, 5 },
- { 4, -12, 32, 116, -17, 5 }, { 4, -11, 30, 117, -16, 4 },
- { 4, -10, 27, 118, -15, 4 }, { 3, -9, 25, 119, -14, 4 },
- { 3, -9, 22, 121, -13, 4 }, { 3, -8, 20, 122, -12, 3 },
- { 2, -7, 18, 123, -11, 3 }, { 2, -6, 15, 124, -10, 3 },
- { 2, -5, 13, 125, -9, 2 }, { 1, -4, 11, 125, -7, 2 },
- { 1, -3, 8, 126, -6, 2 }, { 1, -3, 6, 127, -4, 1 },
- { 1, -2, 4, 127, -3, 1 }, { 0, -1, 2, 128, -1, 0 },
+const int16_t av1_resize_filter_normative[(
+ 1 << RS_SUBPEL_BITS)][UPSCALE_NORMATIVE_TAPS] = {
+#if UPSCALE_NORMATIVE_TAPS == 8
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -1, 128, 2, -1, 0, 0 },
+ { 0, 1, -3, 127, 4, -2, 1, 0 }, { 0, 1, -4, 127, 6, -3, 1, 0 },
+ { 0, 2, -6, 126, 8, -3, 1, 0 }, { 0, 2, -7, 125, 11, -4, 1, 0 },
+ { -1, 2, -8, 125, 13, -5, 2, 0 }, { -1, 3, -9, 124, 15, -6, 2, 0 },
+ { -1, 3, -10, 123, 18, -6, 2, -1 }, { -1, 3, -11, 122, 20, -7, 3, -1 },
+ { -1, 4, -12, 121, 22, -8, 3, -1 }, { -1, 4, -13, 120, 25, -9, 3, -1 },
+ { -1, 4, -14, 118, 28, -9, 3, -1 }, { -1, 4, -15, 117, 30, -10, 4, -1 },
+ { -1, 5, -16, 116, 32, -11, 4, -1 }, { -1, 5, -16, 114, 35, -12, 4, -1 },
+ { -1, 5, -17, 112, 38, -12, 4, -1 }, { -1, 5, -18, 111, 40, -13, 5, -1 },
+ { -1, 5, -18, 109, 43, -14, 5, -1 }, { -1, 6, -19, 107, 45, -14, 5, -1 },
+ { -1, 6, -19, 105, 48, -15, 5, -1 }, { -1, 6, -19, 103, 51, -16, 5, -1 },
+ { -1, 6, -20, 101, 53, -16, 6, -1 }, { -1, 6, -20, 99, 56, -17, 6, -1 },
+ { -1, 6, -20, 97, 58, -17, 6, -1 }, { -1, 6, -20, 95, 61, -18, 6, -1 },
+ { -2, 7, -20, 93, 64, -18, 6, -2 }, { -2, 7, -20, 91, 66, -19, 6, -1 },
+ { -2, 7, -20, 88, 69, -19, 6, -1 }, { -2, 7, -20, 86, 71, -19, 6, -1 },
+ { -2, 7, -20, 84, 74, -20, 7, -2 }, { -2, 7, -20, 81, 76, -20, 7, -1 },
+ { -2, 7, -20, 79, 79, -20, 7, -2 }, { -1, 7, -20, 76, 81, -20, 7, -2 },
+ { -2, 7, -20, 74, 84, -20, 7, -2 }, { -1, 6, -19, 71, 86, -20, 7, -2 },
+ { -1, 6, -19, 69, 88, -20, 7, -2 }, { -1, 6, -19, 66, 91, -20, 7, -2 },
+ { -2, 6, -18, 64, 93, -20, 7, -2 }, { -1, 6, -18, 61, 95, -20, 6, -1 },
+ { -1, 6, -17, 58, 97, -20, 6, -1 }, { -1, 6, -17, 56, 99, -20, 6, -1 },
+ { -1, 6, -16, 53, 101, -20, 6, -1 }, { -1, 5, -16, 51, 103, -19, 6, -1 },
+ { -1, 5, -15, 48, 105, -19, 6, -1 }, { -1, 5, -14, 45, 107, -19, 6, -1 },
+ { -1, 5, -14, 43, 109, -18, 5, -1 }, { -1, 5, -13, 40, 111, -18, 5, -1 },
+ { -1, 4, -12, 38, 112, -17, 5, -1 }, { -1, 4, -12, 35, 114, -16, 5, -1 },
+ { -1, 4, -11, 32, 116, -16, 5, -1 }, { -1, 4, -10, 30, 117, -15, 4, -1 },
+ { -1, 3, -9, 28, 118, -14, 4, -1 }, { -1, 3, -9, 25, 120, -13, 4, -1 },
+ { -1, 3, -8, 22, 121, -12, 4, -1 }, { -1, 3, -7, 20, 122, -11, 3, -1 },
+ { -1, 2, -6, 18, 123, -10, 3, -1 }, { 0, 2, -6, 15, 124, -9, 3, -1 },
+ { 0, 2, -5, 13, 125, -8, 2, -1 }, { 0, 1, -4, 11, 125, -7, 2, 0 },
+ { 0, 1, -3, 8, 126, -6, 2, 0 }, { 0, 1, -3, 6, 127, -4, 1, 0 },
+ { 0, 1, -2, 4, 127, -3, 1, 0 }, { 0, 0, -1, 2, 128, -1, 0, 0 },
#else
-#error "Invalid value of INTERP_SIMPLE_TAPS"
-#endif // INTERP_SIMPLE_TAPS == 2
+#error "Invalid value of UPSCALE_NORMATIVE_TAPS"
+#endif // UPSCALE_NORMATIVE_TAPS == 8
};
-#endif // CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
// Filters for factor of 2 downsampling.
static const int16_t av1_down2_symeven_half_filter[] = { 56, 12, -3, -1 };
static const int16_t av1_down2_symodd_half_filter[] = { 64, 35, 0, -3 };
-static const interp_kernel *choose_interp_filter(int inlength, int outlength) {
- int outlength16 = outlength * 16;
- if (outlength16 >= inlength * 16)
+static const InterpKernel *choose_interp_filter(int in_length, int out_length) {
+ int out_length16 = out_length * 16;
+ if (out_length16 >= in_length * 16)
return filteredinterp_filters1000;
- else if (outlength16 >= inlength * 13)
+ else if (out_length16 >= in_length * 13)
return filteredinterp_filters875;
- else if (outlength16 >= inlength * 11)
+ else if (out_length16 >= in_length * 11)
return filteredinterp_filters750;
- else if (outlength16 >= inlength * 9)
+ else if (out_length16 >= in_length * 9)
return filteredinterp_filters625;
else
return filteredinterp_filters500;
}
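Aside (not part of the patch): choose_interp_filter() picks a filter band by comparing the output/input ratio against sixteenths. A standalone check of which table a 1280 -> 720 downscale selects (720/1280 = 9/16, so the 0.625-band filters):

    #include <assert.h>

    // Standalone check of the band thresholds used by choose_interp_filter().
    int main(void) {
      const int in_length = 1280, out_length = 720;
      const int out_length16 = out_length * 16;
      assert(out_length16 < in_length * 16);  // not the full-band filter
      assert(out_length16 < in_length * 13);  // not the 0.875 band
      assert(out_length16 < in_length * 11);  // not the 0.75 band
      assert(out_length16 >= in_length * 9);  // first passing test: 0.625 band
      return 0;
    }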
-static void interpolate_core(const uint8_t *const input, int inlength,
- uint8_t *output, int outlength,
+static void interpolate_core(const uint8_t *const input, int in_length,
+ uint8_t *output, int out_length,
const int16_t *interp_filters, int interp_taps) {
const int32_t delta =
- (((uint32_t)inlength << INTERP_PRECISION_BITS) + outlength / 2) /
- outlength;
+ (((uint32_t)in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) /
+ out_length;
const int32_t offset =
- inlength > outlength
- ? (((int32_t)(inlength - outlength) << (INTERP_PRECISION_BITS - 1)) +
- outlength / 2) /
- outlength
- : -(((int32_t)(outlength - inlength) << (INTERP_PRECISION_BITS - 1)) +
- outlength / 2) /
- outlength;
+ in_length > out_length
+ ? (((int32_t)(in_length - out_length) << (RS_SCALE_SUBPEL_BITS - 1)) +
+ out_length / 2) /
+ out_length
+ : -(((int32_t)(out_length - in_length)
+ << (RS_SCALE_SUBPEL_BITS - 1)) +
+ out_length / 2) /
+ out_length;
uint8_t *optr = output;
int x, x1, x2, sum, k, int_pel, sub_pel;
int32_t y;
x = 0;
- y = offset + SUBPEL_INTERP_EXTRA_OFF;
- while ((y >> INTERP_PRECISION_BITS) < (interp_taps / 2 - 1)) {
+ y = offset + RS_SCALE_EXTRA_OFF;
+ while ((y >> RS_SCALE_SUBPEL_BITS) < (interp_taps / 2 - 1)) {
x++;
y += delta;
}
x1 = x;
- x = outlength - 1;
- y = delta * x + offset + SUBPEL_INTERP_EXTRA_OFF;
- while ((y >> INTERP_PRECISION_BITS) + (int32_t)(interp_taps / 2) >=
- inlength) {
+ x = out_length - 1;
+ y = delta * x + offset + RS_SCALE_EXTRA_OFF;
+ while ((y >> RS_SCALE_SUBPEL_BITS) + (int32_t)(interp_taps / 2) >=
+ in_length) {
x--;
y -= delta;
}
x2 = x;
if (x1 > x2) {
- for (x = 0, y = offset + SUBPEL_INTERP_EXTRA_OFF; x < outlength;
+ for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < out_length;
++x, y += delta) {
- int_pel = y >> INTERP_PRECISION_BITS;
- sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
+ int_pel = y >> RS_SCALE_SUBPEL_BITS;
+ sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
for (k = 0; k < interp_taps; ++k) {
const int pk = int_pel - interp_taps / 2 + 1 + k;
- sum += filter[k] * input[AOMMAX(AOMMIN(pk, inlength - 1), 0)];
+ sum += filter[k] * input[AOMMAX(AOMMIN(pk, in_length - 1), 0)];
}
*optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
}
} else {
// Initial part.
- for (x = 0, y = offset + SUBPEL_INTERP_EXTRA_OFF; x < x1; ++x, y += delta) {
- int_pel = y >> INTERP_PRECISION_BITS;
- sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
+ for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < x1; ++x, y += delta) {
+ int_pel = y >> RS_SCALE_SUBPEL_BITS;
+ sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
for (k = 0; k < interp_taps; ++k)
@@ -373,8 +324,8 @@ static void interpolate_core(const uint8_t *const input, int inlength,
}
// Middle part.
for (; x <= x2; ++x, y += delta) {
- int_pel = y >> INTERP_PRECISION_BITS;
- sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
+ int_pel = y >> RS_SCALE_SUBPEL_BITS;
+ sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
for (k = 0; k < interp_taps; ++k)
@@ -382,35 +333,42 @@ static void interpolate_core(const uint8_t *const input, int inlength,
*optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
}
// End part.
- for (; x < outlength; ++x, y += delta) {
- int_pel = y >> INTERP_PRECISION_BITS;
- sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
+ for (; x < out_length; ++x, y += delta) {
+ int_pel = y >> RS_SCALE_SUBPEL_BITS;
+ sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
for (k = 0; k < interp_taps; ++k)
sum += filter[k] *
- input[AOMMIN(int_pel - interp_taps / 2 + 1 + k, inlength - 1)];
+ input[AOMMIN(int_pel - interp_taps / 2 + 1 + k, in_length - 1)];
*optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
}
}
}
-static void interpolate(const uint8_t *const input, int inlength,
- uint8_t *output, int outlength) {
- const interp_kernel *interp_filters =
- choose_interp_filter(inlength, outlength);
+static void interpolate(const uint8_t *const input, int in_length,
+ uint8_t *output, int out_length) {
+ const InterpKernel *interp_filters =
+ choose_interp_filter(in_length, out_length);
+
+ interpolate_core(input, in_length, output, out_length, &interp_filters[0][0],
+ SUBPEL_TAPS);
+}
- interpolate_core(input, inlength, output, outlength, &interp_filters[0][0],
- INTERP_TAPS);
+int32_t av1_get_upscale_convolve_step(int in_length, int out_length) {
+ return ((in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) / out_length;
}
-#if CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
-static void interpolate_simple(const uint8_t *const input, int inlength,
- uint8_t *output, int outlength) {
- interpolate_core(input, inlength, output, outlength, &filter_simple[0][0],
- INTERP_SIMPLE_TAPS);
+static int32_t get_upscale_convolve_x0(int in_length, int out_length,
+ int32_t x_step_qn) {
+ const int err = out_length * x_step_qn - (in_length << RS_SCALE_SUBPEL_BITS);
+ const int32_t x0 =
+ (-((out_length - in_length) << (RS_SCALE_SUBPEL_BITS - 1)) +
+ out_length / 2) /
+ out_length +
+ RS_SCALE_EXTRA_OFF - err / 2;
+ return (int32_t)((uint32_t)x0 & RS_SCALE_SUBPEL_MASK);
}
-#endif // CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
#ifndef __clang_analyzer__
static void down2_symeven(const uint8_t *const input, int length,
@@ -525,8 +483,7 @@ static void down2_symodd(const uint8_t *const input, int length,
}
static int get_down2_length(int length, int steps) {
- int s;
- for (s = 0; s < steps; ++s) length = (length + 1) >> 1;
+ for (int s = 0; s < steps; ++s) length = (length + 1) >> 1;
return length;
}
@@ -536,6 +493,12 @@ static int get_down2_steps(int in_length, int out_length) {
while ((proj_in_length = get_down2_length(in_length, 1)) >= out_length) {
++steps;
in_length = proj_in_length;
+ if (in_length == 1) {
+ // Special case: we break because any further calls to get_down2_length()
+      // will be with length == 1, which return 1, resulting in an infinite
+ // loop.
+ break;
+ }
}
return steps;
}
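Aside (not part of the patch): with the early break added above, get_down2_steps(8, 1) now terminates. A standalone trace of the loop:

    #include <assert.h>

    // Lengths go 8 -> 4 -> 2 -> 1; the loop breaks once the length reaches 1
    // instead of spinning on get_down2_length(1, 1) == 1, giving 3 steps.
    static int down2_len(int length) { return (length + 1) >> 1; }

    int main(void) {
      int steps = 0, in_length = 8;
      const int out_length = 1;
      int proj;
      while ((proj = down2_len(in_length)) >= out_length) {
        ++steps;
        in_length = proj;
        if (in_length == 1) break;  // mirrors the fix in the hunk above
      }
      assert(steps == 3);
      return 0;
    }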
@@ -624,97 +587,118 @@ Error:
aom_free(arrbuf2);
}
-#if CONFIG_FRAME_SUPERRES
-static void upscale_normative(const uint8_t *const input, int length,
- uint8_t *output, int olength) {
-#if CONFIG_LOOP_RESTORATION
- interpolate_simple(input, length, output, olength);
-#else
- interpolate(input, length, output, olength);
-#endif // CONFIG_LOOP_RESTORATION
-}
-
-static void upscale_normative_plane(const uint8_t *const input, int height,
- int width, int in_stride, uint8_t *output,
- int height2, int width2, int out_stride) {
- int i;
- uint8_t *intbuf = (uint8_t *)aom_malloc(sizeof(uint8_t) * width2 * height);
- uint8_t *arrbuf = (uint8_t *)aom_malloc(sizeof(uint8_t) * height);
- uint8_t *arrbuf2 = (uint8_t *)aom_malloc(sizeof(uint8_t) * height2);
- if (intbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) goto Error;
+static void upscale_normative_rect(const uint8_t *const input, int height,
+ int width, int in_stride, uint8_t *output,
+ int height2, int width2, int out_stride,
+ int x_step_qn, int x0_qn, int pad_left,
+ int pad_right) {
assert(width > 0);
assert(height > 0);
assert(width2 > 0);
assert(height2 > 0);
- for (i = 0; i < height; ++i)
- upscale_normative(input + in_stride * i, width, intbuf + width2 * i,
- width2);
- for (i = 0; i < width2; ++i) {
- fill_col_to_arr(intbuf + i, width2, height, arrbuf);
- upscale_normative(arrbuf, height, arrbuf2, height2);
- fill_arr_to_col(output + i, out_stride, height2, arrbuf2);
+ assert(height2 == height);
+
+ // Extend the left/right pixels of the tile column if needed
+ // (either because we can't sample from other tiles, or because we're at
+ // a frame edge).
+ // Save the overwritten pixels into tmp_left and tmp_right.
+ // Note: Because we pass input-1 to av1_convolve_horiz_rs, we need one extra
+ // column of border pixels compared to what we'd naively think.
+ const int border_cols = UPSCALE_NORMATIVE_TAPS / 2 + 1;
+ uint8_t *tmp_left =
+ NULL; // Silence spurious "may be used uninitialized" warnings
+ uint8_t *tmp_right = NULL;
+ uint8_t *const in_tl = (uint8_t *)(input - border_cols); // Cast off 'const'
+ uint8_t *const in_tr = (uint8_t *)(input + width);
+ if (pad_left) {
+ tmp_left = (uint8_t *)aom_malloc(sizeof(*tmp_left) * border_cols * height);
+ for (int i = 0; i < height; i++) {
+ memcpy(tmp_left + i * border_cols, in_tl + i * in_stride, border_cols);
+ memset(in_tl + i * in_stride, input[i * in_stride], border_cols);
+ }
+ }
+ if (pad_right) {
+ tmp_right =
+ (uint8_t *)aom_malloc(sizeof(*tmp_right) * border_cols * height);
+ for (int i = 0; i < height; i++) {
+ memcpy(tmp_right + i * border_cols, in_tr + i * in_stride, border_cols);
+ memset(in_tr + i * in_stride, input[i * in_stride + width - 1],
+ border_cols);
+ }
}
-Error:
- aom_free(intbuf);
- aom_free(arrbuf);
- aom_free(arrbuf2);
+ av1_convolve_horiz_rs(input - 1, in_stride, output, out_stride, width2,
+ height2, &av1_resize_filter_normative[0][0], x0_qn,
+ x_step_qn);
+
+ // Restore the left/right border pixels
+ if (pad_left) {
+ for (int i = 0; i < height; i++) {
+ memcpy(in_tl + i * in_stride, tmp_left + i * border_cols, border_cols);
+ }
+ aom_free(tmp_left);
+ }
+ if (pad_right) {
+ for (int i = 0; i < height; i++) {
+ memcpy(in_tr + i * in_stride, tmp_right + i * border_cols, border_cols);
+ }
+ aom_free(tmp_right);
+ }
}
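Aside (not part of the patch): border_cols above works out to UPSCALE_NORMATIVE_TAPS / 2 + 1 = 8 / 2 + 1 = 5 columns per side. The 8-tap kernel can read up to UPSCALE_NORMATIVE_TAPS / 2 samples beyond the tile edge, and because av1_convolve_horiz_rs() is invoked on input - 1 rather than input, one further replicated column is needed, exactly as the comment in the function notes.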
-#endif // CONFIG_FRAME_SUPERRES
-#if CONFIG_HIGHBITDEPTH
-static void highbd_interpolate_core(const uint16_t *const input, int inlength,
- uint16_t *output, int outlength, int bd,
+static void highbd_interpolate_core(const uint16_t *const input, int in_length,
+ uint16_t *output, int out_length, int bd,
const int16_t *interp_filters,
int interp_taps) {
const int32_t delta =
- (((uint32_t)inlength << INTERP_PRECISION_BITS) + outlength / 2) /
- outlength;
+ (((uint32_t)in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) /
+ out_length;
const int32_t offset =
- inlength > outlength
- ? (((int32_t)(inlength - outlength) << (INTERP_PRECISION_BITS - 1)) +
- outlength / 2) /
- outlength
- : -(((int32_t)(outlength - inlength) << (INTERP_PRECISION_BITS - 1)) +
- outlength / 2) /
- outlength;
+ in_length > out_length
+ ? (((int32_t)(in_length - out_length) << (RS_SCALE_SUBPEL_BITS - 1)) +
+ out_length / 2) /
+ out_length
+ : -(((int32_t)(out_length - in_length)
+ << (RS_SCALE_SUBPEL_BITS - 1)) +
+ out_length / 2) /
+ out_length;
uint16_t *optr = output;
int x, x1, x2, sum, k, int_pel, sub_pel;
int32_t y;
x = 0;
- y = offset + SUBPEL_INTERP_EXTRA_OFF;
- while ((y >> INTERP_PRECISION_BITS) < (interp_taps / 2 - 1)) {
+ y = offset + RS_SCALE_EXTRA_OFF;
+ while ((y >> RS_SCALE_SUBPEL_BITS) < (interp_taps / 2 - 1)) {
x++;
y += delta;
}
x1 = x;
- x = outlength - 1;
- y = delta * x + offset + SUBPEL_INTERP_EXTRA_OFF;
- while ((y >> INTERP_PRECISION_BITS) + (int32_t)(interp_taps / 2) >=
- inlength) {
+ x = out_length - 1;
+ y = delta * x + offset + RS_SCALE_EXTRA_OFF;
+ while ((y >> RS_SCALE_SUBPEL_BITS) + (int32_t)(interp_taps / 2) >=
+ in_length) {
x--;
y -= delta;
}
x2 = x;
if (x1 > x2) {
- for (x = 0, y = offset + SUBPEL_INTERP_EXTRA_OFF; x < outlength;
+ for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < out_length;
++x, y += delta) {
- int_pel = y >> INTERP_PRECISION_BITS;
- sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
+ int_pel = y >> RS_SCALE_SUBPEL_BITS;
+ sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
for (k = 0; k < interp_taps; ++k) {
const int pk = int_pel - interp_taps / 2 + 1 + k;
- sum += filter[k] * input[AOMMAX(AOMMIN(pk, inlength - 1), 0)];
+ sum += filter[k] * input[AOMMAX(AOMMIN(pk, in_length - 1), 0)];
}
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
} else {
// Initial part.
- for (x = 0, y = offset + SUBPEL_INTERP_EXTRA_OFF; x < x1; ++x, y += delta) {
- int_pel = y >> INTERP_PRECISION_BITS;
- sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
+ for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < x1; ++x, y += delta) {
+ int_pel = y >> RS_SCALE_SUBPEL_BITS;
+ sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
for (k = 0; k < interp_taps; ++k)
@@ -723,8 +707,8 @@ static void highbd_interpolate_core(const uint16_t *const input, int inlength,
}
// Middle part.
for (; x <= x2; ++x, y += delta) {
- int_pel = y >> INTERP_PRECISION_BITS;
- sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
+ int_pel = y >> RS_SCALE_SUBPEL_BITS;
+ sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
for (k = 0; k < interp_taps; ++k)
@@ -732,35 +716,27 @@ static void highbd_interpolate_core(const uint16_t *const input, int inlength,
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
// End part.
- for (; x < outlength; ++x, y += delta) {
- int_pel = y >> INTERP_PRECISION_BITS;
- sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
+ for (; x < out_length; ++x, y += delta) {
+ int_pel = y >> RS_SCALE_SUBPEL_BITS;
+ sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
for (k = 0; k < interp_taps; ++k)
sum += filter[k] *
- input[AOMMIN(int_pel - interp_taps / 2 + 1 + k, inlength - 1)];
+ input[AOMMIN(int_pel - interp_taps / 2 + 1 + k, in_length - 1)];
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
}
}
-static void highbd_interpolate(const uint16_t *const input, int inlength,
- uint16_t *output, int outlength, int bd) {
- const interp_kernel *interp_filters =
- choose_interp_filter(inlength, outlength);
-
- highbd_interpolate_core(input, inlength, output, outlength, bd,
- &interp_filters[0][0], INTERP_TAPS);
-}
+static void highbd_interpolate(const uint16_t *const input, int in_length,
+ uint16_t *output, int out_length, int bd) {
+ const InterpKernel *interp_filters =
+ choose_interp_filter(in_length, out_length);
-#if CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
-static void highbd_interpolate_simple(const uint16_t *const input, int inlength,
- uint16_t *output, int outlength, int bd) {
- highbd_interpolate_core(input, inlength, output, outlength, bd,
- &filter_simple[0][0], INTERP_SIMPLE_TAPS);
+ highbd_interpolate_core(input, in_length, output, out_length, bd,
+ &interp_filters[0][0], SUBPEL_TAPS);
}
-#endif // CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
#ifndef __clang_analyzer__
static void highbd_down2_symeven(const uint16_t *const input, int length,
@@ -958,44 +934,68 @@ Error:
aom_free(arrbuf2);
}
-#if CONFIG_FRAME_SUPERRES
-static void highbd_upscale_normative(const uint16_t *const input, int length,
- uint16_t *output, int olength, int bd) {
-#if CONFIG_LOOP_RESTORATION
- highbd_interpolate_simple(input, length, output, olength, bd);
-#else
- highbd_interpolate(input, length, output, olength, bd);
-#endif // CONFIG_LOOP_RESTORATION
-}
-
-static void highbd_upscale_normative_plane(const uint8_t *const input,
- int height, int width, int in_stride,
- uint8_t *output, int height2,
- int width2, int out_stride, int bd) {
- int i;
- uint16_t *intbuf = (uint16_t *)aom_malloc(sizeof(uint16_t) * width2 * height);
- uint16_t *arrbuf = (uint16_t *)aom_malloc(sizeof(uint16_t) * height);
- uint16_t *arrbuf2 = (uint16_t *)aom_malloc(sizeof(uint16_t) * height2);
- if (intbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) goto Error;
- for (i = 0; i < height; ++i) {
- highbd_upscale_normative(CONVERT_TO_SHORTPTR(input + in_stride * i), width,
- intbuf + width2 * i, width2, bd);
+static void highbd_upscale_normative_rect(const uint8_t *const input,
+ int height, int width, int in_stride,
+ uint8_t *output, int height2,
+ int width2, int out_stride,
+ int x_step_qn, int x0_qn,
+ int pad_left, int pad_right, int bd) {
+ assert(width > 0);
+ assert(height > 0);
+ assert(width2 > 0);
+ assert(height2 > 0);
+ assert(height2 == height);
+
+ // Extend the left/right pixels of the tile column if needed
+ // (either because we can't sample from other tiles, or because we're at
+ // a frame edge).
+ // Save the overwritten pixels into tmp_left and tmp_right.
+ // Note: Because we pass input-1 to av1_convolve_horiz_rs, we need one extra
+ // column of border pixels compared to what we'd naively think.
+ const int border_cols = UPSCALE_NORMATIVE_TAPS / 2 + 1;
+ const int border_size = border_cols * sizeof(uint16_t);
+ uint16_t *tmp_left =
+ NULL; // Silence spurious "may be used uninitialized" warnings
+ uint16_t *tmp_right = NULL;
+ uint16_t *const input16 = CONVERT_TO_SHORTPTR(input);
+ uint16_t *const in_tl = input16 - border_cols;
+ uint16_t *const in_tr = input16 + width;
+ if (pad_left) {
+ tmp_left = (uint16_t *)aom_malloc(sizeof(*tmp_left) * border_cols * height);
+ for (int i = 0; i < height; i++) {
+ memcpy(tmp_left + i * border_cols, in_tl + i * in_stride, border_size);
+ aom_memset16(in_tl + i * in_stride, input16[i * in_stride], border_cols);
+ }
}
- for (i = 0; i < width2; ++i) {
- highbd_fill_col_to_arr(intbuf + i, width2, height, arrbuf);
- highbd_upscale_normative(arrbuf, height, arrbuf2, height2, bd);
- highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2,
- arrbuf2);
+ if (pad_right) {
+ tmp_right =
+ (uint16_t *)aom_malloc(sizeof(*tmp_right) * border_cols * height);
+ for (int i = 0; i < height; i++) {
+ memcpy(tmp_right + i * border_cols, in_tr + i * in_stride, border_size);
+ aom_memset16(in_tr + i * in_stride, input16[i * in_stride + width - 1],
+ border_cols);
+ }
}
-Error:
- aom_free(intbuf);
- aom_free(arrbuf);
- aom_free(arrbuf2);
-}
-#endif // CONFIG_FRAME_SUPERRES
+ av1_highbd_convolve_horiz_rs(CONVERT_TO_SHORTPTR(input - 1), in_stride,
+ CONVERT_TO_SHORTPTR(output), out_stride, width2,
+ height2, &av1_resize_filter_normative[0][0],
+ x0_qn, x_step_qn, bd);
-#endif // CONFIG_HIGHBITDEPTH
+ // Restore the left/right border pixels
+ if (pad_left) {
+ for (int i = 0; i < height; i++) {
+ memcpy(in_tl + i * in_stride, tmp_left + i * border_cols, border_size);
+ }
+ aom_free(tmp_left);
+ }
+ if (pad_right) {
+ for (int i = 0; i < height; i++) {
+ memcpy(in_tr + i * in_stride, tmp_right + i * border_cols, border_size);
+ }
+ aom_free(tmp_right);
+ }
+}
void av1_resize_frame420(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
@@ -1031,7 +1031,6 @@ void av1_resize_frame444(const uint8_t *const y, int y_stride,
resize_plane(v, height, width, uv_stride, ov, oheight, owidth, ouv_stride);
}
-#if CONFIG_HIGHBITDEPTH
void av1_highbd_resize_frame420(const uint8_t *const y, int y_stride,
const uint8_t *const u, const uint8_t *const v,
int uv_stride, int height, int width,
@@ -1073,125 +1072,137 @@ void av1_highbd_resize_frame444(const uint8_t *const y, int y_stride,
highbd_resize_plane(v, height, width, uv_stride, ov, oheight, owidth,
ouv_stride, bd);
}
-#endif // CONFIG_HIGHBITDEPTH
-#if CONFIG_HIGHBITDEPTH
-void av1_resize_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst, int bd) {
-#else
void av1_resize_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst) {
-#endif // CONFIG_HIGHBITDEPTH
+ YV12_BUFFER_CONFIG *dst, int bd,
+ const int num_planes) {
// TODO(dkovalev): replace YV12_BUFFER_CONFIG with aom_image_t
- int i;
- const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
- src->v_buffer };
- const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
- const int src_widths[3] = { src->y_crop_width, src->uv_crop_width,
- src->uv_crop_width };
- const int src_heights[3] = { src->y_crop_height, src->uv_crop_height,
- src->uv_crop_height };
- uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
- const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
- const int dst_widths[3] = { dst->y_crop_width, dst->uv_crop_width,
- dst->uv_crop_width };
- const int dst_heights[3] = { dst->y_crop_height, dst->uv_crop_height,
- dst->uv_crop_height };
-
- for (i = 0; i < MAX_MB_PLANE; ++i) {
-#if CONFIG_HIGHBITDEPTH
+
+ // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
+ // the static analysis warnings.
+ for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
+ const int is_uv = i > 0;
if (src->flags & YV12_FLAG_HIGHBITDEPTH)
- highbd_resize_plane(srcs[i], src_heights[i], src_widths[i],
- src_strides[i], dsts[i], dst_heights[i],
- dst_widths[i], dst_strides[i], bd);
+ highbd_resize_plane(src->buffers[i], src->crop_heights[is_uv],
+ src->crop_widths[is_uv], src->strides[is_uv],
+ dst->buffers[i], dst->crop_heights[is_uv],
+ dst->crop_widths[is_uv], dst->strides[is_uv], bd);
else
-#endif // CONFIG_HIGHBITDEPTH
- resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
- dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
+ resize_plane(src->buffers[i], src->crop_heights[is_uv],
+ src->crop_widths[is_uv], src->strides[is_uv],
+ dst->buffers[i], dst->crop_heights[is_uv],
+ dst->crop_widths[is_uv], dst->strides[is_uv]);
}
- aom_extend_frame_borders(dst);
+ aom_extend_frame_borders(dst, num_planes);
}
-#if CONFIG_FRAME_SUPERRES
-#if CONFIG_HIGHBITDEPTH
-void av1_upscale_normative_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst, int bd) {
-#else
-void av1_upscale_normative_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst) {
-#endif // CONFIG_HIGHBITDEPTH
- // TODO(dkovalev): replace YV12_BUFFER_CONFIG with aom_image_t
- int i;
- const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
- src->v_buffer };
- const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
- const int src_widths[3] = { src->y_crop_width, src->uv_crop_width,
- src->uv_crop_width };
- const int src_heights[3] = { src->y_crop_height, src->uv_crop_height,
- src->uv_crop_height };
- uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
- const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
- const int dst_widths[3] = { dst->y_crop_width, dst->uv_crop_width,
- dst->uv_crop_width };
- const int dst_heights[3] = { dst->y_crop_height, dst->uv_crop_height,
- dst->uv_crop_height };
-
- for (i = 0; i < MAX_MB_PLANE; ++i) {
-#if CONFIG_HIGHBITDEPTH
- if (src->flags & YV12_FLAG_HIGHBITDEPTH)
- highbd_upscale_normative_plane(srcs[i], src_heights[i], src_widths[i],
- src_strides[i], dsts[i], dst_heights[i],
- dst_widths[i], dst_strides[i], bd);
+void av1_upscale_normative_rows(const AV1_COMMON *cm, const uint8_t *src,
+ int src_stride, uint8_t *dst, int dst_stride,
+ int plane, int rows) {
+ const int is_uv = (plane > 0);
+ const int ss_x = is_uv && cm->subsampling_x;
+ const int downscaled_plane_width = ROUND_POWER_OF_TWO(cm->width, ss_x);
+ const int upscaled_plane_width =
+ ROUND_POWER_OF_TWO(cm->superres_upscaled_width, ss_x);
+ const int superres_denom = cm->superres_scale_denominator;
+
+ TileInfo tile_col;
+ const int32_t x_step_qn = av1_get_upscale_convolve_step(
+ downscaled_plane_width, upscaled_plane_width);
+ int32_t x0_qn = get_upscale_convolve_x0(downscaled_plane_width,
+ upscaled_plane_width, x_step_qn);
+
+ for (int j = 0; j < cm->tile_cols; j++) {
+ av1_tile_set_col(&tile_col, cm, j);
+ // Determine the limits of this tile column in both the source
+ // and destination images.
+ // Note: The actual location which we start sampling from is
+ // (downscaled_x0 - 1 + (x0_qn/2^14)), and this quantity increases
+ // by exactly dst_width * (x_step_qn/2^14) pixels each iteration.
+ const int downscaled_x0 = tile_col.mi_col_start << (MI_SIZE_LOG2 - ss_x);
+ const int downscaled_x1 = tile_col.mi_col_end << (MI_SIZE_LOG2 - ss_x);
+ const int src_width = downscaled_x1 - downscaled_x0;
+
+ const int upscaled_x0 = (downscaled_x0 * superres_denom) / SCALE_NUMERATOR;
+ int upscaled_x1;
+ if (j == cm->tile_cols - 1) {
+ // Note that we can't just use AOMMIN here - due to rounding,
+ // (downscaled_x1 * superres_denom) / SCALE_NUMERATOR may be less than
+ // upscaled_plane_width.
+ upscaled_x1 = upscaled_plane_width;
+ } else {
+ upscaled_x1 = (downscaled_x1 * superres_denom) / SCALE_NUMERATOR;
+ }
+
+ const uint8_t *const src_ptr = src + downscaled_x0;
+ uint8_t *const dst_ptr = dst + upscaled_x0;
+ const int dst_width = upscaled_x1 - upscaled_x0;
+
+ const int pad_left = (j == 0);
+ const int pad_right = (j == cm->tile_cols - 1);
+
+ if (cm->use_highbitdepth)
+ highbd_upscale_normative_rect(
+ src_ptr, rows, src_width, src_stride, dst_ptr, rows, dst_width,
+ dst_stride, x_step_qn, x0_qn, pad_left, pad_right, cm->bit_depth);
else
-#endif // CONFIG_HIGHBITDEPTH
- upscale_normative_plane(srcs[i], src_heights[i], src_widths[i],
- src_strides[i], dsts[i], dst_heights[i],
- dst_widths[i], dst_strides[i]);
+ upscale_normative_rect(src_ptr, rows, src_width, src_stride, dst_ptr,
+ rows, dst_width, dst_stride, x_step_qn, x0_qn,
+ pad_left, pad_right);
+
+ // Update the fractional pixel offset to prepare for the next tile column.
+ x0_qn += (dst_width * x_step_qn) - (src_width << RS_SCALE_SUBPEL_BITS);
}
- aom_extend_frame_borders(dst);
}
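Aside (not part of the patch): the x0_qn update at the bottom of the tile loop keeps the sub-pixel phase continuous across tile columns. After a tile column emits dst_width output pixels, the source position (in the Q14 units of the 2^14 comment above) has advanced by dst_width * x_step_qn; subtracting src_width << RS_SCALE_SUBPEL_BITS re-expresses that position relative to the first source pixel of the next tile column, so the next iteration resumes exactly where this one stopped.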
-#endif // CONFIG_FRAME_SUPERRES
+
+void av1_upscale_normative_and_extend_frame(const AV1_COMMON *cm,
+ const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst) {
+ const int num_planes = av1_num_planes(cm);
+ for (int i = 0; i < num_planes; ++i) {
+ const int is_uv = (i > 0);
+ av1_upscale_normative_rows(cm, src->buffers[i], src->strides[is_uv],
+ dst->buffers[i], dst->strides[is_uv], i,
+ src->crop_heights[is_uv]);
+ }
+
+ aom_extend_frame_borders(dst, num_planes);
+}
YV12_BUFFER_CONFIG *av1_scale_if_required(AV1_COMMON *cm,
YV12_BUFFER_CONFIG *unscaled,
YV12_BUFFER_CONFIG *scaled) {
+ const int num_planes = av1_num_planes(cm);
if (cm->width != unscaled->y_crop_width ||
cm->height != unscaled->y_crop_height) {
-#if CONFIG_HIGHBITDEPTH
- av1_resize_and_extend_frame(unscaled, scaled, (int)cm->bit_depth);
-#else
- av1_resize_and_extend_frame(unscaled, scaled);
-#endif // CONFIG_HIGHBITDEPTH
+ av1_resize_and_extend_frame(unscaled, scaled, (int)cm->bit_depth,
+ num_planes);
return scaled;
} else {
return unscaled;
}
}
-// Calculates scaled dimensions given original dimensions and the scale
-// denominator. If 'scale_height' is 1, both width and height are scaled;
-// otherwise, only the width is scaled.
-static void calculate_scaled_size_helper(int *width, int *height, int denom,
- int scale_height) {
+// Calculates the scaled dimension given the original dimension and the scale
+// denominator.
+static void calculate_scaled_size_helper(int *dim, int denom) {
if (denom != SCALE_NUMERATOR) {
- *width = *width * SCALE_NUMERATOR / denom;
- *width += *width & 1; // Make it even.
- if (scale_height) {
- *height = *height * SCALE_NUMERATOR / denom;
- *height += *height & 1; // Make it even.
- }
+    // Use this version if we need *dim to be even
+    // *dim = (*dim * SCALE_NUMERATOR + denom) / (2 * denom);
+    // *dim <<= 1;
+ *dim = (*dim * SCALE_NUMERATOR + denom / 2) / (denom);
}
}
void av1_calculate_scaled_size(int *width, int *height, int resize_denom) {
- calculate_scaled_size_helper(width, height, resize_denom, 1);
+ calculate_scaled_size_helper(width, resize_denom);
+ calculate_scaled_size_helper(height, resize_denom);
}
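Aside (not part of the patch): the helper rounds to nearest using integer arithmetic. A standalone check, assuming SCALE_NUMERATOR == 8 as defined in libaom, for a 1920-wide frame with resize denominator 12:

    #include <assert.h>

    // (1920 * 8 + 6) / 12 = 1280 (the 0.5 remainder rounds down).
    int main(void) {
      const int scale_numerator = 8;  // assumed value of SCALE_NUMERATOR
      const int denom = 12;
      int dim = 1920;
      dim = (dim * scale_numerator + denom / 2) / denom;
      assert(dim == 1280);
      return 0;
    }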
-#if CONFIG_FRAME_SUPERRES
void av1_calculate_scaled_superres_size(int *width, int *height,
int superres_denom) {
- calculate_scaled_size_helper(width, height, superres_denom,
- !CONFIG_HORZONLY_FRAME_SUPERRES);
+ (void)height;
+ calculate_scaled_size_helper(width, superres_denom);
}
void av1_calculate_unscaled_superres_size(int *width, int *height, int denom) {
@@ -1199,38 +1210,47 @@ void av1_calculate_unscaled_superres_size(int *width, int *height, int denom) {
// Note: av1_calculate_scaled_superres_size() rounds *up* after division
// when the resulting dimensions are odd. So here, we round *down*.
*width = *width * denom / SCALE_NUMERATOR;
-#if CONFIG_HORZONLY_FRAME_SUPERRES
(void)height;
-#else
- *height = *height * denom / SCALE_NUMERATOR;
-#endif // CONFIG_HORZONLY_FRAME_SUPERRES
}
}
+// Copy only the config data from 'src' to 'dst'.
+static void copy_buffer_config(const YV12_BUFFER_CONFIG *const src,
+ YV12_BUFFER_CONFIG *const dst) {
+ dst->bit_depth = src->bit_depth;
+ dst->color_primaries = src->color_primaries;
+ dst->transfer_characteristics = src->transfer_characteristics;
+ dst->matrix_coefficients = src->matrix_coefficients;
+ dst->monochrome = src->monochrome;
+ dst->chroma_sample_position = src->chroma_sample_position;
+ dst->color_range = src->color_range;
+}
+
// TODO(afergs): Look for in-place upscaling
// TODO(afergs): aom_ vs av1_ functions? Which can I use?
// Upscale decoded image.
void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool) {
- if (av1_superres_unscaled(cm)) return;
+ const int num_planes = av1_num_planes(cm);
+ if (!av1_superres_scaled(cm)) return;
YV12_BUFFER_CONFIG copy_buffer;
memset(&copy_buffer, 0, sizeof(copy_buffer));
YV12_BUFFER_CONFIG *const frame_to_show = get_frame_new_buffer(cm);
- if (aom_alloc_frame_buffer(&copy_buffer, cm->width, cm->height,
+ const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, 3);
+ if (aom_alloc_frame_buffer(&copy_buffer, aligned_width, cm->height,
cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif // CONFIG_HIGHBITDEPTH
- AOM_BORDER_IN_PIXELS, cm->byte_alignment))
+ cm->use_highbitdepth, AOM_BORDER_IN_PIXELS,
+ cm->byte_alignment))
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate copy buffer for superres upscaling");
- // Copy function assumes the frames are the same size, doesn't copy bit_depth.
- aom_yv12_copy_frame(frame_to_show, &copy_buffer);
- copy_buffer.bit_depth = frame_to_show->bit_depth;
- assert(copy_buffer.y_crop_width == cm->width);
+ // Copy function assumes the frames are the same size.
+ // Note that it does not copy YV12_BUFFER_CONFIG config data.
+ aom_yv12_copy_frame(frame_to_show, &copy_buffer, num_planes);
+
+ assert(copy_buffer.y_crop_width == aligned_width);
assert(copy_buffer.y_crop_height == cm->height);
// Realloc the current frame buffer at a higher resolution in place.
@@ -1248,48 +1268,43 @@ void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool) {
&cm->error, AOM_CODEC_MEM_ERROR,
"Failed to free current frame buffer before superres upscaling");
- if (aom_realloc_frame_buffer(
- frame_to_show, cm->superres_upscaled_width,
- cm->superres_upscaled_height, cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif // CONFIG_HIGHBITDEPTH
- AOM_BORDER_IN_PIXELS, cm->byte_alignment, fb, cb, cb_priv))
+ // aom_realloc_frame_buffer() leaves config data for frame_to_show intact
+ if (aom_realloc_frame_buffer(frame_to_show, cm->superres_upscaled_width,
+ cm->superres_upscaled_height,
+ cm->subsampling_x, cm->subsampling_y,
+ cm->use_highbitdepth, AOM_BORDER_IN_PIXELS,
+ cm->byte_alignment, fb, cb, cb_priv))
aom_internal_error(
&cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate current frame buffer for superres upscaling");
} else {
+ // Make a copy of the config data for frame_to_show in copy_buffer
+ copy_buffer_config(frame_to_show, &copy_buffer);
+
// Don't use callbacks on the encoder.
+ // aom_alloc_frame_buffer() clears the config data for frame_to_show
if (aom_alloc_frame_buffer(frame_to_show, cm->superres_upscaled_width,
cm->superres_upscaled_height, cm->subsampling_x,
- cm->subsampling_y,
-#if CONFIG_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif // CONFIG_HIGHBITDEPTH
+ cm->subsampling_y, cm->use_highbitdepth,
AOM_BORDER_IN_PIXELS, cm->byte_alignment))
aom_internal_error(
&cm->error, AOM_CODEC_MEM_ERROR,
"Failed to reallocate current frame buffer for superres upscaling");
+
+ // Restore config data back to frame_to_show
+ copy_buffer_config(&copy_buffer, frame_to_show);
}
// TODO(afergs): verify frame_to_show is correct after realloc
// encoder:
// decoder:
- frame_to_show->bit_depth = copy_buffer.bit_depth;
+
assert(frame_to_show->y_crop_width == cm->superres_upscaled_width);
assert(frame_to_show->y_crop_height == cm->superres_upscaled_height);
// Scale up and back into frame_to_show.
assert(frame_to_show->y_crop_width != cm->width);
- assert(IMPLIES(!CONFIG_HORZONLY_FRAME_SUPERRES,
- frame_to_show->y_crop_height != cm->height));
-#if CONFIG_HIGHBITDEPTH
- av1_upscale_normative_and_extend_frame(&copy_buffer, frame_to_show,
- (int)cm->bit_depth);
-#else
- av1_upscale_normative_and_extend_frame(&copy_buffer, frame_to_show);
-#endif // CONFIG_HIGHBITDEPTH
+ av1_upscale_normative_and_extend_frame(cm, &copy_buffer, frame_to_show);
// Free the copy buffer
aom_free_frame_buffer(&copy_buffer);
}
-#endif // CONFIG_FRAME_SUPERRES
diff --git a/third_party/aom/av1/common/resize.h b/third_party/aom/av1/common/resize.h
index 66b32c72d..feec3a90e 100644
--- a/third_party/aom/av1/common/resize.h
+++ b/third_party/aom/av1/common/resize.h
@@ -39,7 +39,6 @@ void av1_resize_frame444(const uint8_t *const y, int y_stride,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth);
-#if CONFIG_HIGHBITDEPTH
void av1_highbd_resize_plane(const uint8_t *const input, int height, int width,
int in_stride, uint8_t *output, int height2,
int width2, int out_stride, int bd);
@@ -61,25 +60,16 @@ void av1_highbd_resize_frame444(const uint8_t *const y, int y_stride,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd);
-#endif // CONFIG_HIGHBITDEPTH
-
-#if CONFIG_HIGHBITDEPTH
-void av1_resize_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst, int bd);
-#else
void av1_resize_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst);
-#endif // CONFIG_HIGHBITDEPTH
-
-#if CONFIG_FRAME_SUPERRES
-#if CONFIG_HIGHBITDEPTH
-void av1_upscale_normative_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst, int bd);
-#else
-void av1_upscale_normative_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst, int bd,
+ const int num_planes);
+
+void av1_upscale_normative_rows(const AV1_COMMON *cm, const uint8_t *src,
+ int src_stride, uint8_t *dst, int dst_stride,
+ int plane, int rows);
+void av1_upscale_normative_and_extend_frame(const AV1_COMMON *cm,
+ const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst);
-#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_FRAME_SUPERRES
YV12_BUFFER_CONFIG *av1_scale_if_required(AV1_COMMON *cm,
YV12_BUFFER_CONFIG *unscaled,
@@ -89,7 +79,6 @@ YV12_BUFFER_CONFIG *av1_scale_if_required(AV1_COMMON *cm,
// resize scale denominator.
void av1_calculate_scaled_size(int *width, int *height, int resize_denom);
-#if CONFIG_FRAME_SUPERRES
// Similar to above, but calculates scaled dimensions after superres from the
// given original dimensions and superres scale denominator.
void av1_calculate_scaled_superres_size(int *width, int *height,
@@ -102,11 +91,19 @@ void av1_calculate_unscaled_superres_size(int *width, int *height, int denom);
void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool);
-// Returns 1 if a superres upscaled frame is unscaled and 0 otherwise.
-static INLINE int av1_superres_unscaled(const AV1_COMMON *cm) {
- return (cm->superres_scale_denominator == SCALE_NUMERATOR);
+// Returns 1 if a superres upscaled frame is scaled and 0 otherwise.
+static INLINE int av1_superres_scaled(const AV1_COMMON *cm) {
+ // Note: for some corner cases (e.g. cm->width of 1), there may be no scaling
+ // required even though cm->superres_scale_denominator != SCALE_NUMERATOR.
+ // So, the following check is more accurate.
+ return !(cm->width == cm->superres_upscaled_width);
}
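Aside (not part of the patch): a concrete instance of the corner case mentioned above, assuming SCALE_NUMERATOR == 8: for an upscaled width of 1 with superres denominator 16, the rounded scaling gives (1 * 8 + 8) / 16 = 1, so cm->width equals cm->superres_upscaled_width and av1_superres_scaled() correctly reports no scaling even though the denominator differs from SCALE_NUMERATOR.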
-#endif // CONFIG_FRAME_SUPERRES
+
+#define UPSCALE_NORMATIVE_TAPS 8
+extern const int16_t av1_resize_filter_normative[1 << RS_SUBPEL_BITS]
+ [UPSCALE_NORMATIVE_TAPS];
+
+int32_t av1_get_upscale_convolve_step(int in_length, int out_length);
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/aom/av1/common/restoration.c b/third_party/aom/av1/common/restoration.c
index 00441f072..58a5275ca 100644
--- a/third_party/aom/av1/common/restoration.c
+++ b/third_party/aom/av1/common/restoration.c
@@ -12,100 +12,130 @@
#include <math.h>
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
-#include "./aom_scale_rtcd.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/aom_scale_rtcd.h"
+
+#include "aom_mem/aom_mem.h"
#include "av1/common/onyxc_int.h"
+#include "av1/common/resize.h"
#include "av1/common/restoration.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
+// The 's' values are calculated from the original 'r' and 'e' values in the
+// spec using GenSgrprojVtable().
+// Note: Setting r = 0 skips the filter; the corresponding s is set to -1 (invalid).
const sgr_params_type sgr_params[SGRPROJ_PARAMS] = {
-#if USE_HIGHPASS_IN_SGRPROJ
- // corner, edge, r2, eps2
- { -1, 2, 1, 1 }, { -1, 2, 1, 2 }, { -1, 2, 1, 3 }, { -1, 2, 1, 4 },
- { -1, 2, 1, 5 }, { -2, 3, 1, 2 }, { -2, 3, 1, 3 }, { -2, 3, 1, 4 },
- { -2, 3, 1, 5 }, { -2, 3, 1, 6 }, { -3, 4, 1, 3 }, { -3, 4, 1, 4 },
- { -3, 4, 1, 5 }, { -3, 4, 1, 6 }, { -3, 4, 1, 7 }, { -3, 4, 1, 8 }
-#else
-// r1, eps1, r2, eps2
-#if MAX_RADIUS == 2
- { 2, 12, 1, 4 }, { 2, 15, 1, 6 }, { 2, 18, 1, 8 }, { 2, 20, 1, 9 },
- { 2, 22, 1, 10 }, { 2, 25, 1, 11 }, { 2, 35, 1, 12 }, { 2, 45, 1, 13 },
- { 2, 55, 1, 14 }, { 2, 65, 1, 15 }, { 2, 75, 1, 16 }, { 2, 30, 1, 2 },
- { 2, 50, 1, 12 }, { 2, 60, 1, 13 }, { 2, 70, 1, 14 }, { 2, 80, 1, 15 },
-#else
- { 2, 12, 1, 4 }, { 2, 15, 1, 6 }, { 2, 18, 1, 8 }, { 2, 20, 1, 9 },
- { 2, 22, 1, 10 }, { 2, 25, 1, 11 }, { 2, 35, 1, 12 }, { 2, 45, 1, 13 },
- { 2, 55, 1, 14 }, { 2, 65, 1, 15 }, { 2, 75, 1, 16 }, { 3, 30, 1, 10 },
- { 3, 50, 1, 12 }, { 3, 50, 2, 25 }, { 3, 60, 2, 35 }, { 3, 70, 2, 45 },
-#endif // MAX_RADIUS == 2
-#endif
+ { { 2, 1 }, { 140, 3236 } }, { { 2, 1 }, { 112, 2158 } },
+ { { 2, 1 }, { 93, 1618 } }, { { 2, 1 }, { 80, 1438 } },
+ { { 2, 1 }, { 70, 1295 } }, { { 2, 1 }, { 58, 1177 } },
+ { { 2, 1 }, { 47, 1079 } }, { { 2, 1 }, { 37, 996 } },
+ { { 2, 1 }, { 30, 925 } }, { { 2, 1 }, { 25, 863 } },
+ { { 0, 1 }, { -1, 2589 } }, { { 0, 1 }, { -1, 1618 } },
+ { { 0, 1 }, { -1, 1177 } }, { { 0, 1 }, { -1, 925 } },
+ { { 2, 0 }, { 56, -1 } }, { { 2, 0 }, { 22, -1 } },
};
-typedef void (*restore_func_type)(uint8_t *data8, int width, int height,
- int stride, RestorationInternal *rst,
- uint8_t *dst8, int dst_stride);
-#if CONFIG_HIGHBITDEPTH
-typedef void (*restore_func_highbd_type)(uint8_t *data8, int width, int height,
- int stride, RestorationInternal *rst,
- int bit_depth, uint8_t *dst8,
- int dst_stride);
-#endif // CONFIG_HIGHBITDEPTH
-
-int av1_alloc_restoration_struct(AV1_COMMON *cm, RestorationInfo *rst_info,
- int width, int height) {
- const int ntiles = av1_get_rest_ntiles(
- width, height, rst_info->restoration_tilesize, NULL, NULL, NULL, NULL);
- aom_free(rst_info->restoration_type);
- CHECK_MEM_ERROR(cm, rst_info->restoration_type,
- (RestorationType *)aom_malloc(
- sizeof(*rst_info->restoration_type) * ntiles));
- aom_free(rst_info->wiener_info);
- CHECK_MEM_ERROR(
- cm, rst_info->wiener_info,
- (WienerInfo *)aom_memalign(16, sizeof(*rst_info->wiener_info) * ntiles));
- memset(rst_info->wiener_info, 0, sizeof(*rst_info->wiener_info) * ntiles);
- aom_free(rst_info->sgrproj_info);
- CHECK_MEM_ERROR(
- cm, rst_info->sgrproj_info,
- (SgrprojInfo *)aom_malloc(sizeof(*rst_info->sgrproj_info) * ntiles));
- return ntiles;
+AV1PixelRect av1_whole_frame_rect(const AV1_COMMON *cm, int is_uv) {
+ AV1PixelRect rect;
+
+ int ss_x = is_uv && cm->subsampling_x;
+ int ss_y = is_uv && cm->subsampling_y;
+
+ rect.top = 0;
+ rect.bottom = ROUND_POWER_OF_TWO(cm->height, ss_y);
+ rect.left = 0;
+ rect.right = ROUND_POWER_OF_TWO(cm->superres_upscaled_width, ss_x);
+ return rect;
+}
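
With 4:2:0 subsampling and odd luma dimensions, ROUND_POWER_OF_TWO rounds the half-resolution plane size to the nearest integer rather than truncating. A standalone sketch of that rounding (the frame height is made up; the macro is restated locally for illustration):

#include <stdio.h>

/* Round value / 2^n to nearest, as the rect computation above does. */
#define ROUND_POWER_OF_TWO(value, n) (((value) + ((1 << (n)) >> 1)) >> (n))

int main(void) {
  const int luma_height = 721; /* hypothetical frame height */
  const int ss_y = 1;          /* 4:2:0 vertical subsampling */
  /* Prints 361: 721 luma rows map to 361 chroma rows, not 360. */
  printf("chroma rows = %d\n", ROUND_POWER_OF_TWO(luma_height, ss_y));
  return 0;
}
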
+
+// Count horizontal or vertical units per tile (use a width or height for
+// tile_size, respectively). We basically want to divide the tile size by the
+// size of a restoration unit. Rather than rounding up unconditionally as you
+// might expect, we round to nearest, which models the way a right or bottom
+// restoration unit can extend up to 150% of its normal width or height. The
+// max with 1 is to deal with tiles that are smaller than half of a restoration
+// unit.
+int av1_lr_count_units_in_tile(int unit_size, int tile_size) {
+ return AOMMAX((tile_size + (unit_size >> 1)) / unit_size, 1);
+}
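
As a quick illustration of the round-to-nearest behaviour described in the comment above (a standalone sketch; the unit and tile sizes are made up):

#include <assert.h>

#define AOMMAX(a, b) ((a) > (b) ? (a) : (b))

/* Same arithmetic as av1_lr_count_units_in_tile() above. */
static int count_units(int unit_size, int tile_size) {
  return AOMMAX((tile_size + (unit_size >> 1)) / unit_size, 1);
}

int main(void) {
  assert(count_units(256, 1920) == 8); /* 7.5 units rounds up to 8   */
  assert(count_units(256, 1919) == 7); /* just under 7.5 rounds down */
  assert(count_units(256, 100) == 1);  /* less than half a unit -> 1 */
  return 0;
}
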
+
+void av1_alloc_restoration_struct(AV1_COMMON *cm, RestorationInfo *rsi,
+ int is_uv) {
+ // We need to allocate enough space for restoration units to cover the
+ // largest tile. Without CONFIG_MAX_TILE, this is always the tile at the
+ // top-left and we can use av1_get_tile_rect(). With CONFIG_MAX_TILE, we have
+ // to do the computation ourselves, iterating over the tiles and keeping
+ // track of the largest width and height, then upscaling.
+ const AV1PixelRect tile_rect = av1_whole_frame_rect(cm, is_uv);
+ const int max_tile_w = tile_rect.right - tile_rect.left;
+ const int max_tile_h = tile_rect.bottom - tile_rect.top;
+
+ // To calculate hpertile and vpertile (horizontal and vertical units per
+ // tile), we basically want to divide the largest tile width or height by the
+ // size of a restoration unit. Rather than rounding up unconditionally as you
+ // might expect, we round to nearest, which models the way a right or bottom
+  // restoration unit can extend up to 150% of its normal width or height. The
+ // max with 1 is to deal with tiles that are smaller than half of a
+ // restoration unit.
+ const int unit_size = rsi->restoration_unit_size;
+ const int hpertile = av1_lr_count_units_in_tile(unit_size, max_tile_w);
+ const int vpertile = av1_lr_count_units_in_tile(unit_size, max_tile_h);
+
+ rsi->units_per_tile = hpertile * vpertile;
+ rsi->horz_units_per_tile = hpertile;
+ rsi->vert_units_per_tile = vpertile;
+
+ const int ntiles = 1;
+ const int nunits = ntiles * rsi->units_per_tile;
+
+ aom_free(rsi->unit_info);
+ CHECK_MEM_ERROR(cm, rsi->unit_info,
+ (RestorationUnitInfo *)aom_memalign(
+ 16, sizeof(*rsi->unit_info) * nunits));
}
void av1_free_restoration_struct(RestorationInfo *rst_info) {
- aom_free(rst_info->restoration_type);
- rst_info->restoration_type = NULL;
- aom_free(rst_info->wiener_info);
- rst_info->wiener_info = NULL;
- aom_free(rst_info->sgrproj_info);
- rst_info->sgrproj_info = NULL;
+ aom_free(rst_info->unit_info);
+ rst_info->unit_info = NULL;
}
-// TODO(debargha): This table can be substantially reduced since only a few
-// values are actually used.
-int sgrproj_mtable[MAX_EPS][MAX_NELEM];
+#if 0
+// Pair of values for each sgrproj parameter:
+// Index 0 corresponds to r[0], e[0]
+// Index 1 corresponds to r[1], e[1]
+int sgrproj_mtable[SGRPROJ_PARAMS][2];
static void GenSgrprojVtable() {
- int e, n;
- for (e = 1; e <= MAX_EPS; ++e)
- for (n = 1; n <= MAX_NELEM; ++n) {
- const int n2e = n * n * e;
- sgrproj_mtable[e - 1][n - 1] =
- (((1 << SGRPROJ_MTABLE_BITS) + n2e / 2) / n2e);
+ for (int i = 0; i < SGRPROJ_PARAMS; ++i) {
+ const sgr_params_type *const params = &sgr_params[i];
+ for (int j = 0; j < 2; ++j) {
+ const int e = params->e[j];
+ const int r = params->r[j];
+ if (r == 0) { // filter is disabled
+ sgrproj_mtable[i][j] = -1; // mark invalid
+ } else { // filter is enabled
+ const int n = (2 * r + 1) * (2 * r + 1);
+ const int n2e = n * n * e;
+ assert(n2e != 0);
+ sgrproj_mtable[i][j] = (((1 << SGRPROJ_MTABLE_BITS) + n2e / 2) / n2e);
+ }
}
+ }
}
+#endif
-void av1_loop_restoration_precal() { GenSgrprojVtable(); }
-
-static void loop_restoration_init(RestorationInternal *rst, int kf) {
- rst->keyframe = kf;
+void av1_loop_restoration_precal() {
+#if 0
+ GenSgrprojVtable();
+#endif
}
-void extend_frame(uint8_t *data, int width, int height, int stride,
- int border_horz, int border_vert) {
+static void extend_frame_lowbd(uint8_t *data, int width, int height, int stride,
+ int border_horz, int border_vert) {
uint8_t *data_p;
int i;
for (i = 0; i < height; ++i) {
@@ -123,261 +153,297 @@ void extend_frame(uint8_t *data, int width, int height, int stride,
}
}
-#if CONFIG_STRIPED_LOOP_RESTORATION
-
-// This function setup a processing stripe by replacing the vertical
-// stripe boundary (2 lines above and 2 lines below) by data coming
-// from the above/below buffers. Before doing so the original
-// frame data is saved into a temporary buffer, such that it
-// can be restored by the restore_processing_stripe_boundary
-// function after the filtering of the processing stripe.
-// Returns the height of the processing stripe
-static int setup_processing_stripe_boundary(int y0, int v_end, int h_start,
- int h_end, uint8_t *data,
- int stride,
- RestorationInternal *rst,
- int use_highbd) {
- int y, y_stripe_topmost, stripe_index, i;
- int tile_offset = RESTORATION_TILE_OFFSET >> rst->subsampling_y;
- int stripe_height = rst->rsi->procunit_height;
- int comp = rst->component;
- uint8_t *boundary_above_buf = rst->stripe_boundary_above[comp];
- uint8_t *boundary_below_buf = rst->stripe_boundary_below[comp];
- int boundary_stride = rst->stripe_boundary_stride[comp];
- int x0 = h_start - RESTORATION_EXTRA_HORZ;
- int x1 = h_end + RESTORATION_EXTRA_HORZ;
-
- stripe_index = (y0 + tile_offset) / stripe_height;
- y_stripe_topmost = stripe_index * stripe_height - tile_offset;
- boundary_above_buf +=
- ((stripe_index - 1) * 2 * boundary_stride + RESTORATION_EXTRA_HORZ)
- << use_highbd;
- boundary_below_buf +=
- (stripe_index * 2 * boundary_stride + RESTORATION_EXTRA_HORZ)
- << use_highbd;
-
- // setup the 2 lines above the stripe
- for (i = 0; i < 2; i++) {
- y = y_stripe_topmost - 2 + i;
- if (y >= 0 && y < y0 && y >= y0 - 2) {
- uint8_t *p = data + ((y * stride + x0) << use_highbd);
- uint8_t *new_data =
- boundary_above_buf + ((i * boundary_stride + x0) << use_highbd);
- // printf("above %3d %3d: %08x %08x : %08x %08x\n", y, x0,
- // ((uint32_t*)p)[0], ((uint32_t*)p)[1], ((uint32_t*)new_data)[0],
- // ((uint32_t*)new_data)[1]);
- // Save old pixels
- memcpy(rst->tmp_save_above[i], p, (x1 - x0) << use_highbd);
- // Replace width pixels from boundary_above_buf
- memcpy(p, new_data, (x1 - x0) << use_highbd);
- }
+static void extend_frame_highbd(uint16_t *data, int width, int height,
+ int stride, int border_horz, int border_vert) {
+ uint16_t *data_p;
+ int i, j;
+ for (i = 0; i < height; ++i) {
+ data_p = data + i * stride;
+ for (j = -border_horz; j < 0; ++j) data_p[j] = data_p[0];
+ for (j = width; j < width + border_horz; ++j) data_p[j] = data_p[width - 1];
}
- // setup the 2 lines below the stripe
- for (i = 0; i < 2; i++) {
- y = y_stripe_topmost + stripe_height + i;
- if (y < v_end + 2) {
- uint8_t *p = data + ((y * stride + x0) << use_highbd);
- uint8_t *new_data =
- boundary_below_buf + ((i * boundary_stride + x0) << use_highbd);
- // printf("below %3d %3d: %08x %08x : %08x %08x\n", y, x0,
- // ((uint32_t*)p)[0], ((uint32_t*)p)[1], ((uint32_t*)new_data)[0],
- // ((uint32_t*)new_data)[1]);
- // Save old pixels
- memcpy(rst->tmp_save_below[i], p, (x1 - x0) << use_highbd);
- // Replace width pixels from boundary_below_buf
- memcpy(p, new_data, (x1 - x0) << use_highbd);
- }
+ data_p = data - border_horz;
+ for (i = -border_vert; i < 0; ++i) {
+ memcpy(data_p + i * stride, data_p,
+ (width + 2 * border_horz) * sizeof(uint16_t));
+ }
+ for (i = height; i < height + border_vert; ++i) {
+ memcpy(data_p + i * stride, data_p + (height - 1) * stride,
+ (width + 2 * border_horz) * sizeof(uint16_t));
}
- // Return actual stripe height
- return AOMMIN(v_end, y_stripe_topmost + stripe_height) - y0;
}
-// This function restores the boundary lines modified by
-// setup_processing_stripe_boundary.
-static void restore_processing_stripe_boundary(int y0, int v_end, int h_start,
- int h_end, uint8_t *data,
- int stride,
- RestorationInternal *rst,
- int use_highbd) {
- int y, y_stripe_topmost, i, stripe_index;
- int tile_offset = 8 >> rst->subsampling_y;
- int stripe_height = rst->rsi->procunit_height;
- int x0 = h_start - RESTORATION_EXTRA_HORZ;
- int x1 = h_end + RESTORATION_EXTRA_HORZ;
-
- stripe_index = (y0 + tile_offset) / stripe_height;
- y_stripe_topmost = stripe_index * stripe_height - tile_offset;
-
- // restore the 2 lines above the stripe
- for (i = 0; i < 2; i++) {
- y = y_stripe_topmost - 2 + i;
- if (y >= 0 && y < y0 && y >= y0 - 2) {
- uint8_t *p = data + ((y * stride + x0) << use_highbd);
- memcpy(p, rst->tmp_save_above[i], (x1 - x0) << use_highbd);
- }
- }
- // restore the 2 lines below the stripe
- for (i = 0; i < 2; i++) {
- y = y_stripe_topmost + stripe_height + i;
- if (y < v_end + 2) {
- uint8_t *p = data + ((y * stride + x0) << use_highbd);
- memcpy(p, rst->tmp_save_below[i], (x1 - x0) << use_highbd);
- }
- }
+void extend_frame(uint8_t *data, int width, int height, int stride,
+ int border_horz, int border_vert, int highbd) {
+ if (highbd)
+ extend_frame_highbd(CONVERT_TO_SHORTPTR(data), width, height, stride,
+ border_horz, border_vert);
+ else
+ extend_frame_lowbd(data, width, height, stride, border_horz, border_vert);
}
-#endif
+static void copy_tile_lowbd(int width, int height, const uint8_t *src,
+ int src_stride, uint8_t *dst, int dst_stride) {
+ for (int i = 0; i < height; ++i)
+ memcpy(dst + i * dst_stride, src + i * src_stride, width);
+}
-static void loop_copy_tile(uint8_t *data, int tile_idx, int width, int height,
- int stride, RestorationInternal *rst, uint8_t *dst,
- int dst_stride) {
- const int tile_width = rst->tile_width;
- const int tile_height = rst->tile_height;
- RestorationTileLimits limits =
- av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
-#if CONFIG_STRIPED_LOOP_RESTORATION
- tile_height, width, height, rst->subsampling_y);
-#else
- tile_height, width, height);
-#endif
- for (int i = limits.v_start; i < limits.v_end; ++i)
- memcpy(dst + i * dst_stride + limits.h_start,
- data + i * stride + limits.h_start, limits.h_end - limits.h_start);
+static void copy_tile_highbd(int width, int height, const uint16_t *src,
+ int src_stride, uint16_t *dst, int dst_stride) {
+ for (int i = 0; i < height; ++i)
+ memcpy(dst + i * dst_stride, src + i * src_stride, width * sizeof(*dst));
}
-static void stepdown_wiener_kernel(const InterpKernel orig, InterpKernel vert,
- int boundary_dist, int istop) {
- memcpy(vert, orig, sizeof(InterpKernel));
- switch (boundary_dist) {
- case 0:
- vert[WIENER_HALFWIN] += vert[2] + vert[1] + vert[0];
- vert[2] = vert[1] = vert[0] = 0;
- break;
- case 1:
- vert[2] += vert[1] + vert[0];
- vert[1] = vert[0] = 0;
- break;
- case 2:
- vert[1] += vert[0];
- vert[0] = 0;
- break;
- default: break;
- }
- if (!istop) {
- int tmp;
- tmp = vert[0];
- vert[0] = vert[WIENER_WIN - 1];
- vert[WIENER_WIN - 1] = tmp;
- tmp = vert[1];
- vert[1] = vert[WIENER_WIN - 2];
- vert[WIENER_WIN - 2] = tmp;
- tmp = vert[2];
- vert[2] = vert[WIENER_WIN - 3];
- vert[WIENER_WIN - 3] = tmp;
- }
+static void copy_tile(int width, int height, const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride, int highbd) {
+ if (highbd)
+ copy_tile_highbd(width, height, CONVERT_TO_SHORTPTR(src), src_stride,
+ CONVERT_TO_SHORTPTR(dst), dst_stride);
+ else
+ copy_tile_lowbd(width, height, src, src_stride, dst, dst_stride);
}
-static void loop_wiener_filter_tile(uint8_t *data, int tile_idx, int width,
- int height, int stride,
- RestorationInternal *rst, uint8_t *dst,
- int dst_stride) {
- const int procunit_width = rst->rsi->procunit_width;
-#if CONFIG_STRIPED_LOOP_RESTORATION
- int procunit_height;
-#else
- const int procunit_height = rst->rsi->procunit_height;
-#endif
- const int tile_width = rst->tile_width;
- const int tile_height = rst->tile_height;
- if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
- loop_copy_tile(data, tile_idx, width, height, stride, rst, dst, dst_stride);
- return;
+#define REAL_PTR(hbd, d) ((hbd) ? (uint8_t *)CONVERT_TO_SHORTPTR(d) : (d))
+
+// With striped loop restoration, the filtering for each 64-pixel stripe gets
+// most of its input from the output of CDEF (stored in data8), but we need to
+// fill out a border of 3 pixels above/below the stripe according to the
+// following rules:
+//
+// * At a frame boundary, we copy the outermost row of CDEF pixels three times.
+// This extension is done by a call to extend_frame() at the start of the loop
+// restoration process, so the value of copy_above/copy_below doesn't strictly
+// matter.
+// However, by setting *copy_above = *copy_below = 1 whenever loop filtering
+// across tiles is disabled, we can allow
+// {setup,restore}_processing_stripe_boundary to assume that the top/bottom
+// data has always been copied, simplifying the behaviour at the left and
+// right edges of tiles.
+//
+// * If we're at a tile boundary and loop filtering across tiles is enabled,
+// then there is a logical stripe which is 64 pixels high, but which is split
+// into an 8px high and a 56px high stripe so that the processing (and
+// coefficient set usage) can be aligned to tiles.
+// In this case, we use the 3 rows of CDEF output across the boundary for
+// context; this corresponds to leaving the frame buffer as-is.
+//
+// * If we're at a tile boundary and loop filtering across tiles is disabled,
+// then we take the outermost row of CDEF pixels *within the current tile*
+// and copy it three times. Thus we behave exactly as if the tile were a full
+// frame.
+//
+// * Otherwise, we're at a stripe boundary within a tile. In that case, we
+// take 2 rows of deblocked pixels and extend them to 3 rows of context.
+//
+// The distinction between the latter two cases is handled by the
+// av1_loop_restoration_save_boundary_lines() function, so here we just need
+// to decide if we're overwriting the above/below boundary pixels or not.
+static void get_stripe_boundary_info(const RestorationTileLimits *limits,
+ const AV1PixelRect *tile_rect, int ss_y,
+ int *copy_above, int *copy_below) {
+ *copy_above = 1;
+ *copy_below = 1;
+
+ const int full_stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
+ const int runit_offset = RESTORATION_UNIT_OFFSET >> ss_y;
+
+ const int first_stripe_in_tile = (limits->v_start == tile_rect->top);
+ const int this_stripe_height =
+ full_stripe_height - (first_stripe_in_tile ? runit_offset : 0);
+ const int last_stripe_in_tile =
+ (limits->v_start + this_stripe_height >= tile_rect->bottom);
+
+ if (first_stripe_in_tile) *copy_above = 0;
+ if (last_stripe_in_tile) *copy_below = 0;
+}
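
A rough standalone sketch of how the two flags fall out for successive stripes in a tile. The 64-row stripe height and 8-row first-stripe offset are taken from the comments above; the 200-row tile is made up:

#include <stdio.h>

enum { STRIPE_HEIGHT = 64, FIRST_STRIPE_OFFSET = 8 };

int main(void) {
  const int tile_top = 0, tile_bottom = 200; /* hypothetical luma tile */
  int v_start = tile_top;
  while (v_start < tile_bottom) {
    const int first = (v_start == tile_top);
    const int h = STRIPE_HEIGHT - (first ? FIRST_STRIPE_OFFSET : 0);
    const int last = (v_start + h >= tile_bottom);
    /* Prints (0,1) for the first stripe, (1,1) for interior stripes and
     * (1,0) for the last one, mirroring get_stripe_boundary_info(). */
    printf("stripe at row %3d: copy_above=%d copy_below=%d\n", v_start,
           !first, !last);
    v_start += h;
  }
  return 0;
}
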
+
+// Overwrite the border pixels around a processing stripe so that the
+// conditions listed in the comment above get_stripe_boundary_info() are
+// preserved.
+// We save the pixels which get overwritten into a temporary buffer, so that
+// they can be restored by restore_processing_stripe_boundary() after we've
+// processed the stripe.
+//
+// limits gives the rectangular limits of the remaining stripes for the current
+// restoration unit. rsb holds the stored stripe boundaries (taken from either
+// deblock or CDEF output as necessary).
+//
+// tile_rect is the limits of the current tile and tile_stripe0 is the index of
+// the first stripe in this tile (needed to convert the tile-relative stripe
+// index we get from limits into something we can look up in rsb).
+static void setup_processing_stripe_boundary(
+ const RestorationTileLimits *limits, const RestorationStripeBoundaries *rsb,
+ int rsb_row, int use_highbd, int h, uint8_t *data8, int data_stride,
+ RestorationLineBuffers *rlbs, int copy_above, int copy_below, int opt) {
+  // Offsets within the line buffers. The buffer logically starts at column
+  // -RESTORATION_EXTRA_HORZ, so the first column we copy (frame column
+  // x0 - RESTORATION_EXTRA_HORZ, where x0 = limits->h_start) lands at column
+  // x0 in the buffer.
+ const int buf_stride = rsb->stripe_boundary_stride;
+ const int buf_x0_off = limits->h_start;
+ const int line_width =
+ (limits->h_end - limits->h_start) + 2 * RESTORATION_EXTRA_HORZ;
+ const int line_size = line_width << use_highbd;
+
+ const int data_x0 = limits->h_start - RESTORATION_EXTRA_HORZ;
+
+ // Replace RESTORATION_BORDER pixels above the top of the stripe
+ // We expand RESTORATION_CTX_VERT=2 lines from rsb->stripe_boundary_above
+ // to fill RESTORATION_BORDER=3 lines of above pixels. This is done by
+  // duplicating the topmost of the 2 lines (see the AOMMAX call when
+  // calculating buf_row, which gets the values 0, 0, 1 for i = -3, -2, -1).
+ //
+ // Special case: If we're at the top of a tile, which isn't on the topmost
+ // tile row, and we're allowed to loop filter across tiles, then we have a
+ // logical 64-pixel-high stripe which has been split into an 8-pixel high
+ // stripe and a 56-pixel high stripe (the current one). So, in this case,
+ // we want to leave the boundary alone!
+ if (!opt) {
+ if (copy_above) {
+ uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
+
+ for (int i = -RESTORATION_BORDER; i < 0; ++i) {
+ const int buf_row = rsb_row + AOMMAX(i + RESTORATION_CTX_VERT, 0);
+ const int buf_off = buf_x0_off + buf_row * buf_stride;
+ const uint8_t *buf =
+ rsb->stripe_boundary_above + (buf_off << use_highbd);
+ uint8_t *dst8 = data8_tl + i * data_stride;
+ // Save old pixels, then replace with data from stripe_boundary_above
+ memcpy(rlbs->tmp_save_above[i + RESTORATION_BORDER],
+ REAL_PTR(use_highbd, dst8), line_size);
+ memcpy(REAL_PTR(use_highbd, dst8), buf, line_size);
+ }
+ }
+
+ // Replace RESTORATION_BORDER pixels below the bottom of the stripe.
+    // The second buffer row is repeated, so buf_row gets the values 0, 1, 1
+ // for i = 0, 1, 2.
+ if (copy_below) {
+ const int stripe_end = limits->v_start + h;
+ uint8_t *data8_bl = data8 + data_x0 + stripe_end * data_stride;
+
+ for (int i = 0; i < RESTORATION_BORDER; ++i) {
+ const int buf_row = rsb_row + AOMMIN(i, RESTORATION_CTX_VERT - 1);
+ const int buf_off = buf_x0_off + buf_row * buf_stride;
+ const uint8_t *src =
+ rsb->stripe_boundary_below + (buf_off << use_highbd);
+
+ uint8_t *dst8 = data8_bl + i * data_stride;
+ // Save old pixels, then replace with data from stripe_boundary_below
+ memcpy(rlbs->tmp_save_below[i], REAL_PTR(use_highbd, dst8), line_size);
+ memcpy(REAL_PTR(use_highbd, dst8), src, line_size);
+ }
+ }
+ } else {
+ if (copy_above) {
+ uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
+
+ // Only save and overwrite i=-RESTORATION_BORDER line.
+ uint8_t *dst8 = data8_tl + (-RESTORATION_BORDER) * data_stride;
+ // Save old pixels, then replace with data from stripe_boundary_above
+ memcpy(rlbs->tmp_save_above[0], REAL_PTR(use_highbd, dst8), line_size);
+ memcpy(REAL_PTR(use_highbd, dst8),
+ REAL_PTR(use_highbd,
+ data8_tl + (-RESTORATION_BORDER + 1) * data_stride),
+ line_size);
+ }
+
+ if (copy_below) {
+ const int stripe_end = limits->v_start + h;
+ uint8_t *data8_bl = data8 + data_x0 + stripe_end * data_stride;
+
+ // Only save and overwrite i=2 line.
+ uint8_t *dst8 = data8_bl + 2 * data_stride;
+ // Save old pixels, then replace with data from stripe_boundary_below
+ memcpy(rlbs->tmp_save_below[2], REAL_PTR(use_highbd, dst8), line_size);
+ memcpy(REAL_PTR(use_highbd, dst8),
+ REAL_PTR(use_highbd, data8_bl + (2 - 1) * data_stride), line_size);
+ }
}
- InterpKernel vertical_topbot;
- RestorationTileLimits limits =
- av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
-#if CONFIG_STRIPED_LOOP_RESTORATION
- tile_height, width, height, rst->subsampling_y);
-#else
- tile_height, width, height);
-#endif
+}
- // Convolve the whole tile (done in blocks here to match the requirements
- // of the vectorized convolve functions, but the result is equivalent)
- for (int i = limits.v_start; i < limits.v_end; i += procunit_height) {
-#if CONFIG_STRIPED_LOOP_RESTORATION
- int h = setup_processing_stripe_boundary(
- i, limits.v_end, limits.h_start, limits.h_end, data, stride, rst, 0);
- h = ALIGN_POWER_OF_TWO(h, 1);
- procunit_height = h;
-#else
- int h = AOMMIN(procunit_height, (limits.v_end - i + 15) & ~15);
-#endif
- for (int j = limits.h_start; j < limits.h_end; j += procunit_width) {
- int w = AOMMIN(procunit_width, (limits.h_end - j + 15) & ~15);
- const uint8_t *data_p = data + i * stride + j;
- uint8_t *dst_p = dst + i * dst_stride + j;
- // Note h is at least 16
- for (int b = 0; b < WIENER_HALFWIN - WIENER_BORDER_VERT; ++b) {
- stepdown_wiener_kernel(rst->rsi->wiener_info[tile_idx].vfilter,
- vertical_topbot, WIENER_BORDER_VERT + b, 1);
-#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
- aom_convolve8_add_src_hip(data_p, stride, dst_p, dst_stride,
- rst->rsi->wiener_info[tile_idx].hfilter, 16,
- vertical_topbot, 16, w, 1);
-#else
- aom_convolve8_add_src(data_p, stride, dst_p, dst_stride,
- rst->rsi->wiener_info[tile_idx].hfilter, 16,
- vertical_topbot, 16, w, 1);
-#endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION
- data_p += stride;
- dst_p += dst_stride;
+// This function restores the boundary lines modified by
+// setup_processing_stripe_boundary.
+//
+// Note: We need to be careful when handling the corners of the processing
+// unit, because, for example, the top-left corner is considered to be part of
+// both the left and top borders. This means that, depending on the
+// loop_filter_across_tiles_enabled flag, the corner pixels might get
+// overwritten twice, once as part of the "top" border and once as part
+// of the "left" border (or similar for other corners).
+//
+// Everything works out fine as long as we make sure to reverse the order
+// when restoring, i.e. we need to restore the left/right borders followed
+// by the top/bottom borders.
+static void restore_processing_stripe_boundary(
+ const RestorationTileLimits *limits, const RestorationLineBuffers *rlbs,
+ int use_highbd, int h, uint8_t *data8, int data_stride, int copy_above,
+ int copy_below, int opt) {
+ const int line_width =
+ (limits->h_end - limits->h_start) + 2 * RESTORATION_EXTRA_HORZ;
+ const int line_size = line_width << use_highbd;
+
+ const int data_x0 = limits->h_start - RESTORATION_EXTRA_HORZ;
+
+ if (!opt) {
+ if (copy_above) {
+ uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
+ for (int i = -RESTORATION_BORDER; i < 0; ++i) {
+ uint8_t *dst8 = data8_tl + i * data_stride;
+ memcpy(REAL_PTR(use_highbd, dst8),
+ rlbs->tmp_save_above[i + RESTORATION_BORDER], line_size);
}
-#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
- aom_convolve8_add_src_hip(data_p, stride, dst_p, dst_stride,
- rst->rsi->wiener_info[tile_idx].hfilter, 16,
- rst->rsi->wiener_info[tile_idx].vfilter, 16, w,
- h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2);
-#else
- aom_convolve8_add_src(data_p, stride, dst_p, dst_stride,
- rst->rsi->wiener_info[tile_idx].hfilter, 16,
- rst->rsi->wiener_info[tile_idx].vfilter, 16, w,
- h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2);
-#endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION
- data_p += stride * (h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2);
- dst_p += dst_stride * (h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2);
- for (int b = WIENER_HALFWIN - WIENER_BORDER_VERT - 1; b >= 0; --b) {
- stepdown_wiener_kernel(rst->rsi->wiener_info[tile_idx].vfilter,
- vertical_topbot, WIENER_BORDER_VERT + b, 0);
-#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
- aom_convolve8_add_src_hip(data_p, stride, dst_p, dst_stride,
- rst->rsi->wiener_info[tile_idx].hfilter, 16,
- vertical_topbot, 16, w, 1);
-#else
- aom_convolve8_add_src(data_p, stride, dst_p, dst_stride,
- rst->rsi->wiener_info[tile_idx].hfilter, 16,
- vertical_topbot, 16, w, 1);
-#endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION
- data_p += stride;
- dst_p += dst_stride;
+ }
+
+ if (copy_below) {
+ const int stripe_bottom = limits->v_start + h;
+ uint8_t *data8_bl = data8 + data_x0 + stripe_bottom * data_stride;
+
+ for (int i = 0; i < RESTORATION_BORDER; ++i) {
+ if (stripe_bottom + i >= limits->v_end + RESTORATION_BORDER) break;
+
+ uint8_t *dst8 = data8_bl + i * data_stride;
+ memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_below[i], line_size);
+ }
+ }
+ } else {
+ if (copy_above) {
+ uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
+
+ // Only restore i=-RESTORATION_BORDER line.
+ uint8_t *dst8 = data8_tl + (-RESTORATION_BORDER) * data_stride;
+ memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_above[0], line_size);
+ }
+
+ if (copy_below) {
+ const int stripe_bottom = limits->v_start + h;
+ uint8_t *data8_bl = data8 + data_x0 + stripe_bottom * data_stride;
+
+ // Only restore i=2 line.
+ if (stripe_bottom + 2 < limits->v_end + RESTORATION_BORDER) {
+ uint8_t *dst8 = data8_bl + 2 * data_stride;
+ memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_below[2], line_size);
}
}
-#if CONFIG_STRIPED_LOOP_RESTORATION
- restore_processing_stripe_boundary(i, limits.v_end, limits.h_start,
- limits.h_end, data, stride, rst, 0);
-#endif
}
}
-static void loop_wiener_filter(uint8_t *data, int width, int height, int stride,
- RestorationInternal *rst, uint8_t *dst,
- int dst_stride) {
- int tile_idx;
- extend_frame(data, width, height, stride, WIENER_BORDER_HORZ,
- WIENER_BORDER_VERT);
- for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
- loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst, dst,
- dst_stride);
+static void wiener_filter_stripe(const RestorationUnitInfo *rui,
+ int stripe_width, int stripe_height,
+ int procunit_width, const uint8_t *src,
+ int src_stride, uint8_t *dst, int dst_stride,
+ int32_t *tmpbuf, int bit_depth) {
+ (void)tmpbuf;
+ (void)bit_depth;
+ assert(bit_depth == 8);
+ const ConvolveParams conv_params = get_conv_params_wiener(8);
+
+ for (int j = 0; j < stripe_width; j += procunit_width) {
+ int w = AOMMIN(procunit_width, (stripe_width - j + 15) & ~15);
+ const uint8_t *src_p = src + j;
+ uint8_t *dst_p = dst + j;
+ av1_wiener_convolve_add_src(
+ src_p, src_stride, dst_p, dst_stride, rui->wiener_info.hfilter, 16,
+ rui->wiener_info.vfilter, 16, w, stripe_height, &conv_params);
}
}
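
The per-block width above is the remaining stripe width rounded up to a multiple of 16 (to suit the vectorized convolve routines, per the comment removed earlier in this hunk) and capped at the processing-unit width. A tiny sketch with made-up sizes:

#include <stdio.h>

#define AOMMIN(a, b) ((a) < (b) ? (a) : (b))

int main(void) {
  const int stripe_width = 70, procunit_width = 32; /* illustrative only */
  for (int j = 0; j < stripe_width; j += procunit_width) {
    const int w = AOMMIN(procunit_width, (stripe_width - j + 15) & ~15);
    printf("j=%2d -> convolve width %d\n", j, w); /* 32, 32, 16 */
  }
  return 0;
}
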
@@ -391,6 +457,8 @@ static void loop_wiener_filter(uint8_t *data, int width, int height, int stride,
static void boxsum1(int32_t *src, int width, int height, int src_stride,
int sqr, int32_t *dst, int dst_stride) {
int i, j, a, b, c;
+ assert(width > 2 * SGRPROJ_BORDER_HORZ);
+ assert(height > 2 * SGRPROJ_BORDER_VERT);
// Vertical sum over 3-pixel regions, from src into dst.
if (!sqr) {
@@ -456,6 +524,8 @@ static void boxsum1(int32_t *src, int width, int height, int src_stride,
static void boxsum2(int32_t *src, int width, int height, int src_stride,
int sqr, int32_t *dst, int dst_stride) {
int i, j, a, b, c, d, e;
+ assert(width > 2 * SGRPROJ_BORDER_HORZ);
+ assert(height > 2 * SGRPROJ_BORDER_VERT);
// Vertical sum over 5-pixel regions, from src into dst.
if (!sqr) {
@@ -540,202 +610,33 @@ static void boxsum2(int32_t *src, int width, int height, int src_stride,
}
}
-static void boxsum3(int32_t *src, int width, int height, int src_stride,
- int sqr, int32_t *dst, int dst_stride) {
- int i, j, a, b, c, d, e, f, g;
-
- // Vertical sum over 7-pixel regions, from src into dst.
- if (!sqr) {
- for (j = 0; j < width; ++j) {
- a = src[j];
- b = src[1 * src_stride + j];
- c = src[2 * src_stride + j];
- d = src[3 * src_stride + j];
- e = src[4 * src_stride + j];
- f = src[5 * src_stride + j];
- g = src[6 * src_stride + j];
-
- dst[j] = a + b + c + d;
- dst[dst_stride + j] = a + b + c + d + e;
- dst[2 * dst_stride + j] = a + b + c + d + e + f;
- for (i = 3; i < height - 4; ++i) {
- dst[i * dst_stride + j] = a + b + c + d + e + f + g;
- a = b;
- b = c;
- c = d;
- d = e;
- e = f;
- f = g;
- g = src[(i + 4) * src_stride + j];
- }
- dst[i * dst_stride + j] = a + b + c + d + e + f + g;
- dst[(i + 1) * dst_stride + j] = b + c + d + e + f + g;
- dst[(i + 2) * dst_stride + j] = c + d + e + f + g;
- dst[(i + 3) * dst_stride + j] = d + e + f + g;
- }
- } else {
- for (j = 0; j < width; ++j) {
- a = src[j] * src[j];
- b = src[1 * src_stride + j] * src[1 * src_stride + j];
- c = src[2 * src_stride + j] * src[2 * src_stride + j];
- d = src[3 * src_stride + j] * src[3 * src_stride + j];
- e = src[4 * src_stride + j] * src[4 * src_stride + j];
- f = src[5 * src_stride + j] * src[5 * src_stride + j];
- g = src[6 * src_stride + j] * src[6 * src_stride + j];
-
- dst[j] = a + b + c + d;
- dst[dst_stride + j] = a + b + c + d + e;
- dst[2 * dst_stride + j] = a + b + c + d + e + f;
- for (i = 3; i < height - 4; ++i) {
- dst[i * dst_stride + j] = a + b + c + d + e + f + g;
- a = b;
- b = c;
- c = d;
- d = e;
- e = f;
- f = g;
- g = src[(i + 4) * src_stride + j] * src[(i + 4) * src_stride + j];
- }
- dst[i * dst_stride + j] = a + b + c + d + e + f + g;
- dst[(i + 1) * dst_stride + j] = b + c + d + e + f + g;
- dst[(i + 2) * dst_stride + j] = c + d + e + f + g;
- dst[(i + 3) * dst_stride + j] = d + e + f + g;
- }
- }
-
- // Horizontal sum over 7-pixel regions of dst
- for (i = 0; i < height; ++i) {
- a = dst[i * dst_stride];
- b = dst[i * dst_stride + 1];
- c = dst[i * dst_stride + 2];
- d = dst[i * dst_stride + 3];
- e = dst[i * dst_stride + 4];
- f = dst[i * dst_stride + 5];
- g = dst[i * dst_stride + 6];
-
- dst[i * dst_stride] = a + b + c + d;
- dst[i * dst_stride + 1] = a + b + c + d + e;
- dst[i * dst_stride + 2] = a + b + c + d + e + f;
- for (j = 3; j < width - 4; ++j) {
- dst[i * dst_stride + j] = a + b + c + d + e + f + g;
- a = b;
- b = c;
- c = d;
- d = e;
- e = f;
- f = g;
- g = dst[i * dst_stride + (j + 4)];
- }
- dst[i * dst_stride + j] = a + b + c + d + e + f + g;
- dst[i * dst_stride + (j + 1)] = b + c + d + e + f + g;
- dst[i * dst_stride + (j + 2)] = c + d + e + f + g;
- dst[i * dst_stride + (j + 3)] = d + e + f + g;
- }
-}
-
-// Generic version for any r. To be removed after experiments are done.
-static void boxsumr(int32_t *src, int width, int height, int src_stride, int r,
- int sqr, int32_t *dst, int dst_stride) {
- int32_t *tmp = aom_malloc(width * height * sizeof(*tmp));
- int tmp_stride = width;
- int i, j;
- if (sqr) {
- for (j = 0; j < width; ++j) tmp[j] = src[j] * src[j];
- for (j = 0; j < width; ++j)
- for (i = 1; i < height; ++i)
- tmp[i * tmp_stride + j] =
- tmp[(i - 1) * tmp_stride + j] +
- src[i * src_stride + j] * src[i * src_stride + j];
- } else {
- memcpy(tmp, src, sizeof(*tmp) * width);
- for (j = 0; j < width; ++j)
- for (i = 1; i < height; ++i)
- tmp[i * tmp_stride + j] =
- tmp[(i - 1) * tmp_stride + j] + src[i * src_stride + j];
- }
- for (i = 0; i <= r; ++i)
- memcpy(&dst[i * dst_stride], &tmp[(i + r) * tmp_stride],
- sizeof(*tmp) * width);
- for (i = r + 1; i < height - r; ++i)
- for (j = 0; j < width; ++j)
- dst[i * dst_stride + j] =
- tmp[(i + r) * tmp_stride + j] - tmp[(i - r - 1) * tmp_stride + j];
- for (i = height - r; i < height; ++i)
- for (j = 0; j < width; ++j)
- dst[i * dst_stride + j] = tmp[(height - 1) * tmp_stride + j] -
- tmp[(i - r - 1) * tmp_stride + j];
-
- for (i = 0; i < height; ++i) tmp[i * tmp_stride] = dst[i * dst_stride];
- for (i = 0; i < height; ++i)
- for (j = 1; j < width; ++j)
- tmp[i * tmp_stride + j] =
- tmp[i * tmp_stride + j - 1] + dst[i * src_stride + j];
-
- for (j = 0; j <= r; ++j)
- for (i = 0; i < height; ++i)
- dst[i * dst_stride + j] = tmp[i * tmp_stride + j + r];
- for (j = r + 1; j < width - r; ++j)
- for (i = 0; i < height; ++i)
- dst[i * dst_stride + j] =
- tmp[i * tmp_stride + j + r] - tmp[i * tmp_stride + j - r - 1];
- for (j = width - r; j < width; ++j)
- for (i = 0; i < height; ++i)
- dst[i * dst_stride + j] =
- tmp[i * tmp_stride + width - 1] - tmp[i * tmp_stride + j - r - 1];
- aom_free(tmp);
-}
-
static void boxsum(int32_t *src, int width, int height, int src_stride, int r,
int sqr, int32_t *dst, int dst_stride) {
if (r == 1)
boxsum1(src, width, height, src_stride, sqr, dst, dst_stride);
else if (r == 2)
boxsum2(src, width, height, src_stride, sqr, dst, dst_stride);
- else if (r == 3)
- boxsum3(src, width, height, src_stride, sqr, dst, dst_stride);
else
- boxsumr(src, width, height, src_stride, r, sqr, dst, dst_stride);
+ assert(0 && "Invalid value of r in self-guided filter");
}
-static void boxnum(int width, int height, int r, int8_t *num, int num_stride) {
- int i, j;
- for (i = 0; i <= r; ++i) {
- for (j = 0; j <= r; ++j) {
- num[i * num_stride + j] = (r + 1 + i) * (r + 1 + j);
- num[i * num_stride + (width - 1 - j)] = num[i * num_stride + j];
- num[(height - 1 - i) * num_stride + j] = num[i * num_stride + j];
- num[(height - 1 - i) * num_stride + (width - 1 - j)] =
- num[i * num_stride + j];
- }
- }
- for (j = 0; j <= r; ++j) {
- const int val = (2 * r + 1) * (r + 1 + j);
- for (i = r + 1; i < height - r; ++i) {
- num[i * num_stride + j] = val;
- num[i * num_stride + (width - 1 - j)] = val;
- }
- }
- for (i = 0; i <= r; ++i) {
- const int val = (2 * r + 1) * (r + 1 + i);
- for (j = r + 1; j < width - r; ++j) {
- num[i * num_stride + j] = val;
- num[(height - 1 - i) * num_stride + j] = val;
- }
- }
- for (i = r + 1; i < height - r; ++i) {
- for (j = r + 1; j < width - r; ++j) {
- num[i * num_stride + j] = (2 * r + 1) * (2 * r + 1);
- }
+void decode_xq(const int *xqd, int *xq, const sgr_params_type *params) {
+ if (params->r[0] == 0) {
+ xq[0] = 0;
+ xq[1] = (1 << SGRPROJ_PRJ_BITS) - xqd[1];
+ } else if (params->r[1] == 0) {
+ xq[0] = xqd[0];
+ xq[1] = 0;
+ } else {
+ xq[0] = xqd[0];
+ xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1];
}
}
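
In the general branch above, the two projection weights and the implicit weight on the source pixel sum to 1 << SGRPROJ_PRJ_BITS (the source weight works out to xqd[1]). A minimal sketch of that branch, assuming SGRPROJ_PRJ_BITS is 7 and using made-up signalled values:

#include <stdio.h>

#define SGRPROJ_PRJ_BITS 7 /* assumed; defined in av1/common/restoration.h */

/* Mirrors the final branch of decode_xq() (both radii non-zero). */
static void decode_xq_general(const int *xqd, int *xq) {
  xq[0] = xqd[0];
  xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1];
}

int main(void) {
  const int xqd[2] = { -32, 31 }; /* hypothetical coded values */
  int xq[2];
  decode_xq_general(xqd, xq);
  /* Prints xq = { -32, 129 }; note -32 + 129 + 31 == 128 == 1 << 7. */
  printf("xq = { %d, %d }\n", xq[0], xq[1]);
  return 0;
}
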
-void decode_xq(int *xqd, int *xq) {
- xq[0] = xqd[0];
- xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1];
-}
-
const int32_t x_by_xplus1[256] = {
- 0, 128, 171, 192, 205, 213, 219, 224, 228, 230, 233, 235, 236, 238, 239,
+ // Special case: Map 0 -> 1 (corresponding to a value of 1/256)
+  // instead of 0. See the comments in selfguided_restoration_internal() for why.
+ 1, 128, 171, 192, 205, 213, 219, 224, 228, 230, 233, 235, 236, 238, 239,
240, 241, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 247, 247,
248, 248, 248, 248, 249, 249, 249, 249, 249, 250, 250, 250, 250, 250, 250,
250, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 252, 252, 252, 252,
@@ -758,19 +659,15 @@ const int32_t x_by_xplus1[256] = {
const int32_t one_by_x[MAX_NELEM] = {
4096, 2048, 1365, 1024, 819, 683, 585, 512, 455, 410, 372, 341, 315,
293, 273, 256, 241, 228, 216, 205, 195, 186, 178, 171, 164,
-#if MAX_RADIUS > 2
- 158, 152, 146, 141, 137, 132, 128, 124, 120, 117, 114, 111, 108,
- 105, 102, 100, 98, 95, 93, 91, 89, 87, 85, 84
-#endif // MAX_RADIUS > 2
};
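
Both lookup tables encode simple fixed-point reciprocals: x_by_xplus1[x] appears to be round(256 * x / (x + 1)) with the x = 0 entry forced to 1 (per the comment above), and one_by_x[n - 1] is round(4096 / n), as the B[k] comment further down notes. A small spot-check (standalone, for illustration only):

#include <assert.h>
#include <math.h>

int main(void) {
  /* x_by_xplus1 entries: x = 3 -> 192, x = 8 -> 228. */
  assert(lround(256.0 * 3 / (3 + 1)) == 192);
  assert(lround(256.0 * 8 / (8 + 1)) == 228);
  /* one_by_x entries: n = 3 -> 1365, n = 25 -> 164. */
  assert(lround(4096.0 / 3) == 1365);
  assert(lround(4096.0 / 25) == 164);
  return 0;
}
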
-static void av1_selfguided_restoration_internal(int32_t *dgd, int width,
- int height, int dgd_stride,
- int32_t *dst, int dst_stride,
- int bit_depth, int r, int eps) {
+static void selfguided_restoration_fast_internal(
+ int32_t *dgd, int width, int height, int dgd_stride, int32_t *dst,
+ int dst_stride, int bit_depth, int sgr_params_idx, int radius_idx) {
+ const sgr_params_type *const params = &sgr_params[sgr_params_idx];
+ const int r = params->r[radius_idx];
const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
- const int num_stride = width_ext;
// Adjusting the stride of A and B here appears to avoid bad cache effects,
// leading to a significant speed improvement.
// We also align the stride to a multiple of 16 bytes, for consistency
@@ -780,25 +677,24 @@ static void av1_selfguided_restoration_internal(int32_t *dgd, int width,
int32_t B_[RESTORATION_PROC_UNIT_PELS];
int32_t *A = A_;
int32_t *B = B_;
- int8_t num_[RESTORATION_PROC_UNIT_PELS];
- int8_t *num = num_ + SGRPROJ_BORDER_VERT * num_stride + SGRPROJ_BORDER_HORZ;
int i, j;
- // Don't filter tiles with dimensions < 5 on any axis
- if ((width < 5) || (height < 5)) return;
+ assert(r <= MAX_RADIUS && "Need MAX_RADIUS >= r");
+ assert(r <= SGRPROJ_BORDER_VERT - 1 && r <= SGRPROJ_BORDER_HORZ - 1 &&
+ "Need SGRPROJ_BORDER_* >= r+1");
boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
width_ext, height_ext, dgd_stride, r, 0, B, buf_stride);
boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
width_ext, height_ext, dgd_stride, r, 1, A, buf_stride);
- boxnum(width_ext, height_ext, r, num_, num_stride);
- assert(r <= 3);
A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
+  // Calculate the eventual A[] and B[] arrays. Include a 1-pixel border - i.e.,
+  // for a 64x64 processing unit, we calculate 66x66 pixels of A[] and B[].
+  // In this fast variant only every second row is computed, which is all the
+  // even/odd-row filters below need.
+ for (i = -1; i < height + 1; i += 2) {
+ for (j = -1; j < width + 1; ++j) {
const int k = i * buf_stride + j;
- const int n = num[i * num_stride + j];
+ const int n = (2 * r + 1) * (2 * r + 1);
// a < 2^16 * n < 2^22 regardless of bit depth
uint32_t a = ROUND_POWER_OF_TWO(A[k], 2 * (bit_depth - 8));
@@ -807,139 +703,192 @@ static void av1_selfguided_restoration_internal(int32_t *dgd, int width,
// Each term in calculating p = a * n - b * b is < 2^16 * n^2 < 2^28,
// and p itself satisfies p < 2^14 * n^2 < 2^26.
+ // This bound on p is due to:
+ // https://en.wikipedia.org/wiki/Popoviciu's_inequality_on_variances
+ //
// Note: Sometimes, in high bit depth, we can end up with a*n < b*b.
// This is an artefact of rounding, and can only happen if all pixels
// are (almost) identical, so in this case we saturate to p=0.
uint32_t p = (a * n < b * b) ? 0 : a * n - b * b;
- uint32_t s = sgrproj_mtable[eps - 1][n - 1];
+
+ const uint32_t s = params->s[radius_idx];
// p * s < (2^14 * n^2) * round(2^20 / n^2 eps) < 2^34 / eps < 2^32
// as long as eps >= 4. So p * s fits into a uint32_t, and z < 2^12
// (this holds even after accounting for the rounding in s)
const uint32_t z = ROUND_POWER_OF_TWO(p * s, SGRPROJ_MTABLE_BITS);
- A[k] = x_by_xplus1[AOMMIN(z, 255)]; // < 2^8
-
- // SGRPROJ_SGR - A[k] < 2^8, B[k] < 2^(bit_depth) * n,
+ // Note: We have to be quite careful about the value of A[k].
+ // This is used as a blend factor between individual pixel values and the
+ // local mean. So it logically has a range of [0, 256], including both
+ // endpoints.
+ //
+ // This is a pain for hardware, as we'd like something which can be stored
+ // in exactly 8 bits.
+ // Further, in the calculation of B[k] below, if z == 0 and r == 2,
+ // then A[k] "should be" 0. But then we can end up setting B[k] to a value
+ // slightly above 2^(8 + bit depth), due to rounding in the value of
+ // one_by_x[25-1].
+ //
+ // Thus we saturate so that, when z == 0, A[k] is set to 1 instead of 0.
+ // This fixes the above issues (256 - A[k] fits in a uint8, and we can't
+ // overflow), without significantly affecting the final result: z == 0
+ // implies that the image is essentially "flat", so the local mean and
+ // individual pixel values are very similar.
+ //
+      // Note that saturating on the other side, i.e. requiring A[k] <= 255,
+ // would be a bad idea, as that corresponds to the case where the image
+ // is very variable, when we want to preserve the local pixel value as
+ // much as possible.
+ A[k] = x_by_xplus1[AOMMIN(z, 255)]; // in range [1, 256]
+
+ // SGRPROJ_SGR - A[k] < 2^8 (from above), B[k] < 2^(bit_depth) * n,
// one_by_x[n - 1] = round(2^12 / n)
// => the product here is < 2^(20 + bit_depth) <= 2^32,
// and B[k] is set to a value < 2^(8 + bit depth)
+ // This holds even with the rounding in one_by_x and in the overall
+ // result, as long as SGRPROJ_SGR - A[k] is strictly less than 2^8.
B[k] = (int32_t)ROUND_POWER_OF_TWO((uint32_t)(SGRPROJ_SGR - A[k]) *
(uint32_t)B[k] *
(uint32_t)one_by_x[n - 1],
SGRPROJ_RECIP_BITS);
}
}
- i = 0;
- j = 0;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a =
- 3 * A[k] + 2 * A[k + 1] + 2 * A[k + buf_stride] + A[k + buf_stride + 1];
- const int32_t b =
- 3 * B[k] + 2 * B[k + 1] + 2 * B[k + buf_stride] + B[k + buf_stride + 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- i = 0;
- j = width - 1;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a =
- 3 * A[k] + 2 * A[k - 1] + 2 * A[k + buf_stride] + A[k + buf_stride - 1];
- const int32_t b =
- 3 * B[k] + 2 * B[k - 1] + 2 * B[k + buf_stride] + B[k + buf_stride - 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- i = height - 1;
- j = 0;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a =
- 3 * A[k] + 2 * A[k + 1] + 2 * A[k - buf_stride] + A[k - buf_stride + 1];
- const int32_t b =
- 3 * B[k] + 2 * B[k + 1] + 2 * B[k - buf_stride] + B[k - buf_stride + 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- i = height - 1;
- j = width - 1;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a =
- 3 * A[k] + 2 * A[k - 1] + 2 * A[k - buf_stride] + A[k - buf_stride - 1];
- const int32_t b =
- 3 * B[k] + 2 * B[k - 1] + 2 * B[k - buf_stride] + B[k - buf_stride - 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- i = 0;
- for (j = 1; j < width - 1; ++j) {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + buf_stride] +
- A[k + buf_stride - 1] + A[k + buf_stride + 1];
- const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k + buf_stride] +
- B[k + buf_stride - 1] + B[k + buf_stride + 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- i = height - 1;
- for (j = 1; j < width - 1; ++j) {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - buf_stride] +
- A[k - buf_stride - 1] + A[k - buf_stride + 1];
- const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k - buf_stride] +
- B[k - buf_stride - 1] + B[k - buf_stride + 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- j = 0;
- for (i = 1; i < height - 1; ++i) {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
- A[k + 1] + A[k - buf_stride + 1] + A[k + buf_stride + 1];
- const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) +
- B[k + 1] + B[k - buf_stride + 1] + B[k + buf_stride + 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+ // Use the A[] and B[] arrays to calculate the filtered image
+ assert(r == 2);
+ for (i = 0; i < height; ++i) {
+ if (!(i & 1)) { // even row
+ for (j = 0; j < width; ++j) {
+ const int k = i * buf_stride + j;
+ const int l = i * dgd_stride + j;
+ const int m = i * dst_stride + j;
+ const int nb = 5;
+ const int32_t a = (A[k - buf_stride] + A[k + buf_stride]) * 6 +
+ (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] +
+ A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) *
+ 5;
+ const int32_t b = (B[k - buf_stride] + B[k + buf_stride]) * 6 +
+ (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] +
+ B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) *
+ 5;
+ const int32_t v = a * dgd[l] + b;
+ dst[m] =
+ ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+ }
+ } else { // odd row
+ for (j = 0; j < width; ++j) {
+ const int k = i * buf_stride + j;
+ const int l = i * dgd_stride + j;
+ const int m = i * dst_stride + j;
+ const int nb = 4;
+ const int32_t a = A[k] * 6 + (A[k - 1] + A[k + 1]) * 5;
+ const int32_t b = B[k] * 6 + (B[k - 1] + B[k + 1]) * 5;
+ const int32_t v = a * dgd[l] + b;
+ dst[m] =
+ ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+ }
+ }
}
- j = width - 1;
- for (i = 1; i < height - 1; ++i) {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
- A[k - 1] + A[k - buf_stride - 1] + A[k + buf_stride - 1];
- const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) +
- B[k - 1] + B[k - buf_stride - 1] + B[k + buf_stride - 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+}
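
One detail worth spelling out: the even-row and odd-row paths above use different nb values because their kernel weights sum to 32 and 16 respectively, so the normalising shift in ROUND_POWER_OF_TWO differs by one. A standalone check of that arithmetic:

#include <assert.h>

int main(void) {
  /* Even output rows blend the rows above/below: two centre taps of 6 plus
   * four corner taps of 5. Odd rows blend in-row: one tap of 6 plus two of 5. */
  assert(2 * 6 + 4 * 5 == 1 << 5); /* nb = 5 */
  assert(1 * 6 + 2 * 5 == 1 << 4); /* nb = 4 */
  return 0;
}
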
+
+static void selfguided_restoration_internal(int32_t *dgd, int width, int height,
+ int dgd_stride, int32_t *dst,
+ int dst_stride, int bit_depth,
+ int sgr_params_idx,
+ int radius_idx) {
+ const sgr_params_type *const params = &sgr_params[sgr_params_idx];
+ const int r = params->r[radius_idx];
+ const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
+ const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
+ // Adjusting the stride of A and B here appears to avoid bad cache effects,
+ // leading to a significant speed improvement.
+ // We also align the stride to a multiple of 16 bytes, for consistency
+ // with the SIMD version of this function.
+ int buf_stride = ((width_ext + 3) & ~3) + 16;
+ int32_t A_[RESTORATION_PROC_UNIT_PELS];
+ int32_t B_[RESTORATION_PROC_UNIT_PELS];
+ int32_t *A = A_;
+ int32_t *B = B_;
+ int i, j;
+
+ assert(r <= MAX_RADIUS && "Need MAX_RADIUS >= r");
+ assert(r <= SGRPROJ_BORDER_VERT - 1 && r <= SGRPROJ_BORDER_HORZ - 1 &&
+ "Need SGRPROJ_BORDER_* >= r+1");
+
+ boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
+ width_ext, height_ext, dgd_stride, r, 0, B, buf_stride);
+ boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
+ width_ext, height_ext, dgd_stride, r, 1, A, buf_stride);
+ A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
+ B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
+  // Calculate the eventual A[] and B[] arrays. Include a 1-pixel border - i.e.,
+ // for a 64x64 processing unit, we calculate 66x66 pixels of A[] and B[].
+ for (i = -1; i < height + 1; ++i) {
+ for (j = -1; j < width + 1; ++j) {
+ const int k = i * buf_stride + j;
+ const int n = (2 * r + 1) * (2 * r + 1);
+
+ // a < 2^16 * n < 2^22 regardless of bit depth
+ uint32_t a = ROUND_POWER_OF_TWO(A[k], 2 * (bit_depth - 8));
+ // b < 2^8 * n < 2^14 regardless of bit depth
+ uint32_t b = ROUND_POWER_OF_TWO(B[k], bit_depth - 8);
+
+ // Each term in calculating p = a * n - b * b is < 2^16 * n^2 < 2^28,
+ // and p itself satisfies p < 2^14 * n^2 < 2^26.
+ // This bound on p is due to:
+ // https://en.wikipedia.org/wiki/Popoviciu's_inequality_on_variances
+ //
+ // Note: Sometimes, in high bit depth, we can end up with a*n < b*b.
+ // This is an artefact of rounding, and can only happen if all pixels
+ // are (almost) identical, so in this case we saturate to p=0.
+ uint32_t p = (a * n < b * b) ? 0 : a * n - b * b;
+
+ const uint32_t s = params->s[radius_idx];
+
+ // p * s < (2^14 * n^2) * round(2^20 / n^2 eps) < 2^34 / eps < 2^32
+ // as long as eps >= 4. So p * s fits into a uint32_t, and z < 2^12
+ // (this holds even after accounting for the rounding in s)
+ const uint32_t z = ROUND_POWER_OF_TWO(p * s, SGRPROJ_MTABLE_BITS);
+
+ // Note: We have to be quite careful about the value of A[k].
+ // This is used as a blend factor between individual pixel values and the
+ // local mean. So it logically has a range of [0, 256], including both
+ // endpoints.
+ //
+ // This is a pain for hardware, as we'd like something which can be stored
+ // in exactly 8 bits.
+ // Further, in the calculation of B[k] below, if z == 0 and r == 2,
+ // then A[k] "should be" 0. But then we can end up setting B[k] to a value
+ // slightly above 2^(8 + bit depth), due to rounding in the value of
+ // one_by_x[25-1].
+ //
+ // Thus we saturate so that, when z == 0, A[k] is set to 1 instead of 0.
+ // This fixes the above issues (256 - A[k] fits in a uint8, and we can't
+ // overflow), without significantly affecting the final result: z == 0
+ // implies that the image is essentially "flat", so the local mean and
+ // individual pixel values are very similar.
+ //
+      // Note that saturating on the other side, i.e. requiring A[k] <= 255,
+ // would be a bad idea, as that corresponds to the case where the image
+ // is very variable, when we want to preserve the local pixel value as
+ // much as possible.
+ A[k] = x_by_xplus1[AOMMIN(z, 255)]; // in range [1, 256]
+
+ // SGRPROJ_SGR - A[k] < 2^8 (from above), B[k] < 2^(bit_depth) * n,
+ // one_by_x[n - 1] = round(2^12 / n)
+ // => the product here is < 2^(20 + bit_depth) <= 2^32,
+ // and B[k] is set to a value < 2^(8 + bit depth)
+ // This holds even with the rounding in one_by_x and in the overall
+ // result, as long as SGRPROJ_SGR - A[k] is strictly less than 2^8.
+ B[k] = (int32_t)ROUND_POWER_OF_TWO((uint32_t)(SGRPROJ_SGR - A[k]) *
+ (uint32_t)B[k] *
+ (uint32_t)one_by_x[n - 1],
+ SGRPROJ_RECIP_BITS);
+ }
}
- for (i = 1; i < height - 1; ++i) {
- for (j = 1; j < width - 1; ++j) {
+ // Use the A[] and B[] arrays to calculate the filtered image
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
const int k = i * buf_stride + j;
const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
@@ -962,968 +911,697 @@ static void av1_selfguided_restoration_internal(int32_t *dgd, int width,
}
}
-void av1_selfguided_restoration_c(uint8_t *dgd, int width, int height,
- int stride, int32_t *dst, int dst_stride,
- int r, int eps) {
+void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
+ int dgd_stride, int32_t *flt0, int32_t *flt1,
+ int flt_stride, int sgr_params_idx,
+ int bit_depth, int highbd) {
int32_t dgd32_[RESTORATION_PROC_UNIT_PELS];
const int dgd32_stride = width + 2 * SGRPROJ_BORDER_HORZ;
int32_t *dgd32 =
dgd32_ + dgd32_stride * SGRPROJ_BORDER_VERT + SGRPROJ_BORDER_HORZ;
- int i, j;
- for (i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) {
- for (j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) {
- dgd32[i * dgd32_stride + j] = dgd[i * stride + j];
- }
- }
- av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, dst,
- dst_stride, 8, r, eps);
-}
-void av1_highpass_filter_c(uint8_t *dgd, int width, int height, int stride,
- int32_t *dst, int dst_stride, int corner, int edge) {
- int i, j;
- const int center = (1 << SGRPROJ_RST_BITS) - 4 * (corner + edge);
-
- i = 0;
- j = 0;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k + 1] + dgd[k + stride] + dgd[k] * 2) +
- corner * (dgd[k + stride + 1] + dgd[k + 1] + dgd[k + stride] + dgd[k]);
- }
- i = 0;
- j = width - 1;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k - 1] + dgd[k + stride] + dgd[k] * 2) +
- corner * (dgd[k + stride - 1] + dgd[k - 1] + dgd[k + stride] + dgd[k]);
- }
- i = height - 1;
- j = 0;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k + 1] + dgd[k - stride] + dgd[k] * 2) +
- corner * (dgd[k - stride + 1] + dgd[k + 1] + dgd[k - stride] + dgd[k]);
- }
- i = height - 1;
- j = width - 1;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k - 1] + dgd[k - stride] + dgd[k] * 2) +
- corner * (dgd[k - stride - 1] + dgd[k - 1] + dgd[k - stride] + dgd[k]);
- }
- i = 0;
- for (j = 1; j < width - 1; ++j) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] = center * dgd[k] +
- edge * (dgd[k - 1] + dgd[k + stride] + dgd[k + 1] + dgd[k]) +
- corner * (dgd[k + stride - 1] + dgd[k + stride + 1] + dgd[k - 1] +
- dgd[k + 1]);
- }
- i = height - 1;
- for (j = 1; j < width - 1; ++j) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] = center * dgd[k] +
- edge * (dgd[k - 1] + dgd[k - stride] + dgd[k + 1] + dgd[k]) +
- corner * (dgd[k - stride - 1] + dgd[k - stride + 1] + dgd[k - 1] +
- dgd[k + 1]);
- }
- j = 0;
- for (i = 1; i < height - 1; ++i) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] = center * dgd[k] +
- edge * (dgd[k - stride] + dgd[k + 1] + dgd[k + stride] + dgd[k]) +
- corner * (dgd[k + stride + 1] + dgd[k - stride + 1] +
- dgd[k - stride] + dgd[k + stride]);
- }
- j = width - 1;
- for (i = 1; i < height - 1; ++i) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] = center * dgd[k] +
- edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k]) +
- corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
- dgd[k - stride] + dgd[k + stride]);
- }
- for (i = 1; i < height - 1; ++i) {
- for (j = 1; j < width - 1; ++j) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] +
- edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k + 1]) +
- corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
- dgd[k - stride + 1] + dgd[k + stride + 1]);
+ if (highbd) {
+ const uint16_t *dgd16 = CONVERT_TO_SHORTPTR(dgd8);
+ for (int i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) {
+ for (int j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) {
+ dgd32[i * dgd32_stride + j] = dgd16[i * dgd_stride + j];
+ }
+ }
+ } else {
+ for (int i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) {
+ for (int j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) {
+ dgd32[i * dgd32_stride + j] = dgd8[i * dgd_stride + j];
+ }
}
}
+
+ const sgr_params_type *const params = &sgr_params[sgr_params_idx];
+ // If params->r == 0 we skip the corresponding filter. We only allow one of
+ // the radii to be 0, as having both equal to 0 would be equivalent to
+ // skipping SGR entirely.
+ assert(!(params->r[0] == 0 && params->r[1] == 0));
+
+ if (params->r[0] > 0)
+ selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride,
+ flt0, flt_stride, bit_depth,
+ sgr_params_idx, 0);
+ if (params->r[1] > 0)
+ selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1,
+ flt_stride, bit_depth, sgr_params_idx, 1);
}
-void apply_selfguided_restoration_c(uint8_t *dat, int width, int height,
- int stride, int eps, int *xqd, uint8_t *dst,
- int dst_stride, int32_t *tmpbuf) {
+void apply_selfguided_restoration_c(const uint8_t *dat8, int width, int height,
+ int stride, int eps, const int *xqd,
+ uint8_t *dst8, int dst_stride,
+ int32_t *tmpbuf, int bit_depth,
+ int highbd) {
+ int32_t *flt0 = tmpbuf;
+ int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX;
+ assert(width * height <= RESTORATION_UNITPELS_MAX);
+
+ av1_selfguided_restoration_c(dat8, width, height, stride, flt0, flt1, width,
+ eps, bit_depth, highbd);
+ const sgr_params_type *const params = &sgr_params[eps];
int xq[2];
- int32_t *flt1 = tmpbuf;
- int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
- int i, j;
- assert(width * height <= RESTORATION_TILEPELS_MAX);
-#if USE_HIGHPASS_IN_SGRPROJ
- av1_highpass_filter_c(dat, width, height, stride, flt1, width,
- sgr_params[eps].corner, sgr_params[eps].edge);
-#else
- av1_selfguided_restoration_c(dat, width, height, stride, flt1, width,
- sgr_params[eps].r1, sgr_params[eps].e1);
-#endif // USE_HIGHPASS_IN_SGRPROJ
- av1_selfguided_restoration_c(dat, width, height, stride, flt2, width,
- sgr_params[eps].r2, sgr_params[eps].e2);
- decode_xq(xqd, xq);
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
+ decode_xq(xqd, xq, params);
+ for (int i = 0; i < height; ++i) {
+ for (int j = 0; j < width; ++j) {
const int k = i * width + j;
- const int l = i * stride + j;
- const int m = i * dst_stride + j;
- const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS);
- const int32_t f1 = (int32_t)flt1[k] - u;
- const int32_t f2 = (int32_t)flt2[k] - u;
- const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
+ uint8_t *dst8ij = dst8 + i * dst_stride + j;
+ const uint8_t *dat8ij = dat8 + i * stride + j;
+
+ const uint16_t pre_u = highbd ? *CONVERT_TO_SHORTPTR(dat8ij) : *dat8ij;
+ const int32_t u = (int32_t)pre_u << SGRPROJ_RST_BITS;
+ int32_t v = u << SGRPROJ_PRJ_BITS;
+ // If params->r == 0 then we skipped the filtering in
+ // av1_selfguided_restoration_c, i.e. flt[k] == u
+ if (params->r[0] > 0) v += xq[0] * (flt0[k] - u);
+ if (params->r[1] > 0) v += xq[1] * (flt1[k] - u);
const int16_t w =
(int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
- dst[m] = clip_pixel(w);
- }
- }
-}
-static void loop_sgrproj_filter_tile(uint8_t *data, int tile_idx, int width,
- int height, int stride,
- RestorationInternal *rst, uint8_t *dst,
- int dst_stride) {
- const int procunit_width = rst->rsi->procunit_width;
-#if CONFIG_STRIPED_LOOP_RESTORATION
- int procunit_height;
-#else
- const int procunit_height = rst->rsi->procunit_height;
-#endif
- const int tile_width = rst->tile_width;
- const int tile_height = rst->tile_height;
- if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
- loop_copy_tile(data, tile_idx, width, height, stride, rst, dst, dst_stride);
- return;
- }
- RestorationTileLimits limits =
- av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
-#if CONFIG_STRIPED_LOOP_RESTORATION
- tile_height, width, height, rst->subsampling_y);
-#else
- tile_height, width, height);
-#endif
- for (int i = limits.v_start; i < limits.v_end; i += procunit_height) {
-#if CONFIG_STRIPED_LOOP_RESTORATION
- int h = setup_processing_stripe_boundary(
- i, limits.v_end, limits.h_start, limits.h_end, data, stride, rst, 0);
- procunit_height = h;
-#else
- int h = AOMMIN(procunit_height, limits.v_end - i);
-#endif
- for (int j = limits.h_start; j < limits.h_end; j += procunit_width) {
- int w = AOMMIN(procunit_width, limits.h_end - j);
- uint8_t *data_p = data + i * stride + j;
- uint8_t *dst_p = dst + i * dst_stride + j;
- apply_selfguided_restoration(
- data_p, w, h, stride, rst->rsi->sgrproj_info[tile_idx].ep,
- rst->rsi->sgrproj_info[tile_idx].xqd, dst_p, dst_stride, rst->tmpbuf);
+ const uint16_t out = clip_pixel_highbd(w, bit_depth);
+ if (highbd)
+ *CONVERT_TO_SHORTPTR(dst8ij) = out;
+ else
+ *dst8ij = (uint8_t)out;
}
-#if CONFIG_STRIPED_LOOP_RESTORATION
- restore_processing_stripe_boundary(i, limits.v_end, limits.h_start,
- limits.h_end, data, stride, rst, 0);
-#endif
}
}
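To make the per-pixel projection above concrete, here is a small stand-alone sketch (an editorial illustration, not part of the patch). It assumes both radii are nonzero and uses stand-in macros for SGRPROJ_RST_BITS and SGRPROJ_PRJ_BITS with their usual values of 4 and 7; the input values are hand-picked.

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the restoration.h constants (assumed values). */
#define EX_SGRPROJ_RST_BITS 4
#define EX_SGRPROJ_PRJ_BITS 7
#define EX_ROUND_POWER_OF_TWO(x, n) (((x) + (1 << ((n)-1))) >> (n))

int main(void) {
  const int32_t dat = 100;                 /* source pixel */
  const int32_t u = dat << EX_SGRPROJ_RST_BITS;   /* 1600 */
  const int32_t flt0 = 1568, flt1 = 1632;  /* guided-filter outputs */
  const int32_t xq0 = 16, xq1 = -8;        /* hand-picked projection weights */
  const int32_t v =
      (u << EX_SGRPROJ_PRJ_BITS) + xq0 * (flt0 - u) + xq1 * (flt1 - u);
  const int32_t w =
      EX_ROUND_POWER_OF_TWO(v, EX_SGRPROJ_PRJ_BITS + EX_SGRPROJ_RST_BITS);
  printf("v = %d, w = %d\n", (int)v, (int)w);  /* prints v = 204032, w = 100 */
  return 0;
}

With these numbers the two filter outputs pull in opposite directions, so the rounded result lands back on the source value; in general the xq[] weights nudge the source towards the guided-filter outputs before the final rounding and clipping.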
-static void loop_sgrproj_filter(uint8_t *data, int width, int height,
- int stride, RestorationInternal *rst,
- uint8_t *dst, int dst_stride) {
- int tile_idx;
- extend_frame(data, width, height, stride, SGRPROJ_BORDER_HORZ,
- SGRPROJ_BORDER_VERT);
- for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
- loop_sgrproj_filter_tile(data, tile_idx, width, height, stride, rst, dst,
- dst_stride);
+static void sgrproj_filter_stripe(const RestorationUnitInfo *rui,
+ int stripe_width, int stripe_height,
+ int procunit_width, const uint8_t *src,
+ int src_stride, uint8_t *dst, int dst_stride,
+ int32_t *tmpbuf, int bit_depth) {
+ (void)bit_depth;
+ assert(bit_depth == 8);
+
+ for (int j = 0; j < stripe_width; j += procunit_width) {
+ int w = AOMMIN(procunit_width, stripe_width - j);
+ apply_selfguided_restoration(src + j, w, stripe_height, src_stride,
+ rui->sgrproj_info.ep, rui->sgrproj_info.xqd,
+ dst + j, dst_stride, tmpbuf, bit_depth, 0);
}
}
-static void loop_switchable_filter(uint8_t *data, int width, int height,
- int stride, RestorationInternal *rst,
- uint8_t *dst, int dst_stride) {
- int tile_idx;
- extend_frame(data, width, height, stride, RESTORATION_BORDER_HORZ,
- RESTORATION_BORDER_VERT);
- for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
- if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
- loop_copy_tile(data, tile_idx, width, height, stride, rst, dst,
- dst_stride);
- } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
- loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst, dst,
- dst_stride);
- } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_SGRPROJ) {
- loop_sgrproj_filter_tile(data, tile_idx, width, height, stride, rst, dst,
- dst_stride);
- }
+static void wiener_filter_stripe_highbd(const RestorationUnitInfo *rui,
+ int stripe_width, int stripe_height,
+ int procunit_width, const uint8_t *src8,
+ int src_stride, uint8_t *dst8,
+ int dst_stride, int32_t *tmpbuf,
+ int bit_depth) {
+ (void)tmpbuf;
+ const ConvolveParams conv_params = get_conv_params_wiener(bit_depth);
+
+ for (int j = 0; j < stripe_width; j += procunit_width) {
+ int w = AOMMIN(procunit_width, (stripe_width - j + 15) & ~15);
+ const uint8_t *src8_p = src8 + j;
+ uint8_t *dst8_p = dst8 + j;
+ av1_highbd_wiener_convolve_add_src(src8_p, src_stride, dst8_p, dst_stride,
+ rui->wiener_info.hfilter, 16,
+ rui->wiener_info.vfilter, 16, w,
+ stripe_height, &conv_params, bit_depth);
}
}
-#if CONFIG_HIGHBITDEPTH
-void extend_frame_highbd(uint16_t *data, int width, int height, int stride,
- int border_horz, int border_vert) {
- uint16_t *data_p;
- int i, j;
- for (i = 0; i < height; ++i) {
- data_p = data + i * stride;
- for (j = -border_horz; j < 0; ++j) data_p[j] = data_p[0];
- for (j = width; j < width + border_horz; ++j) data_p[j] = data_p[width - 1];
- }
- data_p = data - border_horz;
- for (i = -border_vert; i < 0; ++i) {
- memcpy(data_p + i * stride, data_p,
- (width + 2 * border_horz) * sizeof(uint16_t));
- }
- for (i = height; i < height + border_vert; ++i) {
- memcpy(data_p + i * stride, data_p + (height - 1) * stride,
- (width + 2 * border_horz) * sizeof(uint16_t));
+static void sgrproj_filter_stripe_highbd(const RestorationUnitInfo *rui,
+ int stripe_width, int stripe_height,
+ int procunit_width,
+ const uint8_t *src8, int src_stride,
+ uint8_t *dst8, int dst_stride,
+ int32_t *tmpbuf, int bit_depth) {
+ for (int j = 0; j < stripe_width; j += procunit_width) {
+ int w = AOMMIN(procunit_width, stripe_width - j);
+ apply_selfguided_restoration(src8 + j, w, stripe_height, src_stride,
+ rui->sgrproj_info.ep, rui->sgrproj_info.xqd,
+ dst8 + j, dst_stride, tmpbuf, bit_depth, 1);
}
}
-static void loop_copy_tile_highbd(uint16_t *data, int tile_idx, int width,
- int height, int stride,
- RestorationInternal *rst, uint16_t *dst,
- int dst_stride) {
- const int tile_width = rst->tile_width;
- const int tile_height = rst->tile_height;
- RestorationTileLimits limits =
- av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
-#if CONFIG_STRIPED_LOOP_RESTORATION
- tile_height, width, height, rst->subsampling_y);
-#else
- tile_height, width, height);
-#endif
- for (int i = limits.v_start; i < limits.v_end; ++i)
- memcpy(dst + i * dst_stride + limits.h_start,
- data + i * stride + limits.h_start,
- (limits.h_end - limits.h_start) * sizeof(*dst));
-}
+typedef void (*stripe_filter_fun)(const RestorationUnitInfo *rui,
+ int stripe_width, int stripe_height,
+ int procunit_width, const uint8_t *src,
+ int src_stride, uint8_t *dst, int dst_stride,
+ int32_t *tmpbuf, int bit_depth);
-static void loop_wiener_filter_tile_highbd(uint16_t *data, int tile_idx,
- int width, int height, int stride,
- RestorationInternal *rst,
- int bit_depth, uint16_t *dst,
- int dst_stride) {
- const int procunit_width = rst->rsi->procunit_width;
-#if CONFIG_STRIPED_LOOP_RESTORATION
- int procunit_height;
-#else
- const int procunit_height = rst->rsi->procunit_height;
-#endif
- const int tile_width = rst->tile_width;
- const int tile_height = rst->tile_height;
+#define NUM_STRIPE_FILTERS 4
+
+static const stripe_filter_fun stripe_filters[NUM_STRIPE_FILTERS] = {
+ wiener_filter_stripe, sgrproj_filter_stripe, wiener_filter_stripe_highbd,
+ sgrproj_filter_stripe_highbd
+};
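For reference (an editorial note, not part of the patch), the ordering of stripe_filters is what makes the index computed in av1_loop_restoration_filter_unit below work out:

/*
 * filter_idx = 2 * highbd + (unit_rtype == RESTORE_SGRPROJ)
 *   0 -> wiener_filter_stripe          (8-bit,          RESTORE_WIENER)
 *   1 -> sgrproj_filter_stripe         (8-bit,          RESTORE_SGRPROJ)
 *   2 -> wiener_filter_stripe_highbd   (high bit depth, RESTORE_WIENER)
 *   3 -> sgrproj_filter_stripe_highbd  (high bit depth, RESTORE_SGRPROJ)
 */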
- if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
- loop_copy_tile_highbd(data, tile_idx, width, height, stride, rst, dst,
- dst_stride);
+// Filter one restoration unit
+void av1_loop_restoration_filter_unit(
+ const RestorationTileLimits *limits, const RestorationUnitInfo *rui,
+ const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs,
+ const AV1PixelRect *tile_rect, int tile_stripe0, int ss_x, int ss_y,
+ int highbd, int bit_depth, uint8_t *data8, int stride, uint8_t *dst8,
+ int dst_stride, int32_t *tmpbuf, int optimized_lr) {
+ RestorationType unit_rtype = rui->restoration_type;
+
+ int unit_h = limits->v_end - limits->v_start;
+ int unit_w = limits->h_end - limits->h_start;
+ uint8_t *data8_tl = data8 + limits->v_start * stride + limits->h_start;
+ uint8_t *dst8_tl = dst8 + limits->v_start * dst_stride + limits->h_start;
+
+ if (unit_rtype == RESTORE_NONE) {
+ copy_tile(unit_w, unit_h, data8_tl, stride, dst8_tl, dst_stride, highbd);
return;
}
- RestorationTileLimits limits =
- av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
-#if CONFIG_STRIPED_LOOP_RESTORATION
- tile_height, width, height, rst->subsampling_y);
-#else
- tile_height, width, height);
-#endif
- InterpKernel vertical_topbot;
-
- // Convolve the whole tile (done in blocks here to match the requirements
- // of the vectorized convolve functions, but the result is equivalent)
- for (int i = limits.v_start; i < limits.v_end; i += procunit_height) {
-#if CONFIG_STRIPED_LOOP_RESTORATION
- int h = setup_processing_stripe_boundary(i, limits.v_end, limits.h_start,
- limits.h_end, (uint8_t *)data,
- stride, rst, 1);
- h = ALIGN_POWER_OF_TWO(h, 1);
- procunit_height = h;
-#else
- int h = AOMMIN(procunit_height, (limits.v_end - i + 15) & ~15);
-#endif
- for (int j = limits.h_start; j < limits.h_end; j += procunit_width) {
- int w = AOMMIN(procunit_width, (limits.h_end - j + 15) & ~15);
- const uint16_t *data_p = data + i * stride + j;
- uint16_t *dst_p = dst + i * dst_stride + j;
- // Note h is at least 16
- for (int b = 0; b < WIENER_HALFWIN - WIENER_BORDER_VERT; ++b) {
- stepdown_wiener_kernel(rst->rsi->wiener_info[tile_idx].vfilter,
- vertical_topbot, WIENER_BORDER_VERT + b, 1);
-#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
- aom_highbd_convolve8_add_src_hip(
- CONVERT_TO_BYTEPTR(data_p), stride, CONVERT_TO_BYTEPTR(dst_p),
- dst_stride, rst->rsi->wiener_info[tile_idx].hfilter, 16,
- vertical_topbot, 16, w, 1, bit_depth);
-#else
- aom_highbd_convolve8_add_src(CONVERT_TO_BYTEPTR(data_p), stride,
- CONVERT_TO_BYTEPTR(dst_p), dst_stride,
- rst->rsi->wiener_info[tile_idx].hfilter,
- 16, vertical_topbot, 16, w, 1, bit_depth);
-#endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION
- data_p += stride;
- dst_p += dst_stride;
- }
-#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
- aom_highbd_convolve8_add_src_hip(
- CONVERT_TO_BYTEPTR(data_p), stride, CONVERT_TO_BYTEPTR(dst_p),
- dst_stride, rst->rsi->wiener_info[tile_idx].hfilter, 16,
- rst->rsi->wiener_info[tile_idx].vfilter, 16, w,
- h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2, bit_depth);
-#else
- aom_highbd_convolve8_add_src(
- CONVERT_TO_BYTEPTR(data_p), stride, CONVERT_TO_BYTEPTR(dst_p),
- dst_stride, rst->rsi->wiener_info[tile_idx].hfilter, 16,
- rst->rsi->wiener_info[tile_idx].vfilter, 16, w,
- h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2, bit_depth);
-#endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION
- data_p += stride * (h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2);
- dst_p += dst_stride * (h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2);
- for (int b = WIENER_HALFWIN - WIENER_BORDER_VERT - 1; b >= 0; --b) {
- stepdown_wiener_kernel(rst->rsi->wiener_info[tile_idx].vfilter,
- vertical_topbot, WIENER_BORDER_VERT + b, 0);
-#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
- aom_highbd_convolve8_add_src_hip(
- CONVERT_TO_BYTEPTR(data_p), stride, CONVERT_TO_BYTEPTR(dst_p),
- dst_stride, rst->rsi->wiener_info[tile_idx].hfilter, 16,
- vertical_topbot, 16, w, 1, bit_depth);
-#else
- aom_highbd_convolve8_add_src(CONVERT_TO_BYTEPTR(data_p), stride,
- CONVERT_TO_BYTEPTR(dst_p), dst_stride,
- rst->rsi->wiener_info[tile_idx].hfilter,
- 16, vertical_topbot, 16, w, 1, bit_depth);
-#endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION
- data_p += stride;
- dst_p += dst_stride;
- }
- }
-#if CONFIG_STRIPED_LOOP_RESTORATION
- restore_processing_stripe_boundary(i, limits.v_end, limits.h_start,
- limits.h_end, (uint8_t *)data, stride,
- rst, 1);
-#endif
+
+ const int filter_idx = 2 * highbd + (unit_rtype == RESTORE_SGRPROJ);
+ assert(filter_idx < NUM_STRIPE_FILTERS);
+ const stripe_filter_fun stripe_filter = stripe_filters[filter_idx];
+
+ const int procunit_width = RESTORATION_PROC_UNIT_SIZE >> ss_x;
+
+ // Convolve the whole tile one stripe at a time
+ RestorationTileLimits remaining_stripes = *limits;
+ int i = 0;
+ while (i < unit_h) {
+ int copy_above, copy_below;
+ remaining_stripes.v_start = limits->v_start + i;
+
+ get_stripe_boundary_info(&remaining_stripes, tile_rect, ss_y, &copy_above,
+ &copy_below);
+
+ const int full_stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
+ const int runit_offset = RESTORATION_UNIT_OFFSET >> ss_y;
+
+ // Work out where this stripe's boundaries are within
+ // rsb->stripe_boundary_{above,below}
+ const int tile_stripe =
+ (remaining_stripes.v_start - tile_rect->top + runit_offset) /
+ full_stripe_height;
+ const int frame_stripe = tile_stripe0 + tile_stripe;
+ const int rsb_row = RESTORATION_CTX_VERT * frame_stripe;
+
+ // Calculate this stripe's height, based on two rules:
+ // * The topmost stripe in each tile is 8 luma pixels shorter than usual.
+ // * We can't extend past the end of the current restoration unit
+ const int nominal_stripe_height =
+ full_stripe_height - ((tile_stripe == 0) ? runit_offset : 0);
+ const int h = AOMMIN(nominal_stripe_height,
+ remaining_stripes.v_end - remaining_stripes.v_start);
+
+ setup_processing_stripe_boundary(&remaining_stripes, rsb, rsb_row, highbd,
+ h, data8, stride, rlbs, copy_above,
+ copy_below, optimized_lr);
+
+ stripe_filter(rui, unit_w, h, procunit_width, data8_tl + i * stride, stride,
+ dst8_tl + i * dst_stride, dst_stride, tmpbuf, bit_depth);
+
+ restore_processing_stripe_boundary(&remaining_stripes, rlbs, highbd, h,
+ data8, stride, copy_above, copy_below,
+ optimized_lr);
+
+ i += h;
}
}
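As a rough illustration (not part of the patch), this stand-alone sketch re-derives the stripe heights the loop above produces for a 64-row luma restoration unit whose top edge coincides with the top of its tile, using the RESTORATION_PROC_UNIT_SIZE == 64 and RESTORATION_UNIT_OFFSET == 8 values from restoration.h in this change.

#include <stdio.h>

int main(void) {
  const int unit_h = 64;  /* rows in the restoration unit */
  const int full = 64;    /* RESTORATION_PROC_UNIT_SIZE for luma */
  const int offset = 8;   /* RESTORATION_UNIT_OFFSET for luma */
  int i = 0;
  while (i < unit_h) {
    const int tile_stripe = (i + offset) / full;
    const int nominal = full - (tile_stripe == 0 ? offset : 0);
    const int remaining = unit_h - i;
    const int h = nominal < remaining ? nominal : remaining;
    printf("stripe %d: %d rows\n", tile_stripe, h);  /* prints 56, then 8 */
    i += h;
  }
  return 0;
}

So the unit is filtered as a 56-row stripe followed by an 8-row stripe, which keeps every later stripe aligned to the upward-shifted processing-stripe grid.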
-static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height,
- int stride, RestorationInternal *rst,
- int bit_depth, uint8_t *dst8,
- int dst_stride) {
- uint16_t *data = CONVERT_TO_SHORTPTR(data8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- int tile_idx;
- extend_frame_highbd(data, width, height, stride, WIENER_BORDER_HORZ,
- WIENER_BORDER_VERT);
- for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
- loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
- bit_depth, dst, dst_stride);
- }
+static void filter_frame_on_tile(int tile_row, int tile_col, void *priv,
+ AV1_COMMON *cm) {
+ (void)tile_col;
+ FilterFrameCtxt *ctxt = (FilterFrameCtxt *)priv;
+ ctxt->tile_stripe0 = (tile_row == 0) ? 0 : cm->rst_end_stripe[tile_row - 1];
}
-void av1_selfguided_restoration_highbd_c(uint16_t *dgd, int width, int height,
- int stride, int32_t *dst,
- int dst_stride, int bit_depth, int r,
- int eps) {
- int32_t dgd32_[RESTORATION_PROC_UNIT_PELS];
- const int dgd32_stride = width + 2 * SGRPROJ_BORDER_HORZ;
- int32_t *dgd32 =
- dgd32_ + dgd32_stride * SGRPROJ_BORDER_VERT + SGRPROJ_BORDER_HORZ;
- int i, j;
- for (i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) {
- for (j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) {
- dgd32[i * dgd32_stride + j] = dgd[i * stride + j];
- }
- }
- av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, dst,
- dst_stride, bit_depth, r, eps);
+static void filter_frame_on_unit(const RestorationTileLimits *limits,
+ const AV1PixelRect *tile_rect,
+ int rest_unit_idx, void *priv, int32_t *tmpbuf,
+ RestorationLineBuffers *rlbs) {
+ FilterFrameCtxt *ctxt = (FilterFrameCtxt *)priv;
+ const RestorationInfo *rsi = ctxt->rsi;
+
+ av1_loop_restoration_filter_unit(
+ limits, &rsi->unit_info[rest_unit_idx], &rsi->boundaries, rlbs, tile_rect,
+ ctxt->tile_stripe0, ctxt->ss_x, ctxt->ss_y, ctxt->highbd, ctxt->bit_depth,
+ ctxt->data8, ctxt->data_stride, ctxt->dst8, ctxt->dst_stride, tmpbuf,
+ rsi->optimized_lr);
}
-void av1_highpass_filter_highbd_c(uint16_t *dgd, int width, int height,
- int stride, int32_t *dst, int dst_stride,
- int corner, int edge) {
- int i, j;
- const int center = (1 << SGRPROJ_RST_BITS) - 4 * (corner + edge);
-
- i = 0;
- j = 0;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k + 1] + dgd[k + stride] + dgd[k] * 2) +
- corner * (dgd[k + stride + 1] + dgd[k + 1] + dgd[k + stride] + dgd[k]);
- }
- i = 0;
- j = width - 1;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k - 1] + dgd[k + stride] + dgd[k] * 2) +
- corner * (dgd[k + stride - 1] + dgd[k - 1] + dgd[k + stride] + dgd[k]);
- }
- i = height - 1;
- j = 0;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k + 1] + dgd[k - stride] + dgd[k] * 2) +
- corner * (dgd[k - stride + 1] + dgd[k + 1] + dgd[k - stride] + dgd[k]);
- }
- i = height - 1;
- j = width - 1;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k - 1] + dgd[k - stride] + dgd[k] * 2) +
- corner * (dgd[k - stride - 1] + dgd[k - 1] + dgd[k - stride] + dgd[k]);
- }
- i = 0;
- for (j = 1; j < width - 1; ++j) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] = center * dgd[k] +
- edge * (dgd[k - 1] + dgd[k + stride] + dgd[k + 1] + dgd[k]) +
- corner * (dgd[k + stride - 1] + dgd[k + stride + 1] + dgd[k - 1] +
- dgd[k + 1]);
- }
- i = height - 1;
- for (j = 1; j < width - 1; ++j) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] = center * dgd[k] +
- edge * (dgd[k - 1] + dgd[k - stride] + dgd[k + 1] + dgd[k]) +
- corner * (dgd[k - stride - 1] + dgd[k - stride + 1] + dgd[k - 1] +
- dgd[k + 1]);
- }
- j = 0;
- for (i = 1; i < height - 1; ++i) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] = center * dgd[k] +
- edge * (dgd[k - stride] + dgd[k + 1] + dgd[k + stride] + dgd[k]) +
- corner * (dgd[k + stride + 1] + dgd[k - stride + 1] +
- dgd[k - stride] + dgd[k + stride]);
- }
- j = width - 1;
- for (i = 1; i < height - 1; ++i) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] = center * dgd[k] +
- edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k]) +
- corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
- dgd[k - stride] + dgd[k + stride]);
- }
- for (i = 1; i < height - 1; ++i) {
- for (j = 1; j < width - 1; ++j) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] +
- edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k + 1]) +
- corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
- dgd[k - stride + 1] + dgd[k + stride + 1]);
+void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt,
+ YV12_BUFFER_CONFIG *frame,
+ AV1_COMMON *cm, int optimized_lr,
+ int num_planes) {
+ const int bit_depth = cm->bit_depth;
+ const int highbd = cm->use_highbitdepth;
+ lr_ctxt->dst = &cm->rst_frame;
+
+ const int frame_width = frame->crop_widths[0];
+ const int frame_height = frame->crop_heights[0];
+ if (aom_realloc_frame_buffer(lr_ctxt->dst, frame_width, frame_height,
+ cm->subsampling_x, cm->subsampling_y,
+ cm->use_highbitdepth, AOM_BORDER_IN_PIXELS,
+ cm->byte_alignment, NULL, NULL, NULL) < 0)
+ aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ "Failed to allocate restoration dst buffer");
+
+ lr_ctxt->on_rest_unit = filter_frame_on_unit;
+ lr_ctxt->frame = frame;
+ for (int plane = 0; plane < num_planes; ++plane) {
+ RestorationInfo *rsi = &cm->rst_info[plane];
+ RestorationType rtype = rsi->frame_restoration_type;
+ rsi->optimized_lr = optimized_lr;
+
+ if (rtype == RESTORE_NONE) {
+ continue;
}
+
+ const int is_uv = plane > 0;
+ const int plane_width = frame->crop_widths[is_uv];
+ const int plane_height = frame->crop_heights[is_uv];
+ FilterFrameCtxt *lr_plane_ctxt = &lr_ctxt->ctxt[plane];
+
+ extend_frame(frame->buffers[plane], plane_width, plane_height,
+ frame->strides[is_uv], RESTORATION_BORDER, RESTORATION_BORDER,
+ highbd);
+
+ lr_plane_ctxt->rsi = rsi;
+ lr_plane_ctxt->ss_x = is_uv && cm->subsampling_x;
+ lr_plane_ctxt->ss_y = is_uv && cm->subsampling_y;
+ lr_plane_ctxt->highbd = highbd;
+ lr_plane_ctxt->bit_depth = bit_depth;
+ lr_plane_ctxt->data8 = frame->buffers[plane];
+ lr_plane_ctxt->dst8 = lr_ctxt->dst->buffers[plane];
+ lr_plane_ctxt->data_stride = frame->strides[is_uv];
+ lr_plane_ctxt->dst_stride = lr_ctxt->dst->strides[is_uv];
+ lr_plane_ctxt->tile_rect = av1_whole_frame_rect(cm, is_uv);
+ filter_frame_on_tile(LR_TILE_ROW, LR_TILE_COL, lr_plane_ctxt, cm);
}
}
-void apply_selfguided_restoration_highbd_c(uint16_t *dat, int width, int height,
- int stride, int bit_depth, int eps,
- int *xqd, uint16_t *dst,
- int dst_stride, int32_t *tmpbuf) {
- int xq[2];
- int32_t *flt1 = tmpbuf;
- int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
- int i, j;
- assert(width * height <= RESTORATION_TILEPELS_MAX);
-#if USE_HIGHPASS_IN_SGRPROJ
- av1_highpass_filter_highbd_c(dat, width, height, stride, flt1, width,
- sgr_params[eps].corner, sgr_params[eps].edge);
-#else
- av1_selfguided_restoration_highbd_c(dat, width, height, stride, flt1, width,
- bit_depth, sgr_params[eps].r1,
- sgr_params[eps].e1);
-#endif // USE_HIGHPASS_IN_SGRPROJ
- av1_selfguided_restoration_highbd_c(dat, width, height, stride, flt2, width,
- bit_depth, sgr_params[eps].r2,
- sgr_params[eps].e2);
- decode_xq(xqd, xq);
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- const int k = i * width + j;
- const int l = i * stride + j;
- const int m = i * dst_stride + j;
- const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS);
- const int32_t f1 = (int32_t)flt1[k] - u;
- const int32_t f2 = (int32_t)flt2[k] - u;
- const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
- const int16_t w =
- (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
- dst[m] = (uint16_t)clip_pixel_highbd(w, bit_depth);
- }
+void av1_loop_restoration_copy_planes(AV1LrStruct *loop_rest_ctxt,
+ AV1_COMMON *cm, int num_planes) {
+ typedef void (*copy_fun)(const YV12_BUFFER_CONFIG *src_ybc,
+ YV12_BUFFER_CONFIG *dst_ybc, int hstart, int hend,
+ int vstart, int vend);
+ static const copy_fun copy_funs[3] = {
+ aom_yv12_partial_copy_y, aom_yv12_partial_copy_u, aom_yv12_partial_copy_v
+ };
+
+ for (int plane = 0; plane < num_planes; ++plane) {
+ if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
+ AV1PixelRect tile_rect = loop_rest_ctxt->ctxt[plane].tile_rect;
+ copy_funs[plane](loop_rest_ctxt->dst, loop_rest_ctxt->frame, tile_rect.left,
+ tile_rect.right, tile_rect.top, tile_rect.bottom);
}
}
-static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx,
- int width, int height, int stride,
- RestorationInternal *rst,
- int bit_depth, uint16_t *dst,
- int dst_stride) {
- const int procunit_width = rst->rsi->procunit_width;
-#if CONFIG_STRIPED_LOOP_RESTORATION
- int procunit_height;
-#else
- const int procunit_height = rst->rsi->procunit_height;
-#endif
- const int tile_width = rst->tile_width;
- const int tile_height = rst->tile_height;
+static void foreach_rest_unit_in_planes(AV1LrStruct *lr_ctxt, AV1_COMMON *cm,
+ int num_planes) {
+ FilterFrameCtxt *ctxt = lr_ctxt->ctxt;
- if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
- loop_copy_tile_highbd(data, tile_idx, width, height, stride, rst, dst,
- dst_stride);
- return;
- }
- RestorationTileLimits limits =
- av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
-#if CONFIG_STRIPED_LOOP_RESTORATION
- tile_height, width, height, rst->subsampling_y);
-#else
- tile_height, width, height);
-#endif
- for (int i = limits.v_start; i < limits.v_end; i += procunit_height) {
-#if CONFIG_STRIPED_LOOP_RESTORATION
- int h = setup_processing_stripe_boundary(i, limits.v_end, limits.h_start,
- limits.h_end, (uint8_t *)data,
- stride, rst, 1);
- procunit_height = h;
-#else
- int h = AOMMIN(procunit_height, limits.v_end - i);
-#endif
- for (int j = limits.h_start; j < limits.h_end; j += procunit_width) {
- int w = AOMMIN(procunit_width, limits.h_end - j);
- uint16_t *data_p = data + i * stride + j;
- uint16_t *dst_p = dst + i * dst_stride + j;
- apply_selfguided_restoration_highbd(
- data_p, w, h, stride, bit_depth, rst->rsi->sgrproj_info[tile_idx].ep,
- rst->rsi->sgrproj_info[tile_idx].xqd, dst_p, dst_stride, rst->tmpbuf);
+ for (int plane = 0; plane < num_planes; ++plane) {
+ if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) {
+ continue;
}
-#if CONFIG_STRIPED_LOOP_RESTORATION
- restore_processing_stripe_boundary(i, limits.v_end, limits.h_start,
- limits.h_end, (uint8_t *)data, stride,
- rst, 1);
-#endif
- }
-}
-static void loop_sgrproj_filter_highbd(uint8_t *data8, int width, int height,
- int stride, RestorationInternal *rst,
- int bit_depth, uint8_t *dst8,
- int dst_stride) {
- int tile_idx;
- uint16_t *data = CONVERT_TO_SHORTPTR(data8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- extend_frame_highbd(data, width, height, stride, SGRPROJ_BORDER_HORZ,
- SGRPROJ_BORDER_VERT);
- for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
- loop_sgrproj_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
- bit_depth, dst, dst_stride);
+ av1_foreach_rest_unit_in_plane(cm, plane, lr_ctxt->on_rest_unit,
+ &ctxt[plane], &ctxt[plane].tile_rect,
+ cm->rst_tmpbuf, cm->rlbs);
}
}
-static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height,
- int stride, RestorationInternal *rst,
- int bit_depth, uint8_t *dst8,
- int dst_stride) {
- uint16_t *data = CONVERT_TO_SHORTPTR(data8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- int tile_idx;
- extend_frame_highbd(data, width, height, stride, RESTORATION_BORDER_HORZ,
- RESTORATION_BORDER_VERT);
- for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
- if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
- loop_copy_tile_highbd(data, tile_idx, width, height, stride, rst, dst,
- dst_stride);
- } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
- loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
- bit_depth, dst, dst_stride);
- } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_SGRPROJ) {
- loop_sgrproj_filter_tile_highbd(data, tile_idx, width, height, stride,
- rst, bit_depth, dst, dst_stride);
- }
- }
+void av1_loop_restoration_filter_frame(YV12_BUFFER_CONFIG *frame,
+ AV1_COMMON *cm, int optimized_lr,
+ void *lr_ctxt) {
+ assert(!cm->all_lossless);
+ const int num_planes = av1_num_planes(cm);
+
+ AV1LrStruct *loop_rest_ctxt = (AV1LrStruct *)lr_ctxt;
+
+ av1_loop_restoration_filter_frame_init(loop_rest_ctxt, frame, cm,
+ optimized_lr, num_planes);
+
+ foreach_rest_unit_in_planes(loop_rest_ctxt, cm, num_planes);
+
+ av1_loop_restoration_copy_planes(loop_rest_ctxt, cm, num_planes);
}
-#endif // CONFIG_HIGHBITDEPTH
-
-static void loop_restoration_rows(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
- int start_mi_row, int end_mi_row,
- int components_pattern, RestorationInfo *rsi,
- YV12_BUFFER_CONFIG *dst) {
- const int ywidth = frame->y_crop_width;
- const int yheight = frame->y_crop_height;
- const int uvwidth = frame->uv_crop_width;
- const int uvheight = frame->uv_crop_height;
- const int ystride = frame->y_stride;
- const int uvstride = frame->uv_stride;
- const int ystart = start_mi_row << MI_SIZE_LOG2;
- const int uvstart = ystart >> cm->subsampling_y;
- int yend = end_mi_row << MI_SIZE_LOG2;
- int uvend = yend >> cm->subsampling_y;
- restore_func_type restore_funcs[RESTORE_TYPES] = {
- NULL, loop_wiener_filter, loop_sgrproj_filter, loop_switchable_filter
- };
-#if CONFIG_HIGHBITDEPTH
- restore_func_highbd_type restore_funcs_highbd[RESTORE_TYPES] = {
- NULL, loop_wiener_filter_highbd, loop_sgrproj_filter_highbd,
- loop_switchable_filter_highbd
- };
-#endif // CONFIG_HIGHBITDEPTH
- restore_func_type restore_func;
-#if CONFIG_HIGHBITDEPTH
- restore_func_highbd_type restore_func_highbd;
-#endif // CONFIG_HIGHBITDEPTH
- YV12_BUFFER_CONFIG dst_;
-
- yend = AOMMIN(yend, yheight);
- uvend = AOMMIN(uvend, uvheight);
- if (components_pattern == (1 << AOM_PLANE_Y)) {
- // Only y
- if (rsi[0].frame_restoration_type == RESTORE_NONE) {
- if (dst) aom_yv12_copy_y(frame, dst);
- return;
- }
- } else if (components_pattern == (1 << AOM_PLANE_U)) {
- // Only U
- if (rsi[1].frame_restoration_type == RESTORE_NONE) {
- if (dst) aom_yv12_copy_u(frame, dst);
- return;
- }
- } else if (components_pattern == (1 << AOM_PLANE_V)) {
- // Only V
- if (rsi[2].frame_restoration_type == RESTORE_NONE) {
- if (dst) aom_yv12_copy_v(frame, dst);
- return;
- }
- } else if (components_pattern ==
- ((1 << AOM_PLANE_Y) | (1 << AOM_PLANE_U) | (1 << AOM_PLANE_V))) {
- // All components
- if (rsi[0].frame_restoration_type == RESTORE_NONE &&
- rsi[1].frame_restoration_type == RESTORE_NONE &&
- rsi[2].frame_restoration_type == RESTORE_NONE) {
- if (dst) aom_yv12_copy_frame(frame, dst);
- return;
- }
- }
- if (!dst) {
- dst = &dst_;
- memset(dst, 0, sizeof(YV12_BUFFER_CONFIG));
- if (aom_realloc_frame_buffer(
- dst, ywidth, yheight, cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif
- AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL) < 0)
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate restoration dst buffer");
- }
+void av1_foreach_rest_unit_in_row(
+ RestorationTileLimits *limits, const AV1PixelRect *tile_rect,
+ rest_unit_visitor_t on_rest_unit, int row_number, int unit_size,
+ int unit_idx0, int hunits_per_tile, int vunits_per_tile, int plane,
+ void *priv, int32_t *tmpbuf, RestorationLineBuffers *rlbs,
+ sync_read_fn_t on_sync_read, sync_write_fn_t on_sync_write,
+ struct AV1LrSyncData *const lr_sync) {
+ const int tile_w = tile_rect->right - tile_rect->left;
+ const int ext_size = unit_size * 3 / 2;
+ int x0 = 0, j = 0;
+ while (x0 < tile_w) {
+ int remaining_w = tile_w - x0;
+ int w = (remaining_w < ext_size) ? remaining_w : unit_size;
- if ((components_pattern >> AOM_PLANE_Y) & 1) {
- if (rsi[0].frame_restoration_type != RESTORE_NONE) {
- cm->rst_internal.ntiles = av1_get_rest_ntiles(
- ywidth, yheight, cm->rst_info[AOM_PLANE_Y].restoration_tilesize,
- &cm->rst_internal.tile_width, &cm->rst_internal.tile_height,
- &cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles);
- cm->rst_internal.rsi = &rsi[0];
-#if CONFIG_STRIPED_LOOP_RESTORATION
- cm->rst_internal.component = AOM_PLANE_Y;
- cm->rst_internal.subsampling_y = 0;
-#endif
- restore_func =
- restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
-#if CONFIG_HIGHBITDEPTH
- restore_func_highbd =
- restore_funcs_highbd[cm->rst_internal.rsi->frame_restoration_type];
- if (cm->use_highbitdepth)
- restore_func_highbd(
- frame->y_buffer + ystart * ystride, ywidth, yend - ystart, ystride,
- &cm->rst_internal, cm->bit_depth,
- dst->y_buffer + ystart * dst->y_stride, dst->y_stride);
- else
-#endif // CONFIG_HIGHBITDEPTH
- restore_func(frame->y_buffer + ystart * ystride, ywidth, yend - ystart,
- ystride, &cm->rst_internal,
- dst->y_buffer + ystart * dst->y_stride, dst->y_stride);
- } else {
- aom_yv12_copy_y(frame, dst);
- }
- }
+ limits->h_start = tile_rect->left + x0;
+ limits->h_end = tile_rect->left + x0 + w;
+ assert(limits->h_end <= tile_rect->right);
- if ((components_pattern >> AOM_PLANE_U) & 1) {
- if (rsi[AOM_PLANE_U].frame_restoration_type != RESTORE_NONE) {
- cm->rst_internal.ntiles = av1_get_rest_ntiles(
- uvwidth, uvheight, cm->rst_info[AOM_PLANE_U].restoration_tilesize,
- &cm->rst_internal.tile_width, &cm->rst_internal.tile_height,
- &cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles);
- cm->rst_internal.rsi = &rsi[AOM_PLANE_U];
-#if CONFIG_STRIPED_LOOP_RESTORATION
- cm->rst_internal.component = AOM_PLANE_U;
- cm->rst_internal.subsampling_y = cm->subsampling_y;
-#endif
- restore_func =
- restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
-#if CONFIG_HIGHBITDEPTH
- restore_func_highbd =
- restore_funcs_highbd[cm->rst_internal.rsi->frame_restoration_type];
- if (cm->use_highbitdepth)
- restore_func_highbd(
- frame->u_buffer + uvstart * uvstride, uvwidth, uvend - uvstart,
- uvstride, &cm->rst_internal, cm->bit_depth,
- dst->u_buffer + uvstart * dst->uv_stride, dst->uv_stride);
- else
-#endif // CONFIG_HIGHBITDEPTH
- restore_func(frame->u_buffer + uvstart * uvstride, uvwidth,
- uvend - uvstart, uvstride, &cm->rst_internal,
- dst->u_buffer + uvstart * dst->uv_stride, dst->uv_stride);
- } else {
- aom_yv12_copy_u(frame, dst);
- }
- }
+ const int unit_idx = unit_idx0 + row_number * hunits_per_tile + j;
- if ((components_pattern >> AOM_PLANE_V) & 1) {
- if (rsi[AOM_PLANE_V].frame_restoration_type != RESTORE_NONE) {
- cm->rst_internal.ntiles = av1_get_rest_ntiles(
- uvwidth, uvheight, cm->rst_info[AOM_PLANE_V].restoration_tilesize,
- &cm->rst_internal.tile_width, &cm->rst_internal.tile_height,
- &cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles);
- cm->rst_internal.rsi = &rsi[AOM_PLANE_V];
-#if CONFIG_STRIPED_LOOP_RESTORATION
- cm->rst_internal.component = AOM_PLANE_V;
- cm->rst_internal.subsampling_y = cm->subsampling_y;
-#endif
- restore_func =
- restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
-#if CONFIG_HIGHBITDEPTH
- restore_func_highbd =
- restore_funcs_highbd[cm->rst_internal.rsi->frame_restoration_type];
- if (cm->use_highbitdepth)
- restore_func_highbd(
- frame->v_buffer + uvstart * uvstride, uvwidth, uvend - uvstart,
- uvstride, &cm->rst_internal, cm->bit_depth,
- dst->v_buffer + uvstart * dst->uv_stride, dst->uv_stride);
- else
-#endif // CONFIG_HIGHBITDEPTH
- restore_func(frame->v_buffer + uvstart * uvstride, uvwidth,
- uvend - uvstart, uvstride, &cm->rst_internal,
- dst->v_buffer + uvstart * dst->uv_stride, dst->uv_stride);
- } else {
- aom_yv12_copy_v(frame, dst);
- }
- }
+ // No sync is needed for even-numbered rows.
+ // For odd-numbered rows, loop restoration of the current block requires the
+ // LR of the top-right and bottom-right blocks to be completed.
+
+ // top-right sync
+ on_sync_read(lr_sync, row_number, j, plane);
+ if ((row_number + 1) < vunits_per_tile)
+ // bottom-right sync
+ on_sync_read(lr_sync, row_number + 2, j, plane);
+
+ on_rest_unit(limits, tile_rect, unit_idx, priv, tmpbuf, rlbs);
+
+ on_sync_write(lr_sync, row_number, j, hunits_per_tile, plane);
- if (dst == &dst_) {
- if ((components_pattern >> AOM_PLANE_Y) & 1) aom_yv12_copy_y(dst, frame);
- if ((components_pattern >> AOM_PLANE_U) & 1) aom_yv12_copy_u(dst, frame);
- if ((components_pattern >> AOM_PLANE_V) & 1) aom_yv12_copy_v(dst, frame);
- aom_free_frame_buffer(dst);
+ x0 += w;
+ ++j;
}
}
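A quick worked example (not part of the patch) of the width rule above, for a hypothetical 200-pixel-wide tile with unit_size == 64, so ext_size == 96:

/*
 *   x0 =   0: remaining_w = 200 >= 96 -> w = 64  (unit covers [0, 64))
 *   x0 =  64: remaining_w = 136 >= 96 -> w = 64  (unit covers [64, 128))
 *   x0 = 128: remaining_w =  72 <  96 -> w = 72  (unit covers [128, 200))
 *
 * The last unit in a row is stretched by up to half a unit so that no narrow
 * leftover unit is created at the right edge of the tile.
 */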
-void av1_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
- RestorationInfo *rsi, int components_pattern,
- int partial_frame, YV12_BUFFER_CONFIG *dst) {
- int start_mi_row, end_mi_row, mi_rows_to_filter;
- start_mi_row = 0;
-#if CONFIG_FRAME_SUPERRES
- mi_rows_to_filter =
- ALIGN_POWER_OF_TWO(cm->superres_upscaled_height, 3) >> MI_SIZE_LOG2;
-#else
- mi_rows_to_filter = cm->mi_rows;
-#endif // CONFIG_FRAME_SUPERRES
- if (partial_frame && mi_rows_to_filter > 8) {
- start_mi_row = mi_rows_to_filter >> 1;
- start_mi_row &= 0xfffffff8;
- mi_rows_to_filter = AOMMAX(mi_rows_to_filter / 8, 8);
+void av1_lr_sync_read_dummy(void *const lr_sync, int r, int c, int plane) {
+ (void)lr_sync;
+ (void)r;
+ (void)c;
+ (void)plane;
+}
+
+void av1_lr_sync_write_dummy(void *const lr_sync, int r, int c,
+ const int sb_cols, int plane) {
+ (void)lr_sync;
+ (void)r;
+ (void)c;
+ (void)sb_cols;
+ (void)plane;
+}
+
+static void foreach_rest_unit_in_tile(
+ const AV1PixelRect *tile_rect, int tile_row, int tile_col, int tile_cols,
+ int hunits_per_tile, int vunits_per_tile, int units_per_tile, int unit_size,
+ int ss_y, int plane, rest_unit_visitor_t on_rest_unit, void *priv,
+ int32_t *tmpbuf, RestorationLineBuffers *rlbs) {
+ const int tile_h = tile_rect->bottom - tile_rect->top;
+ const int ext_size = unit_size * 3 / 2;
+
+ const int tile_idx = tile_col + tile_row * tile_cols;
+ const int unit_idx0 = tile_idx * units_per_tile;
+
+ int y0 = 0, i = 0;
+ while (y0 < tile_h) {
+ int remaining_h = tile_h - y0;
+ int h = (remaining_h < ext_size) ? remaining_h : unit_size;
+
+ RestorationTileLimits limits;
+ limits.v_start = tile_rect->top + y0;
+ limits.v_end = tile_rect->top + y0 + h;
+ assert(limits.v_end <= tile_rect->bottom);
+ // Offset the tile upwards to align with the restoration processing stripe
+ const int voffset = RESTORATION_UNIT_OFFSET >> ss_y;
+ limits.v_start = AOMMAX(tile_rect->top, limits.v_start - voffset);
+ if (limits.v_end < tile_rect->bottom) limits.v_end -= voffset;
+
+ av1_foreach_rest_unit_in_row(
+ &limits, tile_rect, on_rest_unit, i, unit_size, unit_idx0,
+ hunits_per_tile, vunits_per_tile, plane, priv, tmpbuf, rlbs,
+ av1_lr_sync_read_dummy, av1_lr_sync_write_dummy, NULL);
+
+ y0 += h;
+ ++i;
}
- end_mi_row = start_mi_row + mi_rows_to_filter;
- loop_restoration_init(&cm->rst_internal, cm->frame_type == KEY_FRAME);
- loop_restoration_rows(frame, cm, start_mi_row, end_mi_row, components_pattern,
- rsi, dst);
+}
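Similarly, a worked example (not part of the patch) of the vertical limits above for a hypothetical 200-row luma tile (unit_size == 64, voffset == 8):

/*
 *   y0 =   0: h = 64 -> [top,       top + 64)  -> offset -> [top,       top + 56)
 *   y0 =  64: h = 64 -> [top + 64,  top + 128) -> offset -> [top + 56,  top + 120)
 *   y0 = 128: h = 72 -> [top + 128, top + 200) -> offset -> [top + 120, top + 200)
 *
 * Every unit is shifted up by 8 rows to line up with the processing-stripe
 * grid; the last unit keeps its lower limit because v_end is already the
 * tile bottom.
 */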
+
+void av1_foreach_rest_unit_in_plane(const struct AV1Common *cm, int plane,
+ rest_unit_visitor_t on_rest_unit,
+ void *priv, AV1PixelRect *tile_rect,
+ int32_t *tmpbuf,
+ RestorationLineBuffers *rlbs) {
+ const int is_uv = plane > 0;
+ const int ss_y = is_uv && cm->subsampling_y;
+
+ const RestorationInfo *rsi = &cm->rst_info[plane];
+
+ foreach_rest_unit_in_tile(tile_rect, LR_TILE_ROW, LR_TILE_COL, LR_TILE_COLS,
+ rsi->horz_units_per_tile, rsi->vert_units_per_tile,
+ rsi->units_per_tile, rsi->restoration_unit_size,
+ ss_y, plane, on_rest_unit, priv, tmpbuf, rlbs);
}
int av1_loop_restoration_corners_in_sb(const struct AV1Common *cm, int plane,
int mi_row, int mi_col, BLOCK_SIZE bsize,
int *rcol0, int *rcol1, int *rrow0,
- int *rrow1, int *nhtiles) {
- assert(rcol0 && rcol1 && rrow0 && rrow1 && nhtiles);
-
- if (bsize != cm->sb_size) return 0;
-
-#if CONFIG_FRAME_SUPERRES
- const int frame_w = cm->superres_upscaled_width;
- const int frame_h = cm->superres_upscaled_height;
- const int mi_to_px = MI_SIZE * SCALE_NUMERATOR;
- const int denom = cm->superres_scale_denominator;
-#else
- const int frame_w = cm->width;
- const int frame_h = cm->height;
- const int mi_to_px = MI_SIZE;
- const int denom = 1;
-#endif // CONFIG_FRAME_SUPERRES
-
- const int ss_x = plane > 0 && cm->subsampling_x != 0;
- const int ss_y = plane > 0 && cm->subsampling_y != 0;
-
- const int ss_frame_w = (frame_w + ss_x) >> ss_x;
- const int ss_frame_h = (frame_h + ss_y) >> ss_y;
-
- int rtile_w, rtile_h, nvtiles;
- av1_get_rest_ntiles(ss_frame_w, ss_frame_h,
- cm->rst_info[plane].restoration_tilesize, &rtile_w,
- &rtile_h, nhtiles, &nvtiles);
-
- const int rnd_w = rtile_w * denom - 1;
- const int rnd_h = rtile_h * denom - 1;
-
- // rcol0/rrow0 should be the first column/row of rtiles that doesn't start
- // left/below of mi_col/mi_row. For this calculation, we need to round up the
- // division (if the sb starts at rtile column 10.1, the first matching rtile
- // has column index 11)
- *rcol0 = (mi_col * mi_to_px + rnd_w) / (rtile_w * denom);
- *rrow0 = (mi_row * mi_to_px + rnd_h) / (rtile_h * denom);
-
- // rcol1/rrow1 is the equivalent calculation, but for the superblock
- // below-right. There are some slightly strange boundary effects. First, we
- // need to clamp to nhtiles/nvtiles for the case where it appears there are,
- // say, 2.4 restoration tiles horizontally. There we need a maximum mi_row1
- // of 2 because tile 1 gets extended.
- //
- // Second, if mi_col1 >= cm->mi_cols then we must manually set *rcol1 to
- // nhtiles. This is needed whenever the frame's width rounded up to the next
- // toplevel superblock is smaller than nhtiles * rtile_w. The same logic is
- // needed for rows.
- const int mi_row1 = mi_row + mi_size_high[bsize];
- const int mi_col1 = mi_col + mi_size_wide[bsize];
-
- if (mi_col1 >= cm->mi_cols)
- *rcol1 = *nhtiles;
- else
- *rcol1 = AOMMIN(*nhtiles, (mi_col1 * mi_to_px + rnd_w) / (rtile_w * denom));
+ int *rrow1, int *tile_tl_idx) {
+ assert(rcol0 && rcol1 && rrow0 && rrow1);
- if (mi_row1 >= cm->mi_rows)
- *rrow1 = nvtiles;
- else
- *rrow1 = AOMMIN(nvtiles, (mi_row1 * mi_to_px + rnd_h) / (rtile_h * denom));
+ if (bsize != cm->seq_params.sb_size) return 0;
+ if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) return 0;
+
+ assert(!cm->all_lossless);
+
+ const int is_uv = plane > 0;
+
+ const AV1PixelRect tile_rect = av1_whole_frame_rect(cm, is_uv);
+ const int tile_w = tile_rect.right - tile_rect.left;
+ const int tile_h = tile_rect.bottom - tile_rect.top;
+
+ const int mi_top = 0;
+ const int mi_left = 0;
+
+ // Compute the mi-unit corners of the superblock relative to the top-left of
+ // the tile
+ const int mi_rel_row0 = mi_row - mi_top;
+ const int mi_rel_col0 = mi_col - mi_left;
+ const int mi_rel_row1 = mi_rel_row0 + mi_size_high[bsize];
+ const int mi_rel_col1 = mi_rel_col0 + mi_size_wide[bsize];
+
+ const RestorationInfo *rsi = &cm->rst_info[plane];
+ const int size = rsi->restoration_unit_size;
+
+ // Calculate the number of restoration units in this tile (which might be
+ // strictly less than rsi->horz_units_per_tile and rsi->vert_units_per_tile)
+ const int horz_units = av1_lr_count_units_in_tile(size, tile_w);
+ const int vert_units = av1_lr_count_units_in_tile(size, tile_h);
+
+ // The size of an MI-unit on this plane of the image
+ const int ss_x = is_uv && cm->subsampling_x;
+ const int ss_y = is_uv && cm->subsampling_y;
+ const int mi_size_x = MI_SIZE >> ss_x;
+ const int mi_size_y = MI_SIZE >> ss_y;
+
+ // Write m for the relative mi column or row, D for the superres denominator
+ // and N for the superres numerator. If u is the upscaled pixel offset then
+ // we can write the downscaled pixel offset in two ways as:
+ //
+ // MI_SIZE * m = (N / D) * u
+ //
+ // from which we get u = D * MI_SIZE * m / N
+ const int mi_to_num_x = av1_superres_scaled(cm)
+ ? mi_size_x * cm->superres_scale_denominator
+ : mi_size_x;
+ const int mi_to_num_y = mi_size_y;
+ const int denom_x = av1_superres_scaled(cm) ? size * SCALE_NUMERATOR : size;
+ const int denom_y = size;
+
+ const int rnd_x = denom_x - 1;
+ const int rnd_y = denom_y - 1;
+
+ // rcol0/rrow0 should be the first column/row of restoration units (relative
+ // to the top-left of the tile) that doesn't start left/below of
+ // mi_col/mi_row. For this calculation, we need to round up the division (if
+ // the sb starts at runit column 10.1, the first matching runit has column
+ // index 11)
+ *rcol0 = (mi_rel_col0 * mi_to_num_x + rnd_x) / denom_x;
+ *rrow0 = (mi_rel_row0 * mi_to_num_y + rnd_y) / denom_y;
+
+ // rel_col1/rel_row1 is the equivalent calculation, but for the superblock
+ // below-right. If we're at the bottom or right of the tile, this restoration
+ // unit might not exist, in which case we'll clamp accordingly.
+ *rcol1 = AOMMIN((mi_rel_col1 * mi_to_num_x + rnd_x) / denom_x, horz_units);
+ *rrow1 = AOMMIN((mi_rel_row1 * mi_to_num_y + rnd_y) / denom_y, vert_units);
+
+ const int tile_idx = 0;
+ *tile_tl_idx = tile_idx * rsi->units_per_tile;
return *rcol0 < *rcol1 && *rrow0 < *rrow1;
}
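A worked example (not part of the patch) of the round-up division above, assuming MI_SIZE == 4, no superres scaling, restoration_unit_size == 256 and a 128x128 luma superblock (mi_size_wide == 32) starting at mi_col == 64, i.e. pixel 256:

/*
 *   mi_rel_col0 = 64, mi_rel_col1 = 96
 *   rcol0 = (64 * 4 + 255) / 256 = 1
 *   rcol1 = min((96 * 4 + 255) / 256, horz_units) = 2   (given horz_units >= 2)
 *
 * so exactly one restoration unit column (index 1, whose left edge is at
 * pixel 256) has its top-left corner inside this superblock.
 */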
-#if CONFIG_STRIPED_LOOP_RESTORATION
-
// Extend to left and right
-static void extend_line(uint8_t *buf, int width, int extend,
- int use_highbitdepth) {
- int i;
- if (use_highbitdepth) {
- uint16_t val, *buf16 = (uint16_t *)buf;
- val = buf16[0];
- for (i = 0; i < extend; i++) buf16[-1 - i] = val;
- val = buf16[width - 1];
- for (i = 0; i < extend; i++) buf16[width + i] = val;
+static void extend_lines(uint8_t *buf, int width, int height, int stride,
+ int extend, int use_highbitdepth) {
+ for (int i = 0; i < height; ++i) {
+ if (use_highbitdepth) {
+ uint16_t *buf16 = (uint16_t *)buf;
+ aom_memset16(buf16 - extend, buf16[0], extend);
+ aom_memset16(buf16 + width, buf16[width - 1], extend);
+ } else {
+ memset(buf - extend, buf[0], extend);
+ memset(buf + width, buf[width - 1], extend);
+ }
+ buf += stride;
+ }
+}
+
+static void save_deblock_boundary_lines(
+ const YV12_BUFFER_CONFIG *frame, const AV1_COMMON *cm, int plane, int row,
+ int stripe, int use_highbd, int is_above,
+ RestorationStripeBoundaries *boundaries) {
+ const int is_uv = plane > 0;
+ const uint8_t *src_buf = REAL_PTR(use_highbd, frame->buffers[plane]);
+ const int src_stride = frame->strides[is_uv] << use_highbd;
+ const uint8_t *src_rows = src_buf + row * src_stride;
+
+ uint8_t *bdry_buf = is_above ? boundaries->stripe_boundary_above
+ : boundaries->stripe_boundary_below;
+ uint8_t *bdry_start = bdry_buf + (RESTORATION_EXTRA_HORZ << use_highbd);
+ const int bdry_stride = boundaries->stripe_boundary_stride << use_highbd;
+ uint8_t *bdry_rows = bdry_start + RESTORATION_CTX_VERT * stripe * bdry_stride;
+
+ // There is a rare case in which a processing stripe can end 1px above the
+ // crop border. In this case, we do want to use deblocked pixels from below
+ // the stripe (which is why we ended up in this function), but instead of
+ // fetching 2 "below" rows we need to fetch one and duplicate it.
+ // This is equivalent to clamping the sample locations against the crop
+ // border.
+ const int lines_to_save =
+ AOMMIN(RESTORATION_CTX_VERT, frame->crop_heights[is_uv] - row);
+ assert(lines_to_save == 1 || lines_to_save == 2);
+
+ int upscaled_width;
+ int line_bytes;
+ if (av1_superres_scaled(cm)) {
+ const int ss_x = is_uv && cm->subsampling_x;
+ upscaled_width = (cm->superres_upscaled_width + ss_x) >> ss_x;
+ line_bytes = upscaled_width << use_highbd;
+ if (use_highbd)
+ av1_upscale_normative_rows(
+ cm, CONVERT_TO_BYTEPTR(src_rows), frame->strides[is_uv],
+ CONVERT_TO_BYTEPTR(bdry_rows), boundaries->stripe_boundary_stride,
+ plane, lines_to_save);
+ else
+ av1_upscale_normative_rows(cm, src_rows, frame->strides[is_uv], bdry_rows,
+ boundaries->stripe_boundary_stride, plane,
+ lines_to_save);
} else {
- uint8_t val;
- val = buf[0];
- for (i = 0; i < extend; i++) buf[-1 - i] = val;
- val = buf[width - 1];
- for (i = 0; i < extend; i++) buf[width + i] = val;
+ upscaled_width = frame->crop_widths[is_uv];
+ line_bytes = upscaled_width << use_highbd;
+ for (int i = 0; i < lines_to_save; i++) {
+ memcpy(bdry_rows + i * bdry_stride, src_rows + i * src_stride,
+ line_bytes);
+ }
}
+ // If we only saved one line, then copy it into the second line buffer
+ if (lines_to_save == 1)
+ memcpy(bdry_rows + bdry_stride, bdry_rows, line_bytes);
+
+ extend_lines(bdry_rows, upscaled_width, RESTORATION_CTX_VERT, bdry_stride,
+ RESTORATION_EXTRA_HORZ, use_highbd);
+}
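A small worked example (not part of the patch) of the clamp above: for a hypothetical luma plane with crop_height == 100, a "below" boundary requested at row == 99 gives

/*
 *   lines_to_save = AOMMIN(RESTORATION_CTX_VERT, 100 - 99) = 1
 *
 * so only row 99 is copied, and the memcpy after the if/else duplicates it
 * into the second context line, which is equivalent to clamping the sample
 * positions against the crop border.
 */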
+
+static void save_cdef_boundary_lines(const YV12_BUFFER_CONFIG *frame,
+ const AV1_COMMON *cm, int plane, int row,
+ int stripe, int use_highbd, int is_above,
+ RestorationStripeBoundaries *boundaries) {
+ const int is_uv = plane > 0;
+ const uint8_t *src_buf = REAL_PTR(use_highbd, frame->buffers[plane]);
+ const int src_stride = frame->strides[is_uv] << use_highbd;
+ const uint8_t *src_rows = src_buf + row * src_stride;
+
+ uint8_t *bdry_buf = is_above ? boundaries->stripe_boundary_above
+ : boundaries->stripe_boundary_below;
+ uint8_t *bdry_start = bdry_buf + (RESTORATION_EXTRA_HORZ << use_highbd);
+ const int bdry_stride = boundaries->stripe_boundary_stride << use_highbd;
+ uint8_t *bdry_rows = bdry_start + RESTORATION_CTX_VERT * stripe * bdry_stride;
+ const int src_width = frame->crop_widths[is_uv];
+
+ // At the point where this function is called, we've already applied
+ // superres. So we don't need to extend the lines here, we can just
+ // pull directly from the topmost row of the upscaled frame.
+ const int ss_x = is_uv && cm->subsampling_x;
+ const int upscaled_width = av1_superres_scaled(cm)
+ ? (cm->superres_upscaled_width + ss_x) >> ss_x
+ : src_width;
+ const int line_bytes = upscaled_width << use_highbd;
+ for (int i = 0; i < RESTORATION_CTX_VERT; i++) {
+ // Copy the line at 'row' into both context lines. This is because
+ // we want to (effectively) extend the outermost row of CDEF data
+ // from this tile to produce a border, rather than using deblocked
+ // pixels from the tile above/below.
+ memcpy(bdry_rows + i * bdry_stride, src_rows, line_bytes);
+ }
+ extend_lines(bdry_rows, upscaled_width, RESTORATION_CTX_VERT, bdry_stride,
+ RESTORATION_EXTRA_HORZ, use_highbd);
}
-// For each 64 pixel high stripe, save 4 scan lines to be used as boundary in
-// the loop restoration process. The lines are saved in
-// rst_internal.stripe_boundary_lines
-void av1_loop_restoration_save_boundary_lines(YV12_BUFFER_CONFIG *frame,
- AV1_COMMON *cm) {
- int p, boundary_stride;
- int src_width, src_height, src_stride, stripe_height, stripe_offset, stripe_y,
- yy;
- uint8_t *src_buf, *boundary_below_buf, *boundary_above_buf;
- int use_highbitdepth = 0;
- for (p = 0; p < MAX_MB_PLANE; ++p) {
- if (p == 0) {
- src_buf = frame->y_buffer;
- src_width = frame->y_crop_width;
- src_height = frame->y_crop_height;
- src_stride = frame->y_stride;
- stripe_height = 64;
- stripe_offset = 56 - 2; // offset of first line to copy
+static void save_tile_row_boundary_lines(const YV12_BUFFER_CONFIG *frame,
+ int use_highbd, int plane,
+ AV1_COMMON *cm, int after_cdef) {
+ const int is_uv = plane > 0;
+ const int ss_y = is_uv && cm->subsampling_y;
+ const int stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
+ const int stripe_off = RESTORATION_UNIT_OFFSET >> ss_y;
+
+ // Get the tile rectangle, with height rounded up to the next multiple of 8
+ // luma pixels (only relevant for the bottom tile of the frame)
+ const AV1PixelRect tile_rect = av1_whole_frame_rect(cm, is_uv);
+ const int stripe0 = 0;
+
+ RestorationStripeBoundaries *boundaries = &cm->rst_info[plane].boundaries;
+
+ const int plane_height = ROUND_POWER_OF_TWO(cm->height, ss_y);
+
+ int tile_stripe;
+ for (tile_stripe = 0;; ++tile_stripe) {
+ const int rel_y0 = AOMMAX(0, tile_stripe * stripe_height - stripe_off);
+ const int y0 = tile_rect.top + rel_y0;
+ if (y0 >= tile_rect.bottom) break;
+
+ const int rel_y1 = (tile_stripe + 1) * stripe_height - stripe_off;
+ const int y1 = AOMMIN(tile_rect.top + rel_y1, tile_rect.bottom);
+
+ const int frame_stripe = stripe0 + tile_stripe;
+
+ // In this case, we should only use CDEF pixels at the top
+ // and bottom of the frame as a whole; internal tile boundaries
+ // can use deblocked pixels from adjacent tiles for context.
+ const int use_deblock_above = (frame_stripe > 0);
+ const int use_deblock_below = (y1 < plane_height);
+
+ if (!after_cdef) {
+ // Save deblocked context where needed.
+ if (use_deblock_above) {
+ save_deblock_boundary_lines(frame, cm, plane, y0 - RESTORATION_CTX_VERT,
+ frame_stripe, use_highbd, 1, boundaries);
+ }
+ if (use_deblock_below) {
+ save_deblock_boundary_lines(frame, cm, plane, y1, frame_stripe,
+ use_highbd, 0, boundaries);
+ }
} else {
- src_buf = p == 1 ? frame->u_buffer : frame->v_buffer;
- src_width = frame->uv_crop_width;
- src_height = frame->uv_crop_height;
- src_stride = frame->uv_stride;
- stripe_height = 64 >> cm->subsampling_y;
- stripe_offset = (56 >> cm->subsampling_y) - 2;
- }
- boundary_above_buf = cm->rst_internal.stripe_boundary_above[p];
- boundary_below_buf = cm->rst_internal.stripe_boundary_below[p];
- boundary_stride = cm->rst_internal.stripe_boundary_stride[p];
-#if CONFIG_HIGHBITDEPTH
- use_highbitdepth = cm->use_highbitdepth;
- if (use_highbitdepth) {
- src_buf = (uint8_t *)CONVERT_TO_SHORTPTR(src_buf);
- }
-#endif
- src_buf += (stripe_offset * src_stride) << use_highbitdepth;
- boundary_above_buf += RESTORATION_EXTRA_HORZ << use_highbitdepth;
- boundary_below_buf += RESTORATION_EXTRA_HORZ << use_highbitdepth;
- // Loop over stripes
- for (stripe_y = stripe_offset; stripe_y < src_height;
- stripe_y += stripe_height) {
- // Save 2 lines above the LR stripe (offset -9, -10)
- for (yy = 0; yy < 2; yy++) {
- if (stripe_y + yy < src_height) {
- memcpy(boundary_above_buf, src_buf, src_width << use_highbitdepth);
- extend_line(boundary_above_buf, src_width, RESTORATION_EXTRA_HORZ,
- use_highbitdepth);
- src_buf += src_stride << use_highbitdepth;
- boundary_above_buf += boundary_stride << use_highbitdepth;
- }
+ // Save CDEF context where needed. Note that we need to save the CDEF
+ // context for a particular boundary iff we *didn't* save deblocked
+ // context for that boundary.
+ //
+ // In addition, we need to save copies of the outermost line within
+ // the tile, rather than using data from outside the tile.
+ if (!use_deblock_above) {
+ save_cdef_boundary_lines(frame, cm, plane, y0, frame_stripe, use_highbd,
+ 1, boundaries);
}
- // Save 2 lines below the LR stripe (offset 56,57)
- for (yy = 2; yy < 4; yy++) {
- if (stripe_y + yy < src_height) {
- memcpy(boundary_below_buf, src_buf, src_width << use_highbitdepth);
- extend_line(boundary_below_buf, src_width, RESTORATION_EXTRA_HORZ,
- use_highbitdepth);
- src_buf += src_stride << use_highbitdepth;
- boundary_below_buf += boundary_stride << use_highbitdepth;
- }
+ if (!use_deblock_below) {
+ save_cdef_boundary_lines(frame, cm, plane, y1 - 1, frame_stripe,
+ use_highbd, 0, boundaries);
}
- // jump to next stripe
- src_buf += ((stripe_height - 4) * src_stride) << use_highbitdepth;
}
}
}
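A worked example (not part of the patch) of how the loop above picks its boundary context, for a hypothetical 140-row luma plane (stripe_height == 64, stripe_off == 8), which splits into stripes covering rows [0, 56), [56, 120) and [120, 140):

/*
 *   stripe 0: top of frame    -> CDEF context above, deblocked context below
 *   stripe 1: interior        -> deblocked context above and below
 *   stripe 2: bottom of frame -> deblocked context above, CDEF context below
 *
 * CDEF pixels are therefore only reused at the very top and bottom of the
 * frame, as the comments inside the loop describe.
 */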
-#endif // CONFIG_STRIPED_LOOP_RESTORATION
+// For each RESTORATION_PROC_UNIT_SIZE-pixel-high stripe, save 4 scan
+// lines to be used as boundary context in the loop restoration process. The
+// lines are saved in cm->rst_info[plane].boundaries.
+void av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG *frame,
+ AV1_COMMON *cm, int after_cdef) {
+ const int num_planes = av1_num_planes(cm);
+ const int use_highbd = cm->use_highbitdepth;
+ for (int p = 0; p < num_planes; ++p) {
+ save_tile_row_boundary_lines(frame, use_highbd, p, cm, after_cdef);
+ }
+}
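Since the logic above distinguishes deblocked context from CDEF context, av1_loop_restoration_save_boundary_lines is intended to be called twice per frame. A minimal sketch of that call order, with illustrative variable names (frame, cm) rather than the actual encoder/decoder call sites:

  // Illustrative call order only (assumes a YV12_BUFFER_CONFIG *frame and an
  // AV1_COMMON *cm from the surrounding codec context).
  // After deblocking, stash deblocked rows around each stripe boundary:
  av1_loop_restoration_save_boundary_lines(frame, cm, /*after_cdef=*/0);
  // ... run CDEF on the frame ...
  // After CDEF, fill in the boundaries that could not use deblocked context
  // (the top and bottom of the frame) from the CDEF output:
  av1_loop_restoration_save_boundary_lines(frame, cm, /*after_cdef=*/1);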
diff --git a/third_party/aom/av1/common/restoration.h b/third_party/aom/av1/common/restoration.h
index 23a53879e..0c4017534 100644
--- a/third_party/aom/av1/common/restoration.h
+++ b/third_party/aom/av1/common/restoration.h
@@ -13,9 +13,10 @@
#define AV1_COMMON_RESTORATION_H_
#include "aom_ports/mem.h"
-#include "./aom_config.h"
+#include "config/aom_config.h"
#include "av1/common/blockd.h"
+#include "av1/common/enums.h"
#ifdef __cplusplus
extern "C" {
@@ -26,23 +27,13 @@ extern "C" {
#define RESTORATION_PROC_UNIT_SIZE 64
-#if CONFIG_STRIPED_LOOP_RESTORATION
// Filter tile grid offset upwards compared to the superblock grid
-#define RESTORATION_TILE_OFFSET 8
-#endif
+#define RESTORATION_UNIT_OFFSET 8
-#if CONFIG_STRIPED_LOOP_RESTORATION
-#define SGRPROJ_BORDER_VERT 2 // Vertical border used for Sgr
-#else
-#define SGRPROJ_BORDER_VERT 1 // Vertical border used for Sgr
-#endif
-#define SGRPROJ_BORDER_HORZ 2 // Horizontal border used for Sgr
+#define SGRPROJ_BORDER_VERT 3 // Vertical border used for Sgr
+#define SGRPROJ_BORDER_HORZ 3 // Horizontal border used for Sgr
-#if CONFIG_STRIPED_LOOP_RESTORATION
#define WIENER_BORDER_VERT 2 // Vertical border used for Wiener
-#else
-#define WIENER_BORDER_VERT 1 // Vertical border used for Wiener
-#endif
#define WIENER_HALFWIN 3
#define WIENER_BORDER_HORZ (WIENER_HALFWIN) // Horizontal border for Wiener
@@ -61,11 +52,16 @@ extern "C" {
#define RESTORATION_BORDER_HORZ (WIENER_BORDER_HORZ)
#endif // SGRPROJ_BORDER_VERT >= WIENER_BORDER_VERT
-#if CONFIG_STRIPED_LOOP_RESTORATION
+// How many border pixels do we need for each processing unit?
+#define RESTORATION_BORDER 3
+
+// How many rows of deblocked pixels do we save above/below each processing
+// stripe?
+#define RESTORATION_CTX_VERT 2
+
// Additional pixels to the left and right in above/below buffers
// It is RESTORATION_BORDER_HORZ rounded up to get nicer buffer alignment
#define RESTORATION_EXTRA_HORZ 4
-#endif
 // Pad up to 20 more (maybe much less is needed)
#define RESTORATION_PADDING 20
@@ -75,30 +71,23 @@ extern "C" {
(RESTORATION_PROC_UNIT_SIZE + RESTORATION_BORDER_VERT * 2 + \
RESTORATION_PADDING))
-#define RESTORATION_TILESIZE_MAX 256
-#if CONFIG_STRIPED_LOOP_RESTORATION
-#define RESTORATION_TILEPELS_HORZ_MAX \
- (RESTORATION_TILESIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_HORZ + 16)
-#define RESTORATION_TILEPELS_VERT_MAX \
- ((RESTORATION_TILESIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_VERT + \
- RESTORATION_TILE_OFFSET))
-#define RESTORATION_TILEPELS_MAX \
- (RESTORATION_TILEPELS_HORZ_MAX * RESTORATION_TILEPELS_VERT_MAX)
-#else
-#define RESTORATION_TILEPELS_MAX \
- ((RESTORATION_TILESIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_HORZ + 16) * \
- (RESTORATION_TILESIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_VERT))
-#endif
+#define RESTORATION_UNITSIZE_MAX 256
+#define RESTORATION_UNITPELS_HORZ_MAX \
+ (RESTORATION_UNITSIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_HORZ + 16)
+#define RESTORATION_UNITPELS_VERT_MAX \
+ ((RESTORATION_UNITSIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_VERT + \
+ RESTORATION_UNIT_OFFSET))
+#define RESTORATION_UNITPELS_MAX \
+ (RESTORATION_UNITPELS_HORZ_MAX * RESTORATION_UNITPELS_VERT_MAX)
// Two 32-bit buffers needed for the restored versions from two filters
// TODO(debargha, rupert): Refactor to not need the large tilesize to be stored
// on the decoder side.
-#define SGRPROJ_TMPBUF_SIZE (RESTORATION_TILEPELS_MAX * 2 * sizeof(int32_t))
+#define SGRPROJ_TMPBUF_SIZE (RESTORATION_UNITPELS_MAX * 2 * sizeof(int32_t))
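As a rough size check (not normative; it assumes RESTORATION_BORDER_HORZ and RESTORATION_BORDER_VERT both resolve to 3, which follows from the sgrproj/Wiener borders defined above):

  RESTORATION_UNITPELS_HORZ_MAX = 256 * 3 / 2 + 2 * 3 + 16 = 406
  RESTORATION_UNITPELS_VERT_MAX = 256 * 3 / 2 + 2 * 3 + 8  = 398
  RESTORATION_UNITPELS_MAX      = 406 * 398                = 161588
  SGRPROJ_TMPBUF_SIZE           = 161588 * 2 * 4 bytes     ~ 1.23 MiB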
#define SGRPROJ_EXTBUF_SIZE (0)
#define SGRPROJ_PARAMS_BITS 4
#define SGRPROJ_PARAMS (1 << SGRPROJ_PARAMS_BITS)
-#define USE_HIGHPASS_IN_SGRPROJ 0
// Precision bits for projection
#define SGRPROJ_PRJ_BITS 7
@@ -108,24 +97,16 @@ extern "C" {
#define SGRPROJ_SGR_BITS 8
#define SGRPROJ_SGR (1 << SGRPROJ_SGR_BITS)
-#if USE_HIGHPASS_IN_SGRPROJ
-#define SGRPROJ_PRJ_MIN0 (-(1 << SGRPROJ_PRJ_BITS) / 8)
-#define SGRPROJ_PRJ_MAX0 (SGRPROJ_PRJ_MIN0 + (1 << SGRPROJ_PRJ_BITS) - 1)
-#define SGRPROJ_PRJ_MIN1 (-(1 << SGRPROJ_PRJ_BITS) / 2)
-#define SGRPROJ_PRJ_MAX1 (SGRPROJ_PRJ_MIN1 + (1 << SGRPROJ_PRJ_BITS) - 1)
-#else
#define SGRPROJ_PRJ_MIN0 (-(1 << SGRPROJ_PRJ_BITS) * 3 / 4)
#define SGRPROJ_PRJ_MAX0 (SGRPROJ_PRJ_MIN0 + (1 << SGRPROJ_PRJ_BITS) - 1)
#define SGRPROJ_PRJ_MIN1 (-(1 << SGRPROJ_PRJ_BITS) / 4)
#define SGRPROJ_PRJ_MAX1 (SGRPROJ_PRJ_MIN1 + (1 << SGRPROJ_PRJ_BITS) - 1)
-#endif // USE_HIGHPASS_IN_SGRPROJ
#define SGRPROJ_PRJ_SUBEXP_K 4
#define SGRPROJ_BITS (SGRPROJ_PRJ_BITS * 2 + SGRPROJ_PARAMS_BITS)
#define MAX_RADIUS 2 // Only 1, 2, 3 allowed
-#define MAX_EPS 80 // Max value of eps
#define MAX_NELEM ((2 * MAX_RADIUS + 1) * (2 * MAX_RADIUS + 1))
#define SGRPROJ_MTABLE_BITS 20
#define SGRPROJ_RECIP_BITS 12
@@ -143,17 +124,13 @@ extern "C" {
#define WIENER_FILT_PREC_BITS 7
#define WIENER_FILT_STEP (1 << WIENER_FILT_PREC_BITS)
-// Whether to use high intermediate precision filtering
-#define USE_WIENER_HIGH_INTERMEDIATE_PRECISION 1
-
// Central values for the taps
#define WIENER_FILT_TAP0_MIDV (3)
#define WIENER_FILT_TAP1_MIDV (-7)
#define WIENER_FILT_TAP2_MIDV (15)
-#define WIENER_FILT_TAP3_MIDV \
- (WIENER_FILT_STEP - \
- 2 * (WIENER_FILT_TAP0_MIDV + WIENER_FILT_TAP1_MIDV + \
- WIENER_FILT_TAP2_MIDV))
+#define WIENER_FILT_TAP3_MIDV \
+ (WIENER_FILT_STEP - 2 * (WIENER_FILT_TAP0_MIDV + WIENER_FILT_TAP1_MIDV + \
+ WIENER_FILT_TAP2_MIDV))
#define WIENER_FILT_TAP0_BITS 4
#define WIENER_FILT_TAP1_BITS 5
@@ -194,51 +171,64 @@ extern "C" {
#error "Wiener filter currently only works if WIENER_FILT_PREC_BITS == 7"
#endif
+#define LR_TILE_ROW 0
+#define LR_TILE_COL 0
+#define LR_TILE_COLS 1
+
typedef struct {
-#if USE_HIGHPASS_IN_SGRPROJ
- int corner;
- int edge;
-#else
- int r1;
- int e1;
-#endif // USE_HIGHPASS_IN_SGRPROJ
- int r2;
- int e2;
+ int r[2]; // radii
+ int s[2]; // sgr parameters for r[0] and r[1], based on GenSgrprojVtable()
} sgr_params_type;
typedef struct {
- int restoration_tilesize;
- int procunit_width, procunit_height;
- RestorationType frame_restoration_type;
- RestorationType *restoration_type;
- // Wiener filter
- WienerInfo *wiener_info;
- // Selfguided proj filter
- SgrprojInfo *sgrproj_info;
-} RestorationInfo;
+ RestorationType restoration_type;
+ WienerInfo wiener_info;
+ SgrprojInfo sgrproj_info;
+} RestorationUnitInfo;
+
+// A restoration line buffer needs space for two lines plus a horizontal filter
+// margin of RESTORATION_EXTRA_HORZ on each side.
+#define RESTORATION_LINEBUFFER_WIDTH \
+ (RESTORATION_UNITSIZE_MAX * 3 / 2 + 2 * RESTORATION_EXTRA_HORZ)
+
+// Similarly, the column buffers (used when we're at a vertical tile edge
+// that we can't filter across) need space for one processing unit's worth
+// of pixels, plus the top/bottom border width
+#define RESTORATION_COLBUFFER_HEIGHT \
+ (RESTORATION_PROC_UNIT_SIZE + 2 * RESTORATION_BORDER)
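Plugging in the constants already defined above gives (a quick sanity check, not normative):

  RESTORATION_LINEBUFFER_WIDTH = 256 * 3 / 2 + 2 * 4 = 392 samples per row
  RESTORATION_COLBUFFER_HEIGHT = 64 + 2 * 3          = 70 rows

so each tmp_save_above/tmp_save_below array in the RestorationLineBuffers struct declared below is 3 rows of 392 uint16_t values, roughly 2.3 KiB apiece.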
typedef struct {
- RestorationInfo *rsi;
- int keyframe;
- int ntiles;
- int tile_width, tile_height;
- int nhtiles, nvtiles;
- int32_t *tmpbuf;
-#if CONFIG_STRIPED_LOOP_RESTORATION
- int component;
- int subsampling_y;
- uint8_t *stripe_boundary_above[MAX_MB_PLANE];
- uint8_t *stripe_boundary_below[MAX_MB_PLANE];
- int stripe_boundary_stride[MAX_MB_PLANE];
- // Temporary buffers to save/restore 2 lines above/below the restoration
- // stripe
- // Allow for filter margin to left and right
- uint16_t
- tmp_save_above[2][RESTORATION_TILESIZE_MAX + 2 * RESTORATION_EXTRA_HORZ];
- uint16_t
- tmp_save_below[2][RESTORATION_TILESIZE_MAX + 2 * RESTORATION_EXTRA_HORZ];
-#endif
-} RestorationInternal;
+ // Temporary buffers to save/restore 3 lines above/below the restoration
+ // stripe.
+ uint16_t tmp_save_above[RESTORATION_BORDER][RESTORATION_LINEBUFFER_WIDTH];
+ uint16_t tmp_save_below[RESTORATION_BORDER][RESTORATION_LINEBUFFER_WIDTH];
+} RestorationLineBuffers;
+
+typedef struct {
+ uint8_t *stripe_boundary_above;
+ uint8_t *stripe_boundary_below;
+ int stripe_boundary_stride;
+ int stripe_boundary_size;
+} RestorationStripeBoundaries;
+
+typedef struct {
+ RestorationType frame_restoration_type;
+ int restoration_unit_size;
+
+ // Fields below here are allocated and initialised by
+ // av1_alloc_restoration_struct. (horz_)units_per_tile give the number of
+ // restoration units in (one row of) the largest tile in the frame. The data
+ // in unit_info is laid out with units_per_tile entries for each tile, which
+ // have stride horz_units_per_tile.
+ //
+ // Even if there are tiles of different sizes, the data in unit_info is laid
+ // out as if all tiles are of full size.
+ int units_per_tile;
+ int vert_units_per_tile, horz_units_per_tile;
+ RestorationUnitInfo *unit_info;
+ RestorationStripeBoundaries boundaries;
+ int optimized_lr;
+} RestorationInfo;
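The layout comment above implies the following indexing into unit_info. This is an illustrative helper only (the name lr_unit_info is ours, not part of the header), assuming a linear tile index:

// Illustrative only: fetch the info for restoration unit (unit_row, unit_col)
// of the tile with linear index tile_idx, following the layout described in
// the comment above (tiles are concatenated, each padded to units_per_tile
// entries, with row stride horz_units_per_tile).
static INLINE RestorationUnitInfo *lr_unit_info(RestorationInfo *rsi,
                                                int tile_idx, int unit_row,
                                                int unit_col) {
  return &rsi->unit_info[tile_idx * rsi->units_per_tile +
                         unit_row * rsi->horz_units_per_tile + unit_col];
}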
static INLINE void set_default_sgrproj(SgrprojInfo *sgrproj_info) {
sgrproj_info->xqd[0] = (SGRPROJ_PRJ_MIN0 + SGRPROJ_PRJ_MAX0) / 2;
@@ -257,91 +247,128 @@ static INLINE void set_default_wiener(WienerInfo *wiener_info) {
wiener_info->vfilter[6] = wiener_info->hfilter[6] = WIENER_FILT_TAP0_MIDV;
}
-static INLINE int av1_get_rest_ntiles(int width, int height, int tilesize,
- int *tile_width, int *tile_height,
- int *nhtiles, int *nvtiles) {
- int nhtiles_, nvtiles_;
- int tile_width_, tile_height_;
- tile_width_ = (tilesize < 0) ? width : AOMMIN(tilesize, width);
- tile_height_ = (tilesize < 0) ? height : AOMMIN(tilesize, height);
- assert(tile_width_ > 0 && tile_height_ > 0);
-
- nhtiles_ = (width + (tile_width_ >> 1)) / tile_width_;
- nvtiles_ = (height + (tile_height_ >> 1)) / tile_height_;
- if (tile_width) *tile_width = tile_width_;
- if (tile_height) *tile_height = tile_height_;
- if (nhtiles) *nhtiles = nhtiles_;
- if (nvtiles) *nvtiles = nvtiles_;
- return (nhtiles_ * nvtiles_);
-}
-
-typedef struct { int h_start, h_end, v_start, v_end; } RestorationTileLimits;
-
-static INLINE RestorationTileLimits
-av1_get_rest_tile_limits(int tile_idx, int nhtiles, int nvtiles, int tile_width,
- int tile_height, int im_width,
-#if CONFIG_STRIPED_LOOP_RESTORATION
- int im_height, int subsampling_y) {
-#else
- int im_height) {
-#endif
- const int htile_idx = tile_idx % nhtiles;
- const int vtile_idx = tile_idx / nhtiles;
- RestorationTileLimits limits;
- limits.h_start = htile_idx * tile_width;
- limits.v_start = vtile_idx * tile_height;
- limits.h_end =
- (htile_idx < nhtiles - 1) ? limits.h_start + tile_width : im_width;
- limits.v_end =
- (vtile_idx < nvtiles - 1) ? limits.v_start + tile_height : im_height;
-#if CONFIG_STRIPED_LOOP_RESTORATION
- // Offset the tile upwards to align with the restoration processing stripe
- limits.v_start -= RESTORATION_TILE_OFFSET >> subsampling_y;
- if (limits.v_start < 0) limits.v_start = 0;
- if (limits.v_end < im_height)
- limits.v_end -= RESTORATION_TILE_OFFSET >> subsampling_y;
-#endif
- return limits;
-}
+typedef struct {
+ int h_start, h_end, v_start, v_end;
+} RestorationTileLimits;
+
+typedef void (*rest_unit_visitor_t)(const RestorationTileLimits *limits,
+ const AV1PixelRect *tile_rect,
+ int rest_unit_idx, void *priv,
+ int32_t *tmpbuf,
+ RestorationLineBuffers *rlbs);
+
+typedef struct FilterFrameCtxt {
+ const RestorationInfo *rsi;
+ int tile_stripe0;
+ int ss_x, ss_y;
+ int highbd, bit_depth;
+ uint8_t *data8, *dst8;
+ int data_stride, dst_stride;
+ AV1PixelRect tile_rect;
+} FilterFrameCtxt;
+
+typedef struct AV1LrStruct {
+ rest_unit_visitor_t on_rest_unit;
+ FilterFrameCtxt ctxt[MAX_MB_PLANE];
+ YV12_BUFFER_CONFIG *frame;
+ YV12_BUFFER_CONFIG *dst;
+} AV1LrStruct;
extern const sgr_params_type sgr_params[SGRPROJ_PARAMS];
-extern int sgrproj_mtable[MAX_EPS][MAX_NELEM];
+extern int sgrproj_mtable[SGRPROJ_PARAMS][2];
extern const int32_t x_by_xplus1[256];
extern const int32_t one_by_x[MAX_NELEM];
-int av1_alloc_restoration_struct(struct AV1Common *cm,
- RestorationInfo *rst_info, int width,
- int height);
+void av1_alloc_restoration_struct(struct AV1Common *cm, RestorationInfo *rsi,
+ int is_uv);
void av1_free_restoration_struct(RestorationInfo *rst_info);
void extend_frame(uint8_t *data, int width, int height, int stride,
- int border_horz, int border_vert);
-#if CONFIG_HIGHBITDEPTH
-void extend_frame_highbd(uint16_t *data, int width, int height, int stride,
- int border_horz, int border_vert);
-#endif // CONFIG_HIGHBITDEPTH
-void decode_xq(int *xqd, int *xq);
-void av1_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
- RestorationInfo *rsi, int components_pattern,
- int partial_frame, YV12_BUFFER_CONFIG *dst);
+ int border_horz, int border_vert, int highbd);
+void decode_xq(const int *xqd, int *xq, const sgr_params_type *params);
+
+// Filter a single loop restoration unit.
+//
+// limits is the limits of the unit. rui gives the mode to use for this unit
+// and its coefficients. If striped loop restoration is enabled, rsb contains
+// deblocked pixels to use for stripe boundaries; rlbs is just some space to
+// use as a scratch buffer. tile_rect gives the limits of the tile containing
+// this unit. tile_stripe0 is the index of the first stripe in this tile.
+//
+// ss_x and ss_y are flags which should be 1 if this is a plane with
+// horizontal/vertical subsampling, respectively. highbd is a flag which should
+// be 1 in high bit depth mode, in which case bit_depth is the bit depth.
+//
+// data8 is the frame data (pointing at the top-left corner of the frame, not
+// the restoration unit) and stride is its stride. dst8 is the buffer where the
+// results will be written and has stride dst_stride. Like data8, dst8 should
+// point at the top-left corner of the frame.
+//
+// Finally, tmpbuf is a scratch buffer used by the sgrproj filter; it should
+// be at least SGRPROJ_TMPBUF_SIZE bytes in size.
+void av1_loop_restoration_filter_unit(
+ const RestorationTileLimits *limits, const RestorationUnitInfo *rui,
+ const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs,
+ const AV1PixelRect *tile_rect, int tile_stripe0, int ss_x, int ss_y,
+ int highbd, int bit_depth, uint8_t *data8, int stride, uint8_t *dst8,
+ int dst_stride, int32_t *tmpbuf, int optimized_lr);
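To make the long parameter list above concrete, here is a minimal sketch of a rest_unit_visitor_t that forwards to av1_loop_restoration_filter_unit using the FilterFrameCtxt fields declared earlier. It is illustrative only; the actual visitor in restoration.c may differ in detail.

// Illustrative visitor: filter one restoration unit using the per-plane
// context carried in priv.
static void filter_unit_sketch(const RestorationTileLimits *limits,
                               const AV1PixelRect *tile_rect, int rest_unit_idx,
                               void *priv, int32_t *tmpbuf,
                               RestorationLineBuffers *rlbs) {
  FilterFrameCtxt *ctxt = (FilterFrameCtxt *)priv;
  const RestorationInfo *rsi = ctxt->rsi;
  av1_loop_restoration_filter_unit(
      limits, &rsi->unit_info[rest_unit_idx], &rsi->boundaries, rlbs, tile_rect,
      ctxt->tile_stripe0, ctxt->ss_x, ctxt->ss_y, ctxt->highbd, ctxt->bit_depth,
      ctxt->data8, ctxt->data_stride, ctxt->dst8, ctxt->dst_stride, tmpbuf,
      rsi->optimized_lr);
}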
+
+void av1_loop_restoration_filter_frame(YV12_BUFFER_CONFIG *frame,
+ struct AV1Common *cm, int optimized_lr,
+ void *lr_ctxt);
void av1_loop_restoration_precal();
+typedef void (*rest_tile_start_visitor_t)(int tile_row, int tile_col,
+ void *priv);
+struct AV1LrSyncData;
+
+typedef void (*sync_read_fn_t)(void *const lr_sync, int r, int c, int plane);
+
+typedef void (*sync_write_fn_t)(void *const lr_sync, int r, int c,
+ const int sb_cols, int plane);
+
+// Call on_rest_unit for each loop restoration unit in the plane.
+void av1_foreach_rest_unit_in_plane(const struct AV1Common *cm, int plane,
+ rest_unit_visitor_t on_rest_unit,
+ void *priv, AV1PixelRect *tile_rect,
+ int32_t *tmpbuf,
+ RestorationLineBuffers *rlbs);
+
// Return 1 iff the block at mi_row, mi_col with size bsize is a
// top-level superblock containing the top-left corner of at least one
-// loop restoration tile.
+// loop restoration unit.
//
// If the block is a top-level superblock, the function writes to
-// *rcol0, *rcol1, *rrow0, *rrow1. The rectangle of indices given by
-// [*rcol0, *rcol1) x [*rrow0, *rrow1) will point at the set of rtiles
-// whose top left corners lie in the superblock. Note that the set is
-// only nonempty if *rcol0 < *rcol1 and *rrow0 < *rrow1.
+// *rcol0, *rcol1, *rrow0, *rrow1. The rectangle of restoration unit
+// indices given by [*rcol0, *rcol1) x [*rrow0, *rrow1) are relative
+// to the current tile, whose starting index is returned as
+// *tile_tl_idx.
int av1_loop_restoration_corners_in_sb(const struct AV1Common *cm, int plane,
int mi_row, int mi_col, BLOCK_SIZE bsize,
int *rcol0, int *rcol1, int *rrow0,
- int *rrow1, int *nhtiles);
-
-void av1_loop_restoration_save_boundary_lines(YV12_BUFFER_CONFIG *frame,
- struct AV1Common *cm);
+ int *rrow1, int *tile_tl_idx);
+
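For illustration, a caller that has just reached a top-level superblock might use the corner information like this (a sketch under the assumption that the per-plane info lives on AV1_COMMON as cm->rst_info[plane]; the loop body is elided):

  int rcol0, rcol1, rrow0, rrow1, tile_tl_idx;
  const RestorationInfo *rsi = &cm->rst_info[plane];
  if (av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize,
                                         &rcol0, &rcol1, &rrow0, &rrow1,
                                         &tile_tl_idx)) {
    for (int rrow = rrow0; rrow < rrow1; ++rrow) {
      for (int rcol = rcol0; rcol < rcol1; ++rcol) {
        const int runit_idx =
            tile_tl_idx + rcol + rrow * rsi->horz_units_per_tile;
        // ... read or write rsi->unit_info[runit_idx] here ...
      }
    }
  }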
+void av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG *frame,
+ struct AV1Common *cm,
+ int after_cdef);
+void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt,
+ YV12_BUFFER_CONFIG *frame,
+ struct AV1Common *cm,
+ int optimized_lr, int num_planes);
+void av1_loop_restoration_copy_planes(AV1LrStruct *loop_rest_ctxt,
+ struct AV1Common *cm, int num_planes);
+void av1_foreach_rest_unit_in_row(
+ RestorationTileLimits *limits, const AV1PixelRect *tile_rect,
+ rest_unit_visitor_t on_rest_unit, int row_number, int unit_size,
+ int unit_idx0, int hunits_per_tile, int vunits_per_tile, int plane,
+ void *priv, int32_t *tmpbuf, RestorationLineBuffers *rlbs,
+ sync_read_fn_t on_sync_read, sync_write_fn_t on_sync_write,
+ struct AV1LrSyncData *const lr_sync);
+AV1PixelRect av1_whole_frame_rect(const struct AV1Common *cm, int is_uv);
+int av1_lr_count_units_in_tile(int unit_size, int tile_size);
+void av1_lr_sync_read_dummy(void *const lr_sync, int r, int c, int plane);
+void av1_lr_sync_write_dummy(void *const lr_sync, int r, int c,
+ const int sb_cols, int plane);
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/scale.c b/third_party/aom/av1/common/scale.c
index d5ccdfec0..c525fe229 100644
--- a/third_party/aom/av1/common/scale.c
+++ b/third_party/aom/av1/common/scale.c
@@ -9,7 +9,9 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
#include "av1/common/filter.h"
#include "av1/common/scale.h"
#include "aom_dsp/aom_filter.h"
@@ -46,12 +48,9 @@ static int get_fixed_point_scale_factor(int other_size, int this_size) {
return ((other_size << REF_SCALE_SHIFT) + this_size / 2) / this_size;
}
-static int get_coarse_point_scale_factor(int other_size, int this_size) {
- // Calculate scaling factor once for each reference frame
- // and use fixed point scaling factors in decoding and encoding routines.
- // Hardware implementations can calculate scale factor in device driver
- // and use multiplication and shifting on hardware instead of division.
- return ((other_size << SCALE_SUBPEL_BITS) + this_size / 2) / this_size;
+// Given the fixed-point scale, calculate the coarse-point scale.
+static int fixed_point_scale_to_coarse_point_scale(int scale_fp) {
+ return ROUND_POWER_OF_TWO(scale_fp, REF_SCALE_SHIFT - SCALE_SUBPEL_BITS);
}
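A quick consistency check on this refactoring (REF_SCALE_SHIFT and SCALE_SUBPEL_BITS are defined elsewhere in the codec headers; the values below just follow the arithmetic):

  For an unscaled reference (other_size == this_size == w):
    scale_fp = ((w << REF_SCALE_SHIFT) + w / 2) / w = 1 << REF_SCALE_SHIFT
    step_q4  = ROUND_POWER_OF_TWO(scale_fp, REF_SCALE_SHIFT - SCALE_SUBPEL_BITS)
             = 1 << SCALE_SUBPEL_BITS = SCALE_SUBPEL_SHIFTS

  which matches the "no scaling" value that the removed predict[] setup below
  used to test against.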
// Note: x and y are integer precision, mvq4 is q4 precision.
@@ -64,14 +63,8 @@ MV32 av1_scale_mv(const MV *mvq4, int x, int y,
return res;
}
-#if CONFIG_HIGHBITDEPTH
-void av1_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
- int other_h, int this_w, int this_h,
- int use_highbd) {
-#else
void av1_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
int other_h, int this_w, int this_h) {
-#endif
if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) {
sf->x_scale_fp = REF_INVALID_SCALE;
sf->y_scale_fp = REF_INVALID_SCALE;
@@ -81,8 +74,8 @@ void av1_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w);
sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h);
- sf->x_step_q4 = get_coarse_point_scale_factor(other_w, this_w);
- sf->y_step_q4 = get_coarse_point_scale_factor(other_h, this_h);
+ sf->x_step_q4 = fixed_point_scale_to_coarse_point_scale(sf->x_scale_fp);
+ sf->y_step_q4 = fixed_point_scale_to_coarse_point_scale(sf->y_scale_fp);
if (av1_is_scaled(sf)) {
sf->scale_value_x = scaled_x;
@@ -92,95 +85,42 @@ void av1_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
sf->scale_value_y = unscaled_value;
}
- // TODO(agrange): Investigate the best choice of functions to use here
- // for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what
- // to do at full-pel offsets. The current selection, where the filter is
- // applied in one direction only, and not at all for 0,0, seems to give the
- // best quality, but it may be worth trying an additional mode that does
- // do the filtering on full-pel.
- if (sf->x_step_q4 == SCALE_SUBPEL_SHIFTS) {
- if (sf->y_step_q4 == SCALE_SUBPEL_SHIFTS) {
- // No scaling in either direction.
- sf->predict[0][0][0] = aom_convolve_copy;
- sf->predict[0][0][1] = aom_convolve_avg;
- sf->predict[0][1][0] = aom_convolve8_vert;
- sf->predict[0][1][1] = aom_convolve8_avg_vert;
- sf->predict[1][0][0] = aom_convolve8_horiz;
- sf->predict[1][0][1] = aom_convolve8_avg_horiz;
- } else {
- // No scaling in x direction. Must always scale in the y direction.
- sf->predict[0][0][0] = aom_convolve8_vert;
- sf->predict[0][0][1] = aom_convolve8_avg_vert;
- sf->predict[0][1][0] = aom_convolve8_vert;
- sf->predict[0][1][1] = aom_convolve8_avg_vert;
- sf->predict[1][0][0] = aom_convolve8;
- sf->predict[1][0][1] = aom_convolve8_avg;
- }
- } else {
- if (sf->y_step_q4 == SCALE_SUBPEL_SHIFTS) {
- // No scaling in the y direction. Must always scale in the x direction.
- sf->predict[0][0][0] = aom_convolve8_horiz;
- sf->predict[0][0][1] = aom_convolve8_avg_horiz;
- sf->predict[0][1][0] = aom_convolve8;
- sf->predict[0][1][1] = aom_convolve8_avg;
- sf->predict[1][0][0] = aom_convolve8_horiz;
- sf->predict[1][0][1] = aom_convolve8_avg_horiz;
- } else {
- // Must always scale in both directions.
- sf->predict[0][0][0] = aom_convolve8;
- sf->predict[0][0][1] = aom_convolve8_avg;
- sf->predict[0][1][0] = aom_convolve8;
- sf->predict[0][1][1] = aom_convolve8_avg;
- sf->predict[1][0][0] = aom_convolve8;
- sf->predict[1][0][1] = aom_convolve8_avg;
- }
- }
- // 2D subpel motion always gets filtered in both directions
- sf->predict[1][1][0] = aom_convolve8;
- sf->predict[1][1][1] = aom_convolve8_avg;
-
-#if CONFIG_HIGHBITDEPTH
- if (use_highbd) {
- if (sf->x_step_q4 == SCALE_SUBPEL_SHIFTS) {
- if (sf->y_step_q4 == SCALE_SUBPEL_SHIFTS) {
- // No scaling in either direction.
- sf->highbd_predict[0][0][0] = aom_highbd_convolve_copy;
- sf->highbd_predict[0][0][1] = aom_highbd_convolve_avg;
- sf->highbd_predict[0][1][0] = aom_highbd_convolve8_vert;
- sf->highbd_predict[0][1][1] = aom_highbd_convolve8_avg_vert;
- sf->highbd_predict[1][0][0] = aom_highbd_convolve8_horiz;
- sf->highbd_predict[1][0][1] = aom_highbd_convolve8_avg_horiz;
- } else {
- // No scaling in x direction. Must always scale in the y direction.
- sf->highbd_predict[0][0][0] = aom_highbd_convolve8_vert;
- sf->highbd_predict[0][0][1] = aom_highbd_convolve8_avg_vert;
- sf->highbd_predict[0][1][0] = aom_highbd_convolve8_vert;
- sf->highbd_predict[0][1][1] = aom_highbd_convolve8_avg_vert;
- sf->highbd_predict[1][0][0] = aom_highbd_convolve8;
- sf->highbd_predict[1][0][1] = aom_highbd_convolve8_avg;
- }
- } else {
- if (sf->y_step_q4 == SCALE_SUBPEL_SHIFTS) {
- // No scaling in the y direction. Must always scale in the x direction.
- sf->highbd_predict[0][0][0] = aom_highbd_convolve8_horiz;
- sf->highbd_predict[0][0][1] = aom_highbd_convolve8_avg_horiz;
- sf->highbd_predict[0][1][0] = aom_highbd_convolve8;
- sf->highbd_predict[0][1][1] = aom_highbd_convolve8_avg;
- sf->highbd_predict[1][0][0] = aom_highbd_convolve8_horiz;
- sf->highbd_predict[1][0][1] = aom_highbd_convolve8_avg_horiz;
- } else {
- // Must always scale in both directions.
- sf->highbd_predict[0][0][0] = aom_highbd_convolve8;
- sf->highbd_predict[0][0][1] = aom_highbd_convolve8_avg;
- sf->highbd_predict[0][1][0] = aom_highbd_convolve8;
- sf->highbd_predict[0][1][1] = aom_highbd_convolve8_avg;
- sf->highbd_predict[1][0][0] = aom_highbd_convolve8;
- sf->highbd_predict[1][0][1] = aom_highbd_convolve8_avg;
- }
- }
- // 2D subpel motion always gets filtered in both directions.
- sf->highbd_predict[1][1][0] = aom_highbd_convolve8;
- sf->highbd_predict[1][1][1] = aom_highbd_convolve8_avg;
- }
-#endif // CONFIG_HIGHBITDEPTH
+ // AV1 convolve functions
+ // Special case convolve functions should produce the same result as
+ // av1_convolve_2d.
+ // subpel_x_q4 == 0 && subpel_y_q4 == 0
+ sf->convolve[0][0][0] = av1_convolve_2d_copy_sr;
+ // subpel_x_q4 == 0
+ sf->convolve[0][1][0] = av1_convolve_y_sr;
+ // subpel_y_q4 == 0
+ sf->convolve[1][0][0] = av1_convolve_x_sr;
+ // subpel_x_q4 != 0 && subpel_y_q4 != 0
+ sf->convolve[1][1][0] = av1_convolve_2d_sr;
+ // subpel_x_q4 == 0 && subpel_y_q4 == 0
+ sf->convolve[0][0][1] = av1_jnt_convolve_2d_copy;
+ // subpel_x_q4 == 0
+ sf->convolve[0][1][1] = av1_jnt_convolve_y;
+ // subpel_y_q4 == 0
+ sf->convolve[1][0][1] = av1_jnt_convolve_x;
+ // subpel_x_q4 != 0 && subpel_y_q4 != 0
+ sf->convolve[1][1][1] = av1_jnt_convolve_2d;
+ // AV1 High BD convolve functions
+ // Special case convolve functions should produce the same result as
+ // av1_highbd_convolve_2d.
+ // subpel_x_q4 == 0 && subpel_y_q4 == 0
+ sf->highbd_convolve[0][0][0] = av1_highbd_convolve_2d_copy_sr;
+ // subpel_x_q4 == 0
+ sf->highbd_convolve[0][1][0] = av1_highbd_convolve_y_sr;
+ // subpel_y_q4 == 0
+ sf->highbd_convolve[1][0][0] = av1_highbd_convolve_x_sr;
+ // subpel_x_q4 != 0 && subpel_y_q4 != 0
+ sf->highbd_convolve[1][1][0] = av1_highbd_convolve_2d_sr;
+ // subpel_x_q4 == 0 && subpel_y_q4 == 0
+ sf->highbd_convolve[0][0][1] = av1_highbd_jnt_convolve_2d_copy;
+ // subpel_x_q4 == 0
+ sf->highbd_convolve[0][1][1] = av1_highbd_jnt_convolve_y;
+ // subpel_y_q4 == 0
+ sf->highbd_convolve[1][0][1] = av1_highbd_jnt_convolve_x;
+ // subpel_x_q4 != 0 && subpel_y_q4 != 0
+ sf->highbd_convolve[1][1][1] = av1_highbd_jnt_convolve_2d;
}
diff --git a/third_party/aom/av1/common/scale.h b/third_party/aom/av1/common/scale.h
index 900e6bf47..5f02fdb81 100644
--- a/third_party/aom/av1/common/scale.h
+++ b/third_party/aom/av1/common/scale.h
@@ -12,6 +12,7 @@
#ifndef AV1_COMMON_SCALE_H_
#define AV1_COMMON_SCALE_H_
+#include "av1/common/convolve.h"
#include "av1/common/mv.h"
#include "aom_dsp/aom_convolve.h"
@@ -34,22 +35,15 @@ struct scale_factors {
int (*scale_value_x)(int val, const struct scale_factors *sf);
int (*scale_value_y)(int val, const struct scale_factors *sf);
- convolve_fn_t predict[2][2][2]; // horiz, vert, avg
-#if CONFIG_HIGHBITDEPTH
- highbd_convolve_fn_t highbd_predict[2][2][2]; // horiz, vert, avg
-#endif // CONFIG_HIGHBITDEPTH
+ // convolve_fn_ptr[subpel_x != 0][subpel_y != 0][is_compound]
+ aom_convolve_fn_t convolve[2][2][2];
+ aom_highbd_convolve_fn_t highbd_convolve[2][2][2];
};
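The indexing convention spelled out in the comment above can be illustrated with a small helper (the name pick_convolve_fn is ours; real callers index the table directly elsewhere in the codec):

// Illustrative lookup only: select the convolve function for the given
// sub-pixel offsets and compound flag, per the indexing comment above.
static INLINE aom_convolve_fn_t pick_convolve_fn(
    const struct scale_factors *sf, int subpel_x_q4, int subpel_y_q4,
    int is_compound) {
  return sf->convolve[subpel_x_q4 != 0][subpel_y_q4 != 0][is_compound];
}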
MV32 av1_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf);
-#if CONFIG_HIGHBITDEPTH
-void av1_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
- int other_h, int this_w, int this_h,
- int use_high);
-#else
void av1_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
int other_h, int this_w, int this_h);
-#endif // CONFIG_HIGHBITDEPTH
static INLINE int av1_is_valid_scale(const struct scale_factors *sf) {
return sf->x_scale_fp != REF_INVALID_SCALE &&
diff --git a/third_party/aom/av1/common/scan.c b/third_party/aom/av1/common/scan.c
index 3c8f3d7ac..31a787b53 100644
--- a/third_party/aom/av1/common/scan.c
+++ b/third_party/aom/av1/common/scan.c
@@ -14,17 +14,10 @@
#include "av1/common/common_data.h"
#include "av1/common/scan.h"
-#if CONFIG_CHROMA_2X2
-DECLARE_ALIGNED(16, static const int16_t, default_scan_2x2[4]) = {
- 0, 1, 2, 3,
-};
-#endif
-
DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4[16]) = {
- 0, 4, 1, 5, 8, 2, 12, 9, 3, 6, 13, 10, 7, 14, 11, 15,
+ 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x4[16]) = {
0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
};
@@ -32,19 +25,10 @@ DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x4[16]) = {
DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x4[16]) = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
};
-#endif // CONFIG_EXT_TX
-
-DECLARE_ALIGNED(16, static const int16_t, col_scan_4x4[16]) = {
- 0, 4, 8, 1, 12, 5, 9, 2, 13, 6, 10, 3, 7, 14, 11, 15,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, row_scan_4x4[16]) = {
- 0, 1, 4, 2, 5, 3, 6, 8, 9, 7, 12, 10, 13, 11, 14, 15,
-};
DECLARE_ALIGNED(16, static const int16_t, default_scan_4x8[32]) = {
- 0, 1, 4, 5, 2, 8, 6, 9, 10, 3, 12, 7, 13, 11, 14, 16,
- 17, 15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 0, 1, 4, 2, 5, 8, 3, 6, 9, 12, 7, 10, 13, 16, 11, 14,
+ 17, 20, 15, 18, 21, 24, 19, 22, 25, 28, 23, 26, 29, 27, 30, 31,
};
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x8[32]) = {
@@ -58,8 +42,8 @@ DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x8[32]) = {
};
DECLARE_ALIGNED(16, static const int16_t, default_scan_8x4[32]) = {
- 0, 1, 8, 9, 2, 16, 10, 17, 18, 3, 24, 11, 25, 19, 26, 4,
- 12, 27, 20, 5, 28, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31,
+ 0, 8, 1, 16, 9, 2, 24, 17, 10, 3, 25, 18, 11, 4, 26, 19,
+ 12, 5, 27, 20, 13, 6, 28, 21, 14, 7, 29, 22, 15, 30, 23, 31,
};
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x4[32]) = {
@@ -73,20 +57,19 @@ DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x4[32]) = {
};
DECLARE_ALIGNED(16, static const int16_t, default_scan_4x16[64]) = {
- 0, 1, 4, 5, 2, 8, 6, 9, 10, 3, 12, 7, 13, 11, 14, 16,
- 17, 15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 0, 1, 4, 2, 5, 8, 3, 6, 9, 12, 7, 10, 13, 16, 11, 14,
+ 17, 20, 15, 18, 21, 24, 19, 22, 25, 28, 23, 26, 29, 32, 27, 30,
+ 33, 36, 31, 34, 37, 40, 35, 38, 41, 44, 39, 42, 45, 48, 43, 46,
+ 49, 52, 47, 50, 53, 56, 51, 54, 57, 60, 55, 58, 61, 59, 62, 63,
};
DECLARE_ALIGNED(16, static const int16_t, default_scan_16x4[64]) = {
- 0, 1, 16, 17, 2, 32, 18, 33, 34, 3, 48, 19, 49, 35, 50, 4,
- 20, 51, 36, 5, 52, 21, 37, 53, 6, 22, 38, 54, 7, 23, 39, 55,
- 8, 24, 40, 56, 9, 25, 41, 57, 10, 26, 42, 58, 11, 27, 43, 59,
- 12, 28, 44, 60, 13, 29, 45, 61, 14, 30, 46, 62, 15, 31, 47, 63,
+ 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 49, 34, 19, 4, 50, 35,
+ 20, 5, 51, 36, 21, 6, 52, 37, 22, 7, 53, 38, 23, 8, 54, 39,
+ 24, 9, 55, 40, 25, 10, 56, 41, 26, 11, 57, 42, 27, 12, 58, 43,
+ 28, 13, 59, 44, 29, 14, 60, 45, 30, 15, 61, 46, 31, 62, 47, 63,
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x16[64]) = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
@@ -114,7 +97,6 @@ DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x4[64]) = {
8, 24, 40, 56, 9, 25, 41, 57, 10, 26, 42, 58, 11, 27, 43, 59,
12, 28, 44, 60, 13, 29, 45, 61, 14, 30, 46, 62, 15, 31, 47, 63,
};
-#endif // CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, default_scan_8x32[256]) = {
0, 1, 8, 2, 9, 16, 3, 10, 17, 24, 4, 11, 18, 25, 32,
@@ -138,27 +120,26 @@ DECLARE_ALIGNED(16, static const int16_t, default_scan_8x32[256]) = {
};
DECLARE_ALIGNED(16, static const int16_t, default_scan_32x8[256]) = {
- 0, 1, 32, 2, 33, 64, 3, 34, 65, 96, 4, 35, 66, 97, 128,
- 5, 36, 67, 98, 129, 160, 6, 37, 68, 99, 130, 161, 192, 7, 38,
- 69, 100, 131, 162, 193, 224, 8, 39, 70, 101, 132, 163, 194, 225, 9,
- 40, 71, 102, 133, 164, 195, 226, 10, 41, 72, 103, 134, 165, 196, 227,
- 11, 42, 73, 104, 135, 166, 197, 228, 12, 43, 74, 105, 136, 167, 198,
- 229, 13, 44, 75, 106, 137, 168, 199, 230, 14, 45, 76, 107, 138, 169,
- 200, 231, 15, 46, 77, 108, 139, 170, 201, 232, 16, 47, 78, 109, 140,
- 171, 202, 233, 17, 48, 79, 110, 141, 172, 203, 234, 18, 49, 80, 111,
- 142, 173, 204, 235, 19, 50, 81, 112, 143, 174, 205, 236, 20, 51, 82,
- 113, 144, 175, 206, 237, 21, 52, 83, 114, 145, 176, 207, 238, 22, 53,
- 84, 115, 146, 177, 208, 239, 23, 54, 85, 116, 147, 178, 209, 240, 24,
- 55, 86, 117, 148, 179, 210, 241, 25, 56, 87, 118, 149, 180, 211, 242,
- 26, 57, 88, 119, 150, 181, 212, 243, 27, 58, 89, 120, 151, 182, 213,
- 244, 28, 59, 90, 121, 152, 183, 214, 245, 29, 60, 91, 122, 153, 184,
- 215, 246, 30, 61, 92, 123, 154, 185, 216, 247, 31, 62, 93, 124, 155,
- 186, 217, 248, 63, 94, 125, 156, 187, 218, 249, 95, 126, 157, 188, 219,
- 250, 127, 158, 189, 220, 251, 159, 190, 221, 252, 191, 222, 253, 223, 254,
+ 0, 32, 1, 64, 33, 2, 96, 65, 34, 3, 128, 97, 66, 35, 4,
+ 160, 129, 98, 67, 36, 5, 192, 161, 130, 99, 68, 37, 6, 224, 193,
+ 162, 131, 100, 69, 38, 7, 225, 194, 163, 132, 101, 70, 39, 8, 226,
+ 195, 164, 133, 102, 71, 40, 9, 227, 196, 165, 134, 103, 72, 41, 10,
+ 228, 197, 166, 135, 104, 73, 42, 11, 229, 198, 167, 136, 105, 74, 43,
+ 12, 230, 199, 168, 137, 106, 75, 44, 13, 231, 200, 169, 138, 107, 76,
+ 45, 14, 232, 201, 170, 139, 108, 77, 46, 15, 233, 202, 171, 140, 109,
+ 78, 47, 16, 234, 203, 172, 141, 110, 79, 48, 17, 235, 204, 173, 142,
+ 111, 80, 49, 18, 236, 205, 174, 143, 112, 81, 50, 19, 237, 206, 175,
+ 144, 113, 82, 51, 20, 238, 207, 176, 145, 114, 83, 52, 21, 239, 208,
+ 177, 146, 115, 84, 53, 22, 240, 209, 178, 147, 116, 85, 54, 23, 241,
+ 210, 179, 148, 117, 86, 55, 24, 242, 211, 180, 149, 118, 87, 56, 25,
+ 243, 212, 181, 150, 119, 88, 57, 26, 244, 213, 182, 151, 120, 89, 58,
+ 27, 245, 214, 183, 152, 121, 90, 59, 28, 246, 215, 184, 153, 122, 91,
+ 60, 29, 247, 216, 185, 154, 123, 92, 61, 30, 248, 217, 186, 155, 124,
+ 93, 62, 31, 249, 218, 187, 156, 125, 94, 63, 250, 219, 188, 157, 126,
+ 95, 251, 220, 189, 158, 127, 252, 221, 190, 159, 253, 222, 191, 254, 223,
255,
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x32[256]) = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
@@ -240,16 +221,14 @@ DECLARE_ALIGNED(16, static const int16_t, mcol_scan_32x8[256]) = {
28, 60, 92, 124, 156, 188, 220, 252, 29, 61, 93, 125, 157, 189, 221, 253,
30, 62, 94, 126, 158, 190, 222, 254, 31, 63, 95, 127, 159, 191, 223, 255,
};
-#endif // CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = {
- 0, 8, 1, 16, 9, 2, 17, 24, 10, 3, 18, 25, 32, 11, 4, 26,
- 33, 19, 40, 12, 34, 27, 5, 41, 20, 48, 13, 35, 42, 28, 21, 6,
- 49, 56, 36, 43, 29, 7, 14, 50, 57, 44, 22, 37, 15, 51, 58, 30,
- 45, 23, 52, 59, 38, 31, 60, 53, 46, 39, 61, 54, 47, 62, 55, 63,
+ 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x8[64]) = {
0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57,
2, 10, 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59,
@@ -263,21 +242,6 @@ DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x8[64]) = {
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
};
-#endif // CONFIG_EXT_TX
-
-DECLARE_ALIGNED(16, static const int16_t, col_scan_8x8[64]) = {
- 0, 8, 16, 1, 24, 9, 32, 17, 2, 40, 25, 10, 33, 18, 48, 3,
- 26, 41, 11, 56, 19, 34, 4, 49, 27, 42, 12, 35, 20, 57, 50, 28,
- 5, 43, 13, 36, 58, 51, 21, 44, 6, 29, 59, 37, 14, 52, 22, 7,
- 45, 60, 30, 15, 38, 53, 23, 46, 31, 61, 39, 54, 47, 62, 55, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, row_scan_8x8[64]) = {
- 0, 1, 2, 8, 9, 3, 16, 10, 4, 17, 11, 24, 5, 18, 25, 12,
- 19, 26, 32, 6, 13, 20, 33, 27, 7, 34, 40, 21, 28, 41, 14, 35,
- 48, 42, 29, 36, 49, 22, 43, 15, 56, 37, 50, 44, 30, 57, 23, 51,
- 58, 45, 38, 52, 31, 59, 53, 46, 60, 39, 61, 47, 54, 55, 62, 63,
-};
DECLARE_ALIGNED(16, static const int16_t, default_scan_8x16[128]) = {
0, 1, 8, 2, 9, 16, 3, 10, 17, 24, 4, 11, 18, 25, 32,
@@ -292,14 +256,14 @@ DECLARE_ALIGNED(16, static const int16_t, default_scan_8x16[128]) = {
};
DECLARE_ALIGNED(16, static const int16_t, default_scan_16x8[128]) = {
- 0, 1, 16, 2, 17, 32, 3, 18, 33, 48, 4, 19, 34, 49, 64, 5,
- 20, 35, 50, 65, 80, 6, 21, 36, 51, 66, 81, 96, 7, 22, 37, 52,
- 67, 82, 97, 112, 8, 23, 38, 53, 68, 83, 98, 113, 9, 24, 39, 54,
- 69, 84, 99, 114, 10, 25, 40, 55, 70, 85, 100, 115, 11, 26, 41, 56,
- 71, 86, 101, 116, 12, 27, 42, 57, 72, 87, 102, 117, 13, 28, 43, 58,
- 73, 88, 103, 118, 14, 29, 44, 59, 74, 89, 104, 119, 15, 30, 45, 60,
- 75, 90, 105, 120, 31, 46, 61, 76, 91, 106, 121, 47, 62, 77, 92, 107,
- 122, 63, 78, 93, 108, 123, 79, 94, 109, 124, 95, 110, 125, 111, 126, 127,
+ 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 49, 34, 19, 4, 80,
+ 65, 50, 35, 20, 5, 96, 81, 66, 51, 36, 21, 6, 112, 97, 82, 67,
+ 52, 37, 22, 7, 113, 98, 83, 68, 53, 38, 23, 8, 114, 99, 84, 69,
+ 54, 39, 24, 9, 115, 100, 85, 70, 55, 40, 25, 10, 116, 101, 86, 71,
+ 56, 41, 26, 11, 117, 102, 87, 72, 57, 42, 27, 12, 118, 103, 88, 73,
+ 58, 43, 28, 13, 119, 104, 89, 74, 59, 44, 29, 14, 120, 105, 90, 75,
+ 60, 45, 30, 15, 121, 106, 91, 76, 61, 46, 31, 122, 107, 92, 77, 62,
+ 47, 123, 108, 93, 78, 63, 124, 109, 94, 79, 125, 110, 95, 126, 111, 127,
};
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x16[128]) = {
@@ -387,41 +351,41 @@ DECLARE_ALIGNED(16, static const int16_t, default_scan_16x32[512]) = {
};
DECLARE_ALIGNED(16, static const int16_t, default_scan_32x16[512]) = {
- 0, 1, 32, 2, 33, 64, 3, 34, 65, 96, 4, 35, 66, 97, 128,
- 5, 36, 67, 98, 129, 160, 6, 37, 68, 99, 130, 161, 192, 7, 38,
- 69, 100, 131, 162, 193, 224, 8, 39, 70, 101, 132, 163, 194, 225, 256,
- 9, 40, 71, 102, 133, 164, 195, 226, 257, 288, 10, 41, 72, 103, 134,
- 165, 196, 227, 258, 289, 320, 11, 42, 73, 104, 135, 166, 197, 228, 259,
- 290, 321, 352, 12, 43, 74, 105, 136, 167, 198, 229, 260, 291, 322, 353,
- 384, 13, 44, 75, 106, 137, 168, 199, 230, 261, 292, 323, 354, 385, 416,
- 14, 45, 76, 107, 138, 169, 200, 231, 262, 293, 324, 355, 386, 417, 448,
- 15, 46, 77, 108, 139, 170, 201, 232, 263, 294, 325, 356, 387, 418, 449,
- 480, 16, 47, 78, 109, 140, 171, 202, 233, 264, 295, 326, 357, 388, 419,
- 450, 481, 17, 48, 79, 110, 141, 172, 203, 234, 265, 296, 327, 358, 389,
- 420, 451, 482, 18, 49, 80, 111, 142, 173, 204, 235, 266, 297, 328, 359,
- 390, 421, 452, 483, 19, 50, 81, 112, 143, 174, 205, 236, 267, 298, 329,
- 360, 391, 422, 453, 484, 20, 51, 82, 113, 144, 175, 206, 237, 268, 299,
- 330, 361, 392, 423, 454, 485, 21, 52, 83, 114, 145, 176, 207, 238, 269,
- 300, 331, 362, 393, 424, 455, 486, 22, 53, 84, 115, 146, 177, 208, 239,
- 270, 301, 332, 363, 394, 425, 456, 487, 23, 54, 85, 116, 147, 178, 209,
- 240, 271, 302, 333, 364, 395, 426, 457, 488, 24, 55, 86, 117, 148, 179,
- 210, 241, 272, 303, 334, 365, 396, 427, 458, 489, 25, 56, 87, 118, 149,
- 180, 211, 242, 273, 304, 335, 366, 397, 428, 459, 490, 26, 57, 88, 119,
- 150, 181, 212, 243, 274, 305, 336, 367, 398, 429, 460, 491, 27, 58, 89,
- 120, 151, 182, 213, 244, 275, 306, 337, 368, 399, 430, 461, 492, 28, 59,
- 90, 121, 152, 183, 214, 245, 276, 307, 338, 369, 400, 431, 462, 493, 29,
- 60, 91, 122, 153, 184, 215, 246, 277, 308, 339, 370, 401, 432, 463, 494,
- 30, 61, 92, 123, 154, 185, 216, 247, 278, 309, 340, 371, 402, 433, 464,
- 495, 31, 62, 93, 124, 155, 186, 217, 248, 279, 310, 341, 372, 403, 434,
- 465, 496, 63, 94, 125, 156, 187, 218, 249, 280, 311, 342, 373, 404, 435,
- 466, 497, 95, 126, 157, 188, 219, 250, 281, 312, 343, 374, 405, 436, 467,
- 498, 127, 158, 189, 220, 251, 282, 313, 344, 375, 406, 437, 468, 499, 159,
- 190, 221, 252, 283, 314, 345, 376, 407, 438, 469, 500, 191, 222, 253, 284,
- 315, 346, 377, 408, 439, 470, 501, 223, 254, 285, 316, 347, 378, 409, 440,
- 471, 502, 255, 286, 317, 348, 379, 410, 441, 472, 503, 287, 318, 349, 380,
- 411, 442, 473, 504, 319, 350, 381, 412, 443, 474, 505, 351, 382, 413, 444,
- 475, 506, 383, 414, 445, 476, 507, 415, 446, 477, 508, 447, 478, 509, 479,
- 510, 511,
+ 0, 32, 1, 64, 33, 2, 96, 65, 34, 3, 128, 97, 66, 35, 4,
+ 160, 129, 98, 67, 36, 5, 192, 161, 130, 99, 68, 37, 6, 224, 193,
+ 162, 131, 100, 69, 38, 7, 256, 225, 194, 163, 132, 101, 70, 39, 8,
+ 288, 257, 226, 195, 164, 133, 102, 71, 40, 9, 320, 289, 258, 227, 196,
+ 165, 134, 103, 72, 41, 10, 352, 321, 290, 259, 228, 197, 166, 135, 104,
+ 73, 42, 11, 384, 353, 322, 291, 260, 229, 198, 167, 136, 105, 74, 43,
+ 12, 416, 385, 354, 323, 292, 261, 230, 199, 168, 137, 106, 75, 44, 13,
+ 448, 417, 386, 355, 324, 293, 262, 231, 200, 169, 138, 107, 76, 45, 14,
+ 480, 449, 418, 387, 356, 325, 294, 263, 232, 201, 170, 139, 108, 77, 46,
+ 15, 481, 450, 419, 388, 357, 326, 295, 264, 233, 202, 171, 140, 109, 78,
+ 47, 16, 482, 451, 420, 389, 358, 327, 296, 265, 234, 203, 172, 141, 110,
+ 79, 48, 17, 483, 452, 421, 390, 359, 328, 297, 266, 235, 204, 173, 142,
+ 111, 80, 49, 18, 484, 453, 422, 391, 360, 329, 298, 267, 236, 205, 174,
+ 143, 112, 81, 50, 19, 485, 454, 423, 392, 361, 330, 299, 268, 237, 206,
+ 175, 144, 113, 82, 51, 20, 486, 455, 424, 393, 362, 331, 300, 269, 238,
+ 207, 176, 145, 114, 83, 52, 21, 487, 456, 425, 394, 363, 332, 301, 270,
+ 239, 208, 177, 146, 115, 84, 53, 22, 488, 457, 426, 395, 364, 333, 302,
+ 271, 240, 209, 178, 147, 116, 85, 54, 23, 489, 458, 427, 396, 365, 334,
+ 303, 272, 241, 210, 179, 148, 117, 86, 55, 24, 490, 459, 428, 397, 366,
+ 335, 304, 273, 242, 211, 180, 149, 118, 87, 56, 25, 491, 460, 429, 398,
+ 367, 336, 305, 274, 243, 212, 181, 150, 119, 88, 57, 26, 492, 461, 430,
+ 399, 368, 337, 306, 275, 244, 213, 182, 151, 120, 89, 58, 27, 493, 462,
+ 431, 400, 369, 338, 307, 276, 245, 214, 183, 152, 121, 90, 59, 28, 494,
+ 463, 432, 401, 370, 339, 308, 277, 246, 215, 184, 153, 122, 91, 60, 29,
+ 495, 464, 433, 402, 371, 340, 309, 278, 247, 216, 185, 154, 123, 92, 61,
+ 30, 496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124, 93,
+ 62, 31, 497, 466, 435, 404, 373, 342, 311, 280, 249, 218, 187, 156, 125,
+ 94, 63, 498, 467, 436, 405, 374, 343, 312, 281, 250, 219, 188, 157, 126,
+ 95, 499, 468, 437, 406, 375, 344, 313, 282, 251, 220, 189, 158, 127, 500,
+ 469, 438, 407, 376, 345, 314, 283, 252, 221, 190, 159, 501, 470, 439, 408,
+ 377, 346, 315, 284, 253, 222, 191, 502, 471, 440, 409, 378, 347, 316, 285,
+ 254, 223, 503, 472, 441, 410, 379, 348, 317, 286, 255, 504, 473, 442, 411,
+ 380, 349, 318, 287, 505, 474, 443, 412, 381, 350, 319, 506, 475, 444, 413,
+ 382, 351, 507, 476, 445, 414, 383, 508, 477, 446, 415, 509, 478, 447, 510,
+ 479, 511,
};
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x32[512]) = {
@@ -574,27 +538,26 @@ DECLARE_ALIGNED(16, static const int16_t, mrow_scan_32x16[512]) = {
};
DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16[256]) = {
- 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65,
- 80, 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112,
- 21, 52, 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7,
- 129, 38, 69, 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116,
- 101, 131, 160, 146, 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147,
- 176, 162, 87, 56, 25, 133, 118, 177, 148, 72, 103, 41, 163, 10, 192,
- 178, 88, 57, 134, 149, 119, 26, 164, 73, 104, 193, 42, 179, 208, 11,
- 135, 89, 165, 120, 150, 58, 194, 180, 27, 74, 209, 105, 151, 136, 43,
- 90, 224, 166, 195, 181, 121, 210, 59, 12, 152, 106, 167, 196, 75, 137,
- 225, 211, 240, 182, 122, 91, 28, 197, 13, 226, 168, 183, 153, 44, 212,
- 138, 107, 241, 60, 29, 123, 198, 184, 227, 169, 242, 76, 213, 154, 45,
- 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108, 77, 155, 30, 15,
- 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140, 230, 62, 216,
- 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141, 63, 232,
- 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142, 219,
- 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, 251,
- 190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239,
- 255,
+ 0, 1, 16, 32, 17, 2, 3, 18, 33, 48, 64, 49, 34, 19, 4,
+ 5, 20, 35, 50, 65, 80, 96, 81, 66, 51, 36, 21, 6, 7, 22,
+ 37, 52, 67, 82, 97, 112, 128, 113, 98, 83, 68, 53, 38, 23, 8,
+ 9, 24, 39, 54, 69, 84, 99, 114, 129, 144, 160, 145, 130, 115, 100,
+ 85, 70, 55, 40, 25, 10, 11, 26, 41, 56, 71, 86, 101, 116, 131,
+ 146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87, 72, 57, 42, 27,
+ 12, 13, 28, 43, 58, 73, 88, 103, 118, 133, 148, 163, 178, 193, 208,
+ 224, 209, 194, 179, 164, 149, 134, 119, 104, 89, 74, 59, 44, 29, 14,
+ 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225,
+ 240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91, 76, 61, 46,
+ 31, 47, 62, 77, 92, 107, 122, 137, 152, 167, 182, 197, 212, 227, 242,
+ 243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93, 78, 63, 79, 94,
+ 109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185,
+ 170, 155, 140, 125, 110, 95, 111, 126, 141, 156, 171, 186, 201, 216, 231,
+ 246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, 203,
+ 218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235,
+ 250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254,
+ 255
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x16[256]) = {
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, 209, 225, 241,
@@ -634,51 +597,7 @@ DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x16[256]) = {
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
255,
};
-#endif // CONFIG_EXT_TX
-
-DECLARE_ALIGNED(16, static const int16_t, col_scan_16x16[256]) = {
- 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18,
- 81, 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51,
- 129, 4, 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161,
- 68, 115, 21, 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6,
- 116, 193, 147, 85, 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70,
- 7, 148, 194, 86, 179, 225, 23, 133, 39, 164, 8, 102, 210, 241, 55,
- 195, 118, 149, 71, 180, 24, 87, 226, 134, 165, 211, 40, 103, 56, 72,
- 150, 196, 242, 119, 9, 181, 227, 88, 166, 25, 135, 41, 104, 212, 57,
- 151, 197, 120, 73, 243, 182, 136, 167, 213, 89, 10, 228, 105, 152, 198,
- 26, 42, 121, 183, 244, 168, 58, 137, 229, 74, 214, 90, 153, 199, 184,
- 11, 106, 245, 27, 122, 230, 169, 43, 215, 59, 200, 138, 185, 246, 75,
- 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170, 60, 247, 232, 76,
- 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202, 233, 171, 61,
- 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125, 62, 172,
- 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79, 126,
- 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, 236,
- 159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239,
- 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, row_scan_16x16[256]) = {
- 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48,
- 20, 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9,
- 66, 52, 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54,
- 83, 97, 69, 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26,
- 41, 56, 114, 100, 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72,
- 116, 14, 87, 130, 102, 144, 73, 131, 117, 28, 58, 15, 88, 43, 145,
- 103, 132, 146, 118, 74, 160, 89, 133, 104, 29, 59, 147, 119, 44, 161,
- 148, 90, 105, 134, 162, 120, 176, 75, 135, 149, 30, 60, 163, 177, 45,
- 121, 91, 106, 164, 178, 150, 192, 136, 165, 179, 31, 151, 193, 76, 122,
- 61, 137, 194, 107, 152, 180, 208, 46, 166, 167, 195, 92, 181, 138, 209,
- 123, 153, 224, 196, 77, 168, 210, 182, 240, 108, 197, 62, 154, 225, 183,
- 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170, 124, 155, 199, 78,
- 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186, 156, 229, 243,
- 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110, 157, 245,
- 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, 158, 188,
- 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, 175,
- 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254,
- 255,
-};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, mcol_scan_32x32[1024]) = {
0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416,
448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864,
@@ -837,998 +756,97 @@ DECLARE_ALIGNED(16, static const int16_t, mrow_scan_32x32[1024]) = {
1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013,
1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
};
-#endif // CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = {
- 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66,
- 160, 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5,
- 162, 193, 68, 131, 37, 100, 225, 194, 256, 163, 69, 132, 6,
- 226, 257, 288, 195, 101, 164, 38, 258, 7, 227, 289, 133, 320,
- 70, 196, 165, 290, 259, 228, 39, 321, 102, 352, 8, 197, 71,
- 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292,
- 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104,
- 293, 41, 417, 199, 136, 262, 387, 448, 325, 356, 10, 73, 418,
- 231, 168, 449, 294, 388, 105, 419, 263, 42, 200, 357, 450, 137,
- 480, 74, 326, 232, 11, 389, 169, 295, 420, 106, 451, 481, 358,
- 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, 75,
- 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391,
- 453, 139, 44, 234, 484, 297, 360, 171, 76, 515, 545, 266, 329,
- 454, 13, 423, 203, 108, 546, 485, 576, 298, 235, 140, 361, 330,
- 172, 547, 45, 455, 267, 577, 486, 77, 204, 362, 608, 14, 299,
- 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, 610, 363,
- 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, 111,
- 238, 48, 143, 80, 175, 112, 207, 49, 18, 239, 81, 113, 19,
- 50, 82, 114, 51, 83, 115, 640, 516, 392, 268, 144, 20, 672,
- 641, 548, 517, 424, 393, 300, 269, 176, 145, 52, 21, 704, 673,
- 642, 580, 549, 518, 456, 425, 394, 332, 301, 270, 208, 177, 146,
- 84, 53, 22, 736, 705, 674, 643, 612, 581, 550, 519, 488, 457,
- 426, 395, 364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54,
- 23, 737, 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303,
- 241, 210, 179, 117, 86, 55, 738, 707, 614, 583, 490, 459, 366,
- 335, 242, 211, 118, 87, 739, 615, 491, 367, 243, 119, 768, 644,
- 520, 396, 272, 148, 24, 800, 769, 676, 645, 552, 521, 428, 397,
- 304, 273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584,
- 553, 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57,
- 26, 864, 833, 802, 771, 740, 709, 678, 647, 616, 585, 554, 523,
- 492, 461, 430, 399, 368, 337, 306, 275, 244, 213, 182, 151, 120,
- 89, 58, 27, 865, 834, 803, 741, 710, 679, 617, 586, 555, 493,
- 462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835,
- 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867,
- 743, 619, 495, 371, 247, 123, 896, 772, 648, 524, 400, 276, 152,
- 28, 928, 897, 804, 773, 680, 649, 556, 525, 432, 401, 308, 277,
- 184, 153, 60, 29, 960, 929, 898, 836, 805, 774, 712, 681, 650,
- 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, 92,
- 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682,
- 651, 620, 589, 558, 527, 496, 465, 434, 403, 372, 341, 310, 279,
- 248, 217, 186, 155, 124, 93, 62, 31, 993, 962, 931, 869, 838,
- 807, 745, 714, 683, 621, 590, 559, 497, 466, 435, 373, 342, 311,
- 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, 622,
- 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623,
- 499, 375, 251, 127, 900, 776, 652, 528, 404, 280, 156, 932, 901,
- 808, 777, 684, 653, 560, 529, 436, 405, 312, 281, 188, 157, 964,
- 933, 902, 840, 809, 778, 716, 685, 654, 592, 561, 530, 468, 437,
- 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, 872, 841,
- 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, 438,
- 407, 376, 345, 314, 283, 252, 221, 190, 159, 997, 966, 935, 873,
- 842, 811, 749, 718, 687, 625, 594, 563, 501, 470, 439, 377, 346,
- 315, 253, 222, 191, 998, 967, 874, 843, 750, 719, 626, 595, 502,
- 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, 379, 255, 904,
- 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, 564, 533,
- 440, 409, 316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658,
- 596, 565, 534, 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907,
- 876, 845, 814, 783, 752, 721, 690, 659, 628, 597, 566, 535, 504,
- 473, 442, 411, 380, 349, 318, 287, 1001, 970, 939, 877, 846, 815,
- 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, 350, 319, 1002,
- 971, 878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879,
- 755, 631, 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785,
- 692, 661, 568, 537, 444, 413, 972, 941, 910, 848, 817, 786, 724,
- 693, 662, 600, 569, 538, 476, 445, 414, 1004, 973, 942, 911, 880,
- 849, 818, 787, 756, 725, 694, 663, 632, 601, 570, 539, 508, 477,
- 446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602,
- 571, 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510,
- 479, 1007, 883, 759, 635, 511, 912, 788, 664, 540, 944, 913, 820,
- 789, 696, 665, 572, 541, 976, 945, 914, 852, 821, 790, 728, 697,
- 666, 604, 573, 542, 1008, 977, 946, 915, 884, 853, 822, 791, 760,
- 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823,
- 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638,
- 607, 1011, 887, 763, 639, 916, 792, 668, 948, 917, 824, 793, 700,
- 669, 980, 949, 918, 856, 825, 794, 732, 701, 670, 1012, 981, 950,
- 919, 888, 857, 826, 795, 764, 733, 702, 671, 1013, 982, 951, 889,
- 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, 891,
- 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798,
- 1016, 985, 954, 923, 892, 861, 830, 799, 1017, 986, 955, 893, 862,
- 831, 1018, 987, 894, 863, 1019, 895, 924, 956, 925, 988, 957, 926,
- 1020, 989, 958, 927, 1021, 990, 959, 1022, 991, 1023,
-};
-
-// Scan over two rectangular vertical partitions one after the other
-DECLARE_ALIGNED(16, static const int16_t, v2_scan_32x32[1024]) = {
- 0, 1, 32, 33, 2, 64, 34, 65, 66, 3, 96, 35, 97,
- 67, 98, 4, 128, 36, 129, 99, 68, 130, 5, 100, 131, 160,
- 37, 161, 69, 162, 132, 101, 163, 6, 192, 38, 193, 70, 194,
- 133, 164, 102, 195, 7, 224, 39, 165, 225, 134, 196, 71, 226,
- 103, 227, 166, 197, 8, 256, 40, 135, 228, 257, 72, 258, 198,
- 104, 259, 167, 229, 136, 260, 9, 288, 41, 289, 73, 199, 230,
- 290, 168, 261, 105, 291, 137, 292, 231, 10, 200, 262, 320, 42,
- 321, 74, 322, 169, 293, 106, 323, 232, 263, 138, 324, 201, 294,
- 11, 352, 43, 353, 75, 170, 325, 354, 264, 107, 233, 295, 355,
- 202, 326, 139, 356, 12, 384, 44, 265, 296, 385, 171, 357, 76,
- 386, 234, 327, 108, 387, 203, 358, 140, 388, 297, 266, 328, 13,
- 172, 389, 416, 45, 235, 359, 417, 77, 418, 109, 419, 204, 390,
- 298, 329, 141, 267, 360, 420, 236, 391, 173, 421, 14, 448, 46,
- 449, 78, 330, 450, 299, 361, 110, 205, 422, 451, 268, 392, 142,
- 452, 237, 423, 174, 331, 362, 453, 15, 300, 393, 480, 47, 481,
- 79, 482, 206, 454, 269, 424, 111, 483, 143, 484, 363, 332, 394,
- 238, 455, 175, 301, 425, 485, 512, 513, 270, 456, 514, 207, 486,
- 364, 395, 515, 333, 426, 516, 239, 487, 302, 457, 517, 396, 271,
- 488, 544, 365, 427, 545, 518, 546, 334, 458, 547, 519, 548, 303,
- 489, 397, 428, 549, 366, 459, 520, 576, 335, 490, 550, 577, 578,
- 579, 521, 429, 551, 398, 460, 580, 367, 491, 581, 552, 522, 582,
- 608, 609, 430, 461, 610, 399, 492, 553, 611, 583, 523, 612, 613,
- 584, 554, 462, 431, 493, 614, 524, 640, 641, 642, 585, 643, 555,
- 615, 644, 463, 494, 586, 525, 616, 645, 556, 646, 672, 617, 673,
- 587, 674, 647, 495, 675, 526, 676, 557, 618, 648, 677, 588, 678,
- 527, 649, 619, 704, 558, 705, 706, 679, 589, 707, 650, 708, 620,
- 680, 709, 559, 590, 710, 651, 681, 736, 621, 737, 711, 738, 739,
- 682, 652, 740, 712, 591, 741, 622, 683, 713, 742, 653, 768, 769,
- 743, 770, 714, 684, 771, 623, 772, 744, 654, 773, 715, 685, 745,
- 774, 655, 775, 800, 801, 716, 746, 802, 803, 686, 776, 804, 747,
- 805, 717, 777, 806, 687, 748, 807, 778, 832, 833, 718, 834, 835,
- 808, 836, 779, 749, 837, 809, 719, 838, 780, 750, 810, 839, 864,
- 865, 866, 867, 840, 781, 868, 811, 751, 869, 841, 870, 812, 782,
- 842, 871, 896, 897, 898, 872, 899, 813, 843, 900, 783, 901, 873,
- 844, 902, 814, 874, 903, 928, 929, 845, 930, 904, 815, 875, 931,
- 932, 905, 933, 846, 876, 934, 906, 935, 877, 960, 847, 961, 962,
- 907, 936, 963, 964, 937, 878, 965, 908, 966, 938, 967, 909, 879,
- 992, 939, 993, 968, 994, 995, 996, 910, 969, 940, 997, 998, 970,
- 911, 941, 999, 971, 1000, 942, 1001, 972, 1002, 943, 973, 1003, 974,
- 1004, 975, 1005, 1006, 1007, 16, 48, 80, 112, 144, 176, 17, 49,
- 208, 81, 113, 145, 240, 177, 272, 18, 50, 209, 82, 114, 304,
- 241, 146, 178, 273, 336, 210, 19, 51, 83, 115, 305, 242, 147,
- 368, 179, 274, 337, 211, 20, 400, 52, 84, 306, 116, 243, 369,
- 148, 338, 180, 275, 432, 401, 212, 21, 53, 307, 85, 370, 244,
- 117, 464, 149, 433, 339, 276, 181, 402, 213, 308, 496, 371, 22,
- 54, 465, 86, 245, 118, 434, 150, 340, 277, 403, 182, 528, 497,
- 214, 466, 372, 309, 23, 55, 435, 87, 246, 119, 341, 404, 151,
- 529, 560, 278, 498, 183, 467, 373, 215, 310, 436, 24, 56, 247,
- 561, 88, 530, 592, 342, 120, 405, 499, 152, 279, 468, 184, 374,
- 311, 437, 216, 562, 593, 531, 624, 25, 248, 500, 57, 406, 89,
- 343, 121, 469, 280, 153, 594, 185, 375, 563, 625, 438, 532, 656,
- 312, 217, 501, 407, 249, 26, 344, 58, 90, 470, 122, 595, 626,
- 281, 564, 657, 154, 376, 533, 688, 439, 186, 313, 502, 218, 408,
- 627, 596, 658, 250, 345, 471, 27, 59, 565, 689, 91, 123, 282,
- 534, 720, 155, 440, 377, 187, 503, 314, 628, 659, 219, 597, 690,
- 409, 472, 566, 721, 346, 251, 28, 60, 535, 752, 92, 124, 283,
- 441, 378, 156, 660, 504, 629, 691, 598, 722, 188, 315, 567, 753,
- 220, 410, 473, 347, 536, 784, 252, 29, 661, 692, 61, 93, 442,
- 630, 723, 284, 125, 379, 505, 599, 754, 157, 316, 568, 785, 189,
- 474, 411, 221, 537, 816, 693, 348, 662, 724, 253, 631, 755, 443,
- 30, 600, 786, 62, 506, 94, 285, 380, 126, 569, 817, 158, 317,
- 190, 475, 694, 725, 412, 663, 756, 538, 848, 222, 632, 787, 349,
- 254, 601, 818, 444, 507, 31, 63, 381, 286, 95, 570, 849, 726,
- 127, 695, 757, 664, 788, 159, 476, 318, 413, 539, 880, 191, 633,
- 819, 223, 350, 602, 850, 508, 255, 445, 727, 758, 696, 789, 571,
- 881, 382, 287, 665, 820, 477, 634, 851, 540, 912, 319, 414, 603,
- 882, 759, 728, 790, 351, 509, 697, 821, 446, 572, 913, 666, 852,
- 383, 635, 883, 478, 541, 944, 415, 760, 791, 604, 914, 729, 822,
- 698, 853, 510, 667, 884, 447, 573, 945, 636, 915, 792, 761, 823,
- 542, 976, 479, 730, 854, 605, 946, 699, 885, 668, 916, 511, 574,
- 977, 793, 824, 637, 947, 762, 855, 731, 886, 543, 1008, 606, 978,
- 700, 917, 669, 948, 575, 825, 1009, 794, 856, 763, 887, 638, 979,
- 732, 918, 701, 949, 607, 1010, 670, 980, 826, 857, 795, 888, 764,
- 919, 639, 1011, 733, 950, 702, 981, 858, 827, 889, 796, 920, 671,
- 1012, 765, 951, 734, 982, 703, 1013, 859, 890, 828, 921, 797, 952,
- 766, 983, 735, 1014, 891, 860, 922, 829, 953, 798, 984, 767, 1015,
- 892, 923, 861, 954, 830, 985, 799, 1016, 924, 893, 955, 862, 986,
- 831, 1017, 925, 956, 894, 987, 863, 1018, 957, 926, 988, 895, 1019,
- 958, 989, 927, 1020, 990, 959, 1021, 991, 1022, 1023,
-};
-
-// Scan over two rectangular horizontal partitions one after the other
-DECLARE_ALIGNED(16, static const int16_t, h2_scan_32x32[1024]) = {
- 0, 1, 32, 33, 2, 64, 34, 65, 66, 3, 96, 35, 97,
- 67, 98, 4, 128, 36, 129, 99, 68, 130, 5, 100, 131, 160,
- 37, 161, 69, 162, 132, 101, 163, 6, 192, 38, 193, 70, 194,
- 133, 164, 102, 195, 7, 224, 39, 165, 225, 134, 196, 71, 226,
- 103, 227, 166, 197, 8, 256, 40, 135, 228, 257, 72, 258, 198,
- 104, 259, 167, 229, 136, 260, 9, 288, 41, 289, 73, 199, 230,
- 290, 168, 261, 105, 291, 137, 292, 231, 10, 200, 262, 320, 42,
- 321, 74, 322, 169, 293, 106, 323, 232, 263, 138, 324, 201, 294,
- 11, 352, 43, 353, 75, 170, 325, 354, 264, 107, 233, 295, 355,
- 202, 326, 139, 356, 12, 384, 44, 265, 296, 385, 171, 357, 76,
- 386, 234, 327, 108, 387, 203, 358, 140, 388, 297, 266, 328, 13,
- 172, 389, 416, 45, 235, 359, 417, 77, 418, 109, 419, 204, 390,
- 298, 329, 141, 267, 360, 420, 236, 391, 173, 421, 14, 448, 46,
- 449, 78, 330, 450, 299, 361, 110, 205, 422, 451, 268, 392, 142,
- 452, 237, 423, 174, 331, 362, 453, 15, 300, 393, 480, 47, 481,
- 79, 482, 206, 454, 269, 424, 111, 483, 143, 484, 363, 332, 394,
- 238, 455, 175, 301, 425, 485, 16, 48, 80, 270, 456, 207, 486,
- 112, 364, 395, 333, 426, 144, 239, 487, 302, 457, 176, 396, 17,
- 271, 488, 49, 365, 427, 208, 81, 334, 458, 113, 145, 240, 303,
- 489, 397, 428, 177, 366, 459, 272, 18, 50, 209, 335, 490, 82,
- 114, 304, 241, 429, 146, 398, 460, 367, 491, 178, 273, 336, 210,
- 19, 51, 83, 430, 461, 399, 492, 115, 305, 242, 147, 368, 179,
- 274, 337, 462, 431, 493, 211, 20, 400, 52, 84, 306, 116, 243,
- 369, 148, 463, 494, 338, 180, 275, 432, 401, 212, 21, 53, 307,
- 85, 370, 244, 117, 495, 464, 149, 433, 339, 276, 181, 402, 213,
- 308, 496, 371, 22, 54, 465, 86, 245, 118, 434, 150, 340, 277,
- 403, 182, 497, 214, 466, 372, 309, 23, 55, 435, 87, 246, 119,
- 341, 404, 151, 278, 498, 183, 467, 373, 215, 310, 436, 24, 56,
- 247, 88, 342, 120, 405, 499, 152, 279, 468, 184, 374, 311, 437,
- 216, 25, 248, 500, 57, 406, 89, 343, 121, 469, 280, 153, 185,
- 375, 438, 312, 217, 501, 407, 249, 26, 344, 58, 90, 470, 122,
- 281, 154, 376, 439, 186, 313, 502, 218, 408, 250, 345, 471, 27,
- 59, 91, 123, 282, 155, 440, 377, 187, 503, 314, 219, 409, 472,
- 346, 251, 28, 60, 92, 124, 283, 441, 378, 156, 504, 188, 315,
- 220, 410, 473, 347, 252, 29, 61, 93, 442, 284, 125, 379, 505,
- 157, 316, 189, 474, 411, 221, 348, 253, 443, 30, 62, 506, 94,
- 285, 380, 126, 158, 317, 190, 475, 412, 222, 349, 254, 444, 507,
- 31, 63, 381, 286, 95, 127, 159, 476, 318, 413, 191, 223, 350,
- 508, 255, 445, 382, 287, 477, 319, 414, 351, 509, 446, 383, 478,
- 415, 510, 447, 479, 511, 512, 513, 514, 515, 516, 517, 544, 545,
- 518, 546, 547, 519, 548, 549, 520, 576, 550, 577, 578, 579, 521,
- 551, 580, 581, 552, 522, 582, 608, 609, 610, 553, 611, 583, 523,
- 612, 613, 584, 554, 614, 524, 640, 641, 642, 585, 643, 555, 615,
- 644, 586, 525, 616, 645, 556, 646, 672, 617, 673, 587, 674, 647,
- 675, 526, 676, 557, 618, 648, 677, 588, 678, 527, 649, 619, 704,
- 558, 705, 706, 679, 589, 707, 650, 708, 620, 680, 709, 528, 559,
- 590, 710, 651, 681, 736, 621, 737, 711, 738, 739, 682, 652, 529,
- 560, 740, 712, 591, 741, 622, 683, 713, 742, 653, 768, 769, 561,
- 743, 530, 592, 770, 714, 684, 771, 623, 772, 744, 654, 773, 715,
- 685, 745, 774, 562, 593, 531, 624, 655, 775, 800, 801, 716, 746,
- 802, 803, 686, 776, 804, 594, 563, 625, 747, 805, 717, 532, 656,
- 777, 806, 687, 748, 807, 778, 832, 833, 718, 834, 595, 626, 835,
- 564, 657, 808, 836, 533, 688, 779, 749, 837, 809, 719, 838, 780,
- 627, 596, 658, 750, 810, 839, 864, 565, 689, 865, 866, 867, 534,
- 720, 840, 781, 868, 811, 751, 869, 841, 628, 659, 597, 690, 870,
- 812, 782, 566, 721, 842, 871, 896, 535, 752, 897, 898, 872, 899,
- 813, 843, 660, 900, 783, 629, 691, 598, 722, 901, 873, 567, 753,
- 844, 902, 814, 874, 536, 784, 903, 661, 692, 928, 929, 630, 723,
- 845, 930, 904, 815, 875, 931, 599, 754, 932, 568, 785, 905, 933,
- 846, 876, 934, 537, 816, 693, 662, 724, 906, 631, 755, 935, 877,
- 600, 786, 960, 847, 961, 962, 907, 936, 963, 569, 817, 964, 937,
- 694, 725, 878, 965, 908, 663, 756, 538, 848, 966, 632, 787, 938,
- 601, 818, 967, 909, 879, 992, 939, 993, 968, 570, 849, 994, 726,
- 695, 757, 995, 664, 788, 996, 910, 969, 539, 880, 940, 633, 819,
- 997, 998, 602, 850, 970, 911, 941, 999, 727, 758, 696, 789, 571,
- 881, 971, 665, 820, 1000, 634, 851, 942, 540, 912, 1001, 972, 603,
- 882, 759, 728, 790, 1002, 697, 821, 943, 973, 572, 913, 666, 852,
- 1003, 635, 883, 974, 541, 944, 760, 791, 1004, 604, 914, 729, 822,
- 698, 853, 975, 667, 884, 573, 945, 1005, 636, 915, 792, 761, 823,
- 542, 976, 1006, 730, 854, 605, 946, 699, 885, 668, 916, 1007, 574,
- 977, 793, 824, 637, 947, 762, 855, 731, 886, 543, 1008, 606, 978,
- 700, 917, 669, 948, 575, 825, 1009, 794, 856, 763, 887, 638, 979,
- 732, 918, 701, 949, 607, 1010, 670, 980, 826, 857, 795, 888, 764,
- 919, 639, 1011, 733, 950, 702, 981, 858, 827, 889, 796, 920, 671,
- 1012, 765, 951, 734, 982, 703, 1013, 859, 890, 828, 921, 797, 952,
- 766, 983, 735, 1014, 891, 860, 922, 829, 953, 798, 984, 767, 1015,
- 892, 923, 861, 954, 830, 985, 799, 1016, 924, 893, 955, 862, 986,
- 831, 1017, 925, 956, 894, 987, 863, 1018, 957, 926, 988, 895, 1019,
- 958, 989, 927, 1020, 990, 959, 1021, 991, 1022, 1023,
-};
-
-// Scan where the top left quarter is scanned first
-DECLARE_ALIGNED(16, static const int16_t, qtr_scan_32x32[1024]) = {
- 0, 1, 32, 33, 2, 64, 34, 65, 66, 3, 96, 35, 97,
- 67, 98, 4, 128, 36, 129, 99, 68, 130, 5, 100, 131, 160,
- 37, 161, 69, 162, 132, 101, 163, 6, 192, 38, 193, 70, 194,
- 133, 164, 102, 195, 7, 224, 39, 165, 225, 134, 196, 71, 226,
- 103, 227, 166, 197, 8, 256, 40, 135, 228, 257, 72, 258, 198,
- 104, 259, 167, 229, 136, 260, 9, 288, 41, 289, 73, 199, 230,
- 290, 168, 261, 105, 291, 137, 292, 231, 10, 200, 262, 320, 42,
- 321, 74, 322, 169, 293, 106, 323, 232, 263, 138, 324, 201, 294,
- 11, 352, 43, 353, 75, 170, 325, 354, 264, 107, 233, 295, 355,
- 202, 326, 139, 356, 12, 384, 44, 265, 296, 385, 171, 357, 76,
- 386, 234, 327, 108, 387, 203, 358, 140, 388, 297, 266, 328, 13,
- 172, 389, 416, 45, 235, 359, 417, 77, 418, 109, 419, 204, 390,
- 298, 329, 141, 267, 360, 420, 236, 391, 173, 421, 14, 448, 46,
- 449, 78, 330, 450, 299, 361, 110, 205, 422, 451, 268, 392, 142,
- 452, 237, 423, 174, 331, 362, 453, 15, 300, 393, 480, 47, 481,
- 79, 482, 206, 454, 269, 424, 111, 483, 143, 484, 363, 332, 394,
- 238, 455, 175, 301, 425, 485, 270, 456, 207, 486, 364, 395, 333,
- 426, 239, 487, 302, 457, 396, 271, 488, 365, 427, 334, 458, 303,
- 489, 397, 428, 366, 459, 335, 490, 429, 398, 460, 367, 491, 430,
- 461, 399, 492, 462, 431, 493, 463, 494, 495, 16, 512, 48, 513,
- 80, 514, 112, 515, 144, 516, 176, 517, 17, 544, 49, 545, 208,
- 518, 81, 546, 113, 547, 145, 240, 519, 548, 177, 549, 272, 520,
- 18, 576, 50, 209, 550, 577, 82, 578, 114, 579, 304, 521, 241,
- 551, 146, 580, 178, 581, 273, 552, 336, 522, 210, 582, 19, 608,
- 51, 609, 83, 610, 115, 305, 553, 611, 242, 583, 147, 368, 523,
- 612, 179, 613, 274, 584, 337, 554, 211, 614, 20, 400, 524, 640,
- 52, 641, 84, 642, 306, 585, 116, 643, 243, 369, 555, 615, 148,
- 644, 338, 586, 180, 275, 432, 525, 616, 645, 401, 556, 212, 646,
- 21, 672, 53, 307, 617, 673, 85, 370, 587, 674, 244, 647, 117,
- 675, 464, 526, 149, 676, 433, 557, 339, 618, 276, 648, 181, 677,
- 402, 588, 213, 678, 308, 496, 527, 649, 371, 619, 22, 704, 54,
- 465, 558, 705, 86, 706, 245, 679, 118, 434, 589, 707, 150, 340,
- 650, 708, 277, 403, 620, 680, 182, 709, 528, 497, 559, 214, 466,
- 590, 710, 372, 651, 309, 681, 23, 736, 55, 435, 621, 737, 87,
- 246, 711, 738, 119, 739, 341, 682, 404, 652, 151, 529, 560, 740,
- 278, 712, 498, 591, 183, 741, 467, 622, 373, 683, 215, 310, 713,
- 742, 436, 653, 24, 768, 56, 769, 247, 561, 743, 88, 530, 592,
- 770, 342, 714, 120, 405, 684, 771, 499, 623, 152, 772, 279, 744,
- 468, 654, 184, 773, 374, 715, 311, 437, 685, 745, 216, 774, 562,
- 593, 531, 624, 25, 248, 500, 655, 775, 800, 57, 801, 406, 716,
- 89, 343, 746, 802, 121, 803, 469, 686, 280, 776, 153, 804, 594,
- 185, 375, 563, 625, 747, 805, 438, 717, 532, 656, 312, 777, 217,
- 806, 501, 687, 407, 748, 249, 807, 26, 344, 778, 832, 58, 833,
- 90, 470, 718, 834, 122, 595, 626, 835, 281, 564, 657, 808, 154,
- 836, 376, 533, 688, 779, 439, 749, 186, 837, 313, 809, 502, 719,
- 218, 838, 408, 780, 627, 596, 658, 250, 345, 471, 750, 810, 839,
- 27, 864, 59, 565, 689, 865, 91, 866, 123, 867, 282, 534, 720,
- 840, 155, 440, 781, 868, 377, 811, 187, 503, 751, 869, 314, 841,
- 628, 659, 219, 597, 690, 870, 409, 812, 472, 782, 566, 721, 346,
- 842, 251, 871, 28, 896, 60, 535, 752, 897, 92, 898, 124, 283,
- 872, 899, 441, 813, 378, 843, 156, 660, 900, 504, 783, 629, 691,
- 598, 722, 188, 901, 315, 873, 567, 753, 220, 410, 844, 902, 473,
- 814, 347, 874, 536, 784, 252, 903, 29, 661, 692, 928, 61, 929,
- 93, 442, 630, 723, 845, 930, 284, 904, 125, 379, 505, 815, 875,
- 931, 599, 754, 157, 932, 316, 568, 785, 905, 189, 933, 474, 846,
- 411, 876, 221, 934, 537, 816, 693, 348, 662, 724, 906, 253, 631,
- 755, 935, 443, 877, 30, 600, 786, 960, 62, 506, 847, 961, 94,
- 962, 285, 380, 907, 936, 126, 963, 569, 817, 158, 964, 317, 937,
- 190, 475, 694, 725, 878, 965, 412, 908, 663, 756, 538, 848, 222,
- 966, 632, 787, 349, 938, 254, 601, 818, 967, 444, 909, 507, 879,
- 31, 992, 63, 381, 939, 993, 286, 968, 95, 570, 849, 994, 726,
- 127, 695, 757, 995, 664, 788, 159, 996, 476, 910, 318, 969, 413,
- 539, 880, 940, 191, 633, 819, 997, 223, 998, 350, 602, 850, 970,
- 508, 911, 255, 445, 941, 999, 727, 758, 696, 789, 571, 881, 382,
- 971, 287, 665, 820, 1000, 477, 634, 851, 942, 540, 912, 319, 1001,
- 414, 972, 603, 882, 759, 728, 790, 351, 1002, 509, 697, 821, 943,
- 446, 973, 572, 913, 666, 852, 383, 1003, 635, 883, 478, 974, 541,
- 944, 415, 760, 791, 1004, 604, 914, 729, 822, 698, 853, 510, 975,
- 667, 884, 447, 573, 945, 1005, 636, 915, 792, 761, 823, 542, 976,
- 479, 1006, 730, 854, 605, 946, 699, 885, 668, 916, 511, 1007, 574,
- 977, 793, 824, 637, 947, 762, 855, 731, 886, 543, 1008, 606, 978,
- 700, 917, 669, 948, 575, 825, 1009, 794, 856, 763, 887, 638, 979,
- 732, 918, 701, 949, 607, 1010, 670, 980, 826, 857, 795, 888, 764,
- 919, 639, 1011, 733, 950, 702, 981, 858, 827, 889, 796, 920, 671,
- 1012, 765, 951, 734, 982, 703, 1013, 859, 890, 828, 921, 797, 952,
- 766, 983, 735, 1014, 891, 860, 922, 829, 953, 798, 984, 767, 1015,
- 892, 923, 861, 954, 830, 985, 799, 1016, 924, 893, 955, 862, 986,
- 831, 1017, 925, 956, 894, 987, 863, 1018, 957, 926, 988, 895, 1019,
- 958, 989, 927, 1020, 990, 959, 1021, 991, 1022, 1023,
-};
-
-#if CONFIG_TX64X64
-DECLARE_ALIGNED(16, static const int16_t, default_scan_32x64[2048]) = {
- 0, 1, 32, 2, 33, 64, 3, 34, 65, 96, 4, 35, 66,
- 97, 128, 5, 36, 67, 98, 129, 160, 6, 37, 68, 99, 130,
- 161, 192, 7, 38, 69, 100, 131, 162, 193, 224, 8, 39, 70,
- 101, 132, 163, 194, 225, 256, 9, 40, 71, 102, 133, 164, 195,
- 226, 257, 288, 10, 41, 72, 103, 134, 165, 196, 227, 258, 289,
- 320, 11, 42, 73, 104, 135, 166, 197, 228, 259, 290, 321, 352,
- 12, 43, 74, 105, 136, 167, 198, 229, 260, 291, 322, 353, 384,
- 13, 44, 75, 106, 137, 168, 199, 230, 261, 292, 323, 354, 385,
- 416, 14, 45, 76, 107, 138, 169, 200, 231, 262, 293, 324, 355,
- 386, 417, 448, 15, 46, 77, 108, 139, 170, 201, 232, 263, 294,
- 325, 356, 387, 418, 449, 480, 16, 47, 78, 109, 140, 171, 202,
- 233, 264, 295, 326, 357, 388, 419, 450, 481, 512, 17, 48, 79,
- 110, 141, 172, 203, 234, 265, 296, 327, 358, 389, 420, 451, 482,
- 513, 544, 18, 49, 80, 111, 142, 173, 204, 235, 266, 297, 328,
- 359, 390, 421, 452, 483, 514, 545, 576, 19, 50, 81, 112, 143,
- 174, 205, 236, 267, 298, 329, 360, 391, 422, 453, 484, 515, 546,
- 577, 608, 20, 51, 82, 113, 144, 175, 206, 237, 268, 299, 330,
- 361, 392, 423, 454, 485, 516, 547, 578, 609, 640, 21, 52, 83,
- 114, 145, 176, 207, 238, 269, 300, 331, 362, 393, 424, 455, 486,
- 517, 548, 579, 610, 641, 672, 22, 53, 84, 115, 146, 177, 208,
- 239, 270, 301, 332, 363, 394, 425, 456, 487, 518, 549, 580, 611,
- 642, 673, 704, 23, 54, 85, 116, 147, 178, 209, 240, 271, 302,
- 333, 364, 395, 426, 457, 488, 519, 550, 581, 612, 643, 674, 705,
- 736, 24, 55, 86, 117, 148, 179, 210, 241, 272, 303, 334, 365,
- 396, 427, 458, 489, 520, 551, 582, 613, 644, 675, 706, 737, 768,
- 25, 56, 87, 118, 149, 180, 211, 242, 273, 304, 335, 366, 397,
- 428, 459, 490, 521, 552, 583, 614, 645, 676, 707, 738, 769, 800,
- 26, 57, 88, 119, 150, 181, 212, 243, 274, 305, 336, 367, 398,
- 429, 460, 491, 522, 553, 584, 615, 646, 677, 708, 739, 770, 801,
- 832, 27, 58, 89, 120, 151, 182, 213, 244, 275, 306, 337, 368,
- 399, 430, 461, 492, 523, 554, 585, 616, 647, 678, 709, 740, 771,
- 802, 833, 864, 28, 59, 90, 121, 152, 183, 214, 245, 276, 307,
- 338, 369, 400, 431, 462, 493, 524, 555, 586, 617, 648, 679, 710,
- 741, 772, 803, 834, 865, 896, 29, 60, 91, 122, 153, 184, 215,
- 246, 277, 308, 339, 370, 401, 432, 463, 494, 525, 556, 587, 618,
- 649, 680, 711, 742, 773, 804, 835, 866, 897, 928, 30, 61, 92,
- 123, 154, 185, 216, 247, 278, 309, 340, 371, 402, 433, 464, 495,
- 526, 557, 588, 619, 650, 681, 712, 743, 774, 805, 836, 867, 898,
- 929, 960, 31, 62, 93, 124, 155, 186, 217, 248, 279, 310, 341,
- 372, 403, 434, 465, 496, 527, 558, 589, 620, 651, 682, 713, 744,
- 775, 806, 837, 868, 899, 930, 961, 992, 63, 94, 125, 156, 187,
- 218, 249, 280, 311, 342, 373, 404, 435, 466, 497, 528, 559, 590,
- 621, 652, 683, 714, 745, 776, 807, 838, 869, 900, 931, 962, 993,
- 1024, 95, 126, 157, 188, 219, 250, 281, 312, 343, 374, 405, 436,
- 467, 498, 529, 560, 591, 622, 653, 684, 715, 746, 777, 808, 839,
- 870, 901, 932, 963, 994, 1025, 1056, 127, 158, 189, 220, 251, 282,
- 313, 344, 375, 406, 437, 468, 499, 530, 561, 592, 623, 654, 685,
- 716, 747, 778, 809, 840, 871, 902, 933, 964, 995, 1026, 1057, 1088,
- 159, 190, 221, 252, 283, 314, 345, 376, 407, 438, 469, 500, 531,
- 562, 593, 624, 655, 686, 717, 748, 779, 810, 841, 872, 903, 934,
- 965, 996, 1027, 1058, 1089, 1120, 191, 222, 253, 284, 315, 346, 377,
- 408, 439, 470, 501, 532, 563, 594, 625, 656, 687, 718, 749, 780,
- 811, 842, 873, 904, 935, 966, 997, 1028, 1059, 1090, 1121, 1152, 223,
- 254, 285, 316, 347, 378, 409, 440, 471, 502, 533, 564, 595, 626,
- 657, 688, 719, 750, 781, 812, 843, 874, 905, 936, 967, 998, 1029,
- 1060, 1091, 1122, 1153, 1184, 255, 286, 317, 348, 379, 410, 441, 472,
- 503, 534, 565, 596, 627, 658, 689, 720, 751, 782, 813, 844, 875,
- 906, 937, 968, 999, 1030, 1061, 1092, 1123, 1154, 1185, 1216, 287, 318,
- 349, 380, 411, 442, 473, 504, 535, 566, 597, 628, 659, 690, 721,
- 752, 783, 814, 845, 876, 907, 938, 969, 1000, 1031, 1062, 1093, 1124,
- 1155, 1186, 1217, 1248, 319, 350, 381, 412, 443, 474, 505, 536, 567,
- 598, 629, 660, 691, 722, 753, 784, 815, 846, 877, 908, 939, 970,
- 1001, 1032, 1063, 1094, 1125, 1156, 1187, 1218, 1249, 1280, 351, 382, 413,
- 444, 475, 506, 537, 568, 599, 630, 661, 692, 723, 754, 785, 816,
- 847, 878, 909, 940, 971, 1002, 1033, 1064, 1095, 1126, 1157, 1188, 1219,
- 1250, 1281, 1312, 383, 414, 445, 476, 507, 538, 569, 600, 631, 662,
- 693, 724, 755, 786, 817, 848, 879, 910, 941, 972, 1003, 1034, 1065,
- 1096, 1127, 1158, 1189, 1220, 1251, 1282, 1313, 1344, 415, 446, 477, 508,
- 539, 570, 601, 632, 663, 694, 725, 756, 787, 818, 849, 880, 911,
- 942, 973, 1004, 1035, 1066, 1097, 1128, 1159, 1190, 1221, 1252, 1283, 1314,
- 1345, 1376, 447, 478, 509, 540, 571, 602, 633, 664, 695, 726, 757,
- 788, 819, 850, 881, 912, 943, 974, 1005, 1036, 1067, 1098, 1129, 1160,
- 1191, 1222, 1253, 1284, 1315, 1346, 1377, 1408, 479, 510, 541, 572, 603,
- 634, 665, 696, 727, 758, 789, 820, 851, 882, 913, 944, 975, 1006,
- 1037, 1068, 1099, 1130, 1161, 1192, 1223, 1254, 1285, 1316, 1347, 1378, 1409,
- 1440, 511, 542, 573, 604, 635, 666, 697, 728, 759, 790, 821, 852,
- 883, 914, 945, 976, 1007, 1038, 1069, 1100, 1131, 1162, 1193, 1224, 1255,
- 1286, 1317, 1348, 1379, 1410, 1441, 1472, 543, 574, 605, 636, 667, 698,
- 729, 760, 791, 822, 853, 884, 915, 946, 977, 1008, 1039, 1070, 1101,
- 1132, 1163, 1194, 1225, 1256, 1287, 1318, 1349, 1380, 1411, 1442, 1473, 1504,
- 575, 606, 637, 668, 699, 730, 761, 792, 823, 854, 885, 916, 947,
- 978, 1009, 1040, 1071, 1102, 1133, 1164, 1195, 1226, 1257, 1288, 1319, 1350,
- 1381, 1412, 1443, 1474, 1505, 1536, 607, 638, 669, 700, 731, 762, 793,
- 824, 855, 886, 917, 948, 979, 1010, 1041, 1072, 1103, 1134, 1165, 1196,
- 1227, 1258, 1289, 1320, 1351, 1382, 1413, 1444, 1475, 1506, 1537, 1568, 639,
- 670, 701, 732, 763, 794, 825, 856, 887, 918, 949, 980, 1011, 1042,
- 1073, 1104, 1135, 1166, 1197, 1228, 1259, 1290, 1321, 1352, 1383, 1414, 1445,
- 1476, 1507, 1538, 1569, 1600, 671, 702, 733, 764, 795, 826, 857, 888,
- 919, 950, 981, 1012, 1043, 1074, 1105, 1136, 1167, 1198, 1229, 1260, 1291,
- 1322, 1353, 1384, 1415, 1446, 1477, 1508, 1539, 1570, 1601, 1632, 703, 734,
- 765, 796, 827, 858, 889, 920, 951, 982, 1013, 1044, 1075, 1106, 1137,
- 1168, 1199, 1230, 1261, 1292, 1323, 1354, 1385, 1416, 1447, 1478, 1509, 1540,
- 1571, 1602, 1633, 1664, 735, 766, 797, 828, 859, 890, 921, 952, 983,
- 1014, 1045, 1076, 1107, 1138, 1169, 1200, 1231, 1262, 1293, 1324, 1355, 1386,
- 1417, 1448, 1479, 1510, 1541, 1572, 1603, 1634, 1665, 1696, 767, 798, 829,
- 860, 891, 922, 953, 984, 1015, 1046, 1077, 1108, 1139, 1170, 1201, 1232,
- 1263, 1294, 1325, 1356, 1387, 1418, 1449, 1480, 1511, 1542, 1573, 1604, 1635,
- 1666, 1697, 1728, 799, 830, 861, 892, 923, 954, 985, 1016, 1047, 1078,
- 1109, 1140, 1171, 1202, 1233, 1264, 1295, 1326, 1357, 1388, 1419, 1450, 1481,
- 1512, 1543, 1574, 1605, 1636, 1667, 1698, 1729, 1760, 831, 862, 893, 924,
- 955, 986, 1017, 1048, 1079, 1110, 1141, 1172, 1203, 1234, 1265, 1296, 1327,
- 1358, 1389, 1420, 1451, 1482, 1513, 1544, 1575, 1606, 1637, 1668, 1699, 1730,
- 1761, 1792, 863, 894, 925, 956, 987, 1018, 1049, 1080, 1111, 1142, 1173,
- 1204, 1235, 1266, 1297, 1328, 1359, 1390, 1421, 1452, 1483, 1514, 1545, 1576,
- 1607, 1638, 1669, 1700, 1731, 1762, 1793, 1824, 895, 926, 957, 988, 1019,
- 1050, 1081, 1112, 1143, 1174, 1205, 1236, 1267, 1298, 1329, 1360, 1391, 1422,
- 1453, 1484, 1515, 1546, 1577, 1608, 1639, 1670, 1701, 1732, 1763, 1794, 1825,
- 1856, 927, 958, 989, 1020, 1051, 1082, 1113, 1144, 1175, 1206, 1237, 1268,
- 1299, 1330, 1361, 1392, 1423, 1454, 1485, 1516, 1547, 1578, 1609, 1640, 1671,
- 1702, 1733, 1764, 1795, 1826, 1857, 1888, 959, 990, 1021, 1052, 1083, 1114,
- 1145, 1176, 1207, 1238, 1269, 1300, 1331, 1362, 1393, 1424, 1455, 1486, 1517,
- 1548, 1579, 1610, 1641, 1672, 1703, 1734, 1765, 1796, 1827, 1858, 1889, 1920,
- 991, 1022, 1053, 1084, 1115, 1146, 1177, 1208, 1239, 1270, 1301, 1332, 1363,
- 1394, 1425, 1456, 1487, 1518, 1549, 1580, 1611, 1642, 1673, 1704, 1735, 1766,
- 1797, 1828, 1859, 1890, 1921, 1952, 1023, 1054, 1085, 1116, 1147, 1178, 1209,
- 1240, 1271, 1302, 1333, 1364, 1395, 1426, 1457, 1488, 1519, 1550, 1581, 1612,
- 1643, 1674, 1705, 1736, 1767, 1798, 1829, 1860, 1891, 1922, 1953, 1984, 1055,
- 1086, 1117, 1148, 1179, 1210, 1241, 1272, 1303, 1334, 1365, 1396, 1427, 1458,
- 1489, 1520, 1551, 1582, 1613, 1644, 1675, 1706, 1737, 1768, 1799, 1830, 1861,
- 1892, 1923, 1954, 1985, 2016, 1087, 1118, 1149, 1180, 1211, 1242, 1273, 1304,
- 1335, 1366, 1397, 1428, 1459, 1490, 1521, 1552, 1583, 1614, 1645, 1676, 1707,
- 1738, 1769, 1800, 1831, 1862, 1893, 1924, 1955, 1986, 2017, 1119, 1150, 1181,
- 1212, 1243, 1274, 1305, 1336, 1367, 1398, 1429, 1460, 1491, 1522, 1553, 1584,
- 1615, 1646, 1677, 1708, 1739, 1770, 1801, 1832, 1863, 1894, 1925, 1956, 1987,
- 2018, 1151, 1182, 1213, 1244, 1275, 1306, 1337, 1368, 1399, 1430, 1461, 1492,
- 1523, 1554, 1585, 1616, 1647, 1678, 1709, 1740, 1771, 1802, 1833, 1864, 1895,
- 1926, 1957, 1988, 2019, 1183, 1214, 1245, 1276, 1307, 1338, 1369, 1400, 1431,
- 1462, 1493, 1524, 1555, 1586, 1617, 1648, 1679, 1710, 1741, 1772, 1803, 1834,
- 1865, 1896, 1927, 1958, 1989, 2020, 1215, 1246, 1277, 1308, 1339, 1370, 1401,
- 1432, 1463, 1494, 1525, 1556, 1587, 1618, 1649, 1680, 1711, 1742, 1773, 1804,
- 1835, 1866, 1897, 1928, 1959, 1990, 2021, 1247, 1278, 1309, 1340, 1371, 1402,
- 1433, 1464, 1495, 1526, 1557, 1588, 1619, 1650, 1681, 1712, 1743, 1774, 1805,
- 1836, 1867, 1898, 1929, 1960, 1991, 2022, 1279, 1310, 1341, 1372, 1403, 1434,
- 1465, 1496, 1527, 1558, 1589, 1620, 1651, 1682, 1713, 1744, 1775, 1806, 1837,
- 1868, 1899, 1930, 1961, 1992, 2023, 1311, 1342, 1373, 1404, 1435, 1466, 1497,
- 1528, 1559, 1590, 1621, 1652, 1683, 1714, 1745, 1776, 1807, 1838, 1869, 1900,
- 1931, 1962, 1993, 2024, 1343, 1374, 1405, 1436, 1467, 1498, 1529, 1560, 1591,
- 1622, 1653, 1684, 1715, 1746, 1777, 1808, 1839, 1870, 1901, 1932, 1963, 1994,
- 2025, 1375, 1406, 1437, 1468, 1499, 1530, 1561, 1592, 1623, 1654, 1685, 1716,
- 1747, 1778, 1809, 1840, 1871, 1902, 1933, 1964, 1995, 2026, 1407, 1438, 1469,
- 1500, 1531, 1562, 1593, 1624, 1655, 1686, 1717, 1748, 1779, 1810, 1841, 1872,
- 1903, 1934, 1965, 1996, 2027, 1439, 1470, 1501, 1532, 1563, 1594, 1625, 1656,
- 1687, 1718, 1749, 1780, 1811, 1842, 1873, 1904, 1935, 1966, 1997, 2028, 1471,
- 1502, 1533, 1564, 1595, 1626, 1657, 1688, 1719, 1750, 1781, 1812, 1843, 1874,
- 1905, 1936, 1967, 1998, 2029, 1503, 1534, 1565, 1596, 1627, 1658, 1689, 1720,
- 1751, 1782, 1813, 1844, 1875, 1906, 1937, 1968, 1999, 2030, 1535, 1566, 1597,
- 1628, 1659, 1690, 1721, 1752, 1783, 1814, 1845, 1876, 1907, 1938, 1969, 2000,
- 2031, 1567, 1598, 1629, 1660, 1691, 1722, 1753, 1784, 1815, 1846, 1877, 1908,
- 1939, 1970, 2001, 2032, 1599, 1630, 1661, 1692, 1723, 1754, 1785, 1816, 1847,
- 1878, 1909, 1940, 1971, 2002, 2033, 1631, 1662, 1693, 1724, 1755, 1786, 1817,
- 1848, 1879, 1910, 1941, 1972, 2003, 2034, 1663, 1694, 1725, 1756, 1787, 1818,
- 1849, 1880, 1911, 1942, 1973, 2004, 2035, 1695, 1726, 1757, 1788, 1819, 1850,
- 1881, 1912, 1943, 1974, 2005, 2036, 1727, 1758, 1789, 1820, 1851, 1882, 1913,
- 1944, 1975, 2006, 2037, 1759, 1790, 1821, 1852, 1883, 1914, 1945, 1976, 2007,
- 2038, 1791, 1822, 1853, 1884, 1915, 1946, 1977, 2008, 2039, 1823, 1854, 1885,
- 1916, 1947, 1978, 2009, 2040, 1855, 1886, 1917, 1948, 1979, 2010, 2041, 1887,
- 1918, 1949, 1980, 2011, 2042, 1919, 1950, 1981, 2012, 2043, 1951, 1982, 2013,
- 2044, 1983, 2014, 2045, 2015, 2046, 2047,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_64x32[2048]) = {
- 0, 1, 64, 2, 65, 128, 3, 66, 129, 192, 4, 67, 130,
- 193, 256, 5, 68, 131, 194, 257, 320, 6, 69, 132, 195, 258,
- 321, 384, 7, 70, 133, 196, 259, 322, 385, 448, 8, 71, 134,
- 197, 260, 323, 386, 449, 512, 9, 72, 135, 198, 261, 324, 387,
- 450, 513, 576, 10, 73, 136, 199, 262, 325, 388, 451, 514, 577,
- 640, 11, 74, 137, 200, 263, 326, 389, 452, 515, 578, 641, 704,
- 12, 75, 138, 201, 264, 327, 390, 453, 516, 579, 642, 705, 768,
- 13, 76, 139, 202, 265, 328, 391, 454, 517, 580, 643, 706, 769,
- 832, 14, 77, 140, 203, 266, 329, 392, 455, 518, 581, 644, 707,
- 770, 833, 896, 15, 78, 141, 204, 267, 330, 393, 456, 519, 582,
- 645, 708, 771, 834, 897, 960, 16, 79, 142, 205, 268, 331, 394,
- 457, 520, 583, 646, 709, 772, 835, 898, 961, 1024, 17, 80, 143,
- 206, 269, 332, 395, 458, 521, 584, 647, 710, 773, 836, 899, 962,
- 1025, 1088, 18, 81, 144, 207, 270, 333, 396, 459, 522, 585, 648,
- 711, 774, 837, 900, 963, 1026, 1089, 1152, 19, 82, 145, 208, 271,
- 334, 397, 460, 523, 586, 649, 712, 775, 838, 901, 964, 1027, 1090,
- 1153, 1216, 20, 83, 146, 209, 272, 335, 398, 461, 524, 587, 650,
- 713, 776, 839, 902, 965, 1028, 1091, 1154, 1217, 1280, 21, 84, 147,
- 210, 273, 336, 399, 462, 525, 588, 651, 714, 777, 840, 903, 966,
- 1029, 1092, 1155, 1218, 1281, 1344, 22, 85, 148, 211, 274, 337, 400,
- 463, 526, 589, 652, 715, 778, 841, 904, 967, 1030, 1093, 1156, 1219,
- 1282, 1345, 1408, 23, 86, 149, 212, 275, 338, 401, 464, 527, 590,
- 653, 716, 779, 842, 905, 968, 1031, 1094, 1157, 1220, 1283, 1346, 1409,
- 1472, 24, 87, 150, 213, 276, 339, 402, 465, 528, 591, 654, 717,
- 780, 843, 906, 969, 1032, 1095, 1158, 1221, 1284, 1347, 1410, 1473, 1536,
- 25, 88, 151, 214, 277, 340, 403, 466, 529, 592, 655, 718, 781,
- 844, 907, 970, 1033, 1096, 1159, 1222, 1285, 1348, 1411, 1474, 1537, 1600,
- 26, 89, 152, 215, 278, 341, 404, 467, 530, 593, 656, 719, 782,
- 845, 908, 971, 1034, 1097, 1160, 1223, 1286, 1349, 1412, 1475, 1538, 1601,
- 1664, 27, 90, 153, 216, 279, 342, 405, 468, 531, 594, 657, 720,
- 783, 846, 909, 972, 1035, 1098, 1161, 1224, 1287, 1350, 1413, 1476, 1539,
- 1602, 1665, 1728, 28, 91, 154, 217, 280, 343, 406, 469, 532, 595,
- 658, 721, 784, 847, 910, 973, 1036, 1099, 1162, 1225, 1288, 1351, 1414,
- 1477, 1540, 1603, 1666, 1729, 1792, 29, 92, 155, 218, 281, 344, 407,
- 470, 533, 596, 659, 722, 785, 848, 911, 974, 1037, 1100, 1163, 1226,
- 1289, 1352, 1415, 1478, 1541, 1604, 1667, 1730, 1793, 1856, 30, 93, 156,
- 219, 282, 345, 408, 471, 534, 597, 660, 723, 786, 849, 912, 975,
- 1038, 1101, 1164, 1227, 1290, 1353, 1416, 1479, 1542, 1605, 1668, 1731, 1794,
- 1857, 1920, 31, 94, 157, 220, 283, 346, 409, 472, 535, 598, 661,
- 724, 787, 850, 913, 976, 1039, 1102, 1165, 1228, 1291, 1354, 1417, 1480,
- 1543, 1606, 1669, 1732, 1795, 1858, 1921, 1984, 32, 95, 158, 221, 284,
- 347, 410, 473, 536, 599, 662, 725, 788, 851, 914, 977, 1040, 1103,
- 1166, 1229, 1292, 1355, 1418, 1481, 1544, 1607, 1670, 1733, 1796, 1859, 1922,
- 1985, 33, 96, 159, 222, 285, 348, 411, 474, 537, 600, 663, 726,
- 789, 852, 915, 978, 1041, 1104, 1167, 1230, 1293, 1356, 1419, 1482, 1545,
- 1608, 1671, 1734, 1797, 1860, 1923, 1986, 34, 97, 160, 223, 286, 349,
- 412, 475, 538, 601, 664, 727, 790, 853, 916, 979, 1042, 1105, 1168,
- 1231, 1294, 1357, 1420, 1483, 1546, 1609, 1672, 1735, 1798, 1861, 1924, 1987,
- 35, 98, 161, 224, 287, 350, 413, 476, 539, 602, 665, 728, 791,
- 854, 917, 980, 1043, 1106, 1169, 1232, 1295, 1358, 1421, 1484, 1547, 1610,
- 1673, 1736, 1799, 1862, 1925, 1988, 36, 99, 162, 225, 288, 351, 414,
- 477, 540, 603, 666, 729, 792, 855, 918, 981, 1044, 1107, 1170, 1233,
- 1296, 1359, 1422, 1485, 1548, 1611, 1674, 1737, 1800, 1863, 1926, 1989, 37,
- 100, 163, 226, 289, 352, 415, 478, 541, 604, 667, 730, 793, 856,
- 919, 982, 1045, 1108, 1171, 1234, 1297, 1360, 1423, 1486, 1549, 1612, 1675,
- 1738, 1801, 1864, 1927, 1990, 38, 101, 164, 227, 290, 353, 416, 479,
- 542, 605, 668, 731, 794, 857, 920, 983, 1046, 1109, 1172, 1235, 1298,
- 1361, 1424, 1487, 1550, 1613, 1676, 1739, 1802, 1865, 1928, 1991, 39, 102,
- 165, 228, 291, 354, 417, 480, 543, 606, 669, 732, 795, 858, 921,
- 984, 1047, 1110, 1173, 1236, 1299, 1362, 1425, 1488, 1551, 1614, 1677, 1740,
- 1803, 1866, 1929, 1992, 40, 103, 166, 229, 292, 355, 418, 481, 544,
- 607, 670, 733, 796, 859, 922, 985, 1048, 1111, 1174, 1237, 1300, 1363,
- 1426, 1489, 1552, 1615, 1678, 1741, 1804, 1867, 1930, 1993, 41, 104, 167,
- 230, 293, 356, 419, 482, 545, 608, 671, 734, 797, 860, 923, 986,
- 1049, 1112, 1175, 1238, 1301, 1364, 1427, 1490, 1553, 1616, 1679, 1742, 1805,
- 1868, 1931, 1994, 42, 105, 168, 231, 294, 357, 420, 483, 546, 609,
- 672, 735, 798, 861, 924, 987, 1050, 1113, 1176, 1239, 1302, 1365, 1428,
- 1491, 1554, 1617, 1680, 1743, 1806, 1869, 1932, 1995, 43, 106, 169, 232,
- 295, 358, 421, 484, 547, 610, 673, 736, 799, 862, 925, 988, 1051,
- 1114, 1177, 1240, 1303, 1366, 1429, 1492, 1555, 1618, 1681, 1744, 1807, 1870,
- 1933, 1996, 44, 107, 170, 233, 296, 359, 422, 485, 548, 611, 674,
- 737, 800, 863, 926, 989, 1052, 1115, 1178, 1241, 1304, 1367, 1430, 1493,
- 1556, 1619, 1682, 1745, 1808, 1871, 1934, 1997, 45, 108, 171, 234, 297,
- 360, 423, 486, 549, 612, 675, 738, 801, 864, 927, 990, 1053, 1116,
- 1179, 1242, 1305, 1368, 1431, 1494, 1557, 1620, 1683, 1746, 1809, 1872, 1935,
- 1998, 46, 109, 172, 235, 298, 361, 424, 487, 550, 613, 676, 739,
- 802, 865, 928, 991, 1054, 1117, 1180, 1243, 1306, 1369, 1432, 1495, 1558,
- 1621, 1684, 1747, 1810, 1873, 1936, 1999, 47, 110, 173, 236, 299, 362,
- 425, 488, 551, 614, 677, 740, 803, 866, 929, 992, 1055, 1118, 1181,
- 1244, 1307, 1370, 1433, 1496, 1559, 1622, 1685, 1748, 1811, 1874, 1937, 2000,
- 48, 111, 174, 237, 300, 363, 426, 489, 552, 615, 678, 741, 804,
- 867, 930, 993, 1056, 1119, 1182, 1245, 1308, 1371, 1434, 1497, 1560, 1623,
- 1686, 1749, 1812, 1875, 1938, 2001, 49, 112, 175, 238, 301, 364, 427,
- 490, 553, 616, 679, 742, 805, 868, 931, 994, 1057, 1120, 1183, 1246,
- 1309, 1372, 1435, 1498, 1561, 1624, 1687, 1750, 1813, 1876, 1939, 2002, 50,
- 113, 176, 239, 302, 365, 428, 491, 554, 617, 680, 743, 806, 869,
- 932, 995, 1058, 1121, 1184, 1247, 1310, 1373, 1436, 1499, 1562, 1625, 1688,
- 1751, 1814, 1877, 1940, 2003, 51, 114, 177, 240, 303, 366, 429, 492,
- 555, 618, 681, 744, 807, 870, 933, 996, 1059, 1122, 1185, 1248, 1311,
- 1374, 1437, 1500, 1563, 1626, 1689, 1752, 1815, 1878, 1941, 2004, 52, 115,
- 178, 241, 304, 367, 430, 493, 556, 619, 682, 745, 808, 871, 934,
- 997, 1060, 1123, 1186, 1249, 1312, 1375, 1438, 1501, 1564, 1627, 1690, 1753,
- 1816, 1879, 1942, 2005, 53, 116, 179, 242, 305, 368, 431, 494, 557,
- 620, 683, 746, 809, 872, 935, 998, 1061, 1124, 1187, 1250, 1313, 1376,
- 1439, 1502, 1565, 1628, 1691, 1754, 1817, 1880, 1943, 2006, 54, 117, 180,
- 243, 306, 369, 432, 495, 558, 621, 684, 747, 810, 873, 936, 999,
- 1062, 1125, 1188, 1251, 1314, 1377, 1440, 1503, 1566, 1629, 1692, 1755, 1818,
- 1881, 1944, 2007, 55, 118, 181, 244, 307, 370, 433, 496, 559, 622,
- 685, 748, 811, 874, 937, 1000, 1063, 1126, 1189, 1252, 1315, 1378, 1441,
- 1504, 1567, 1630, 1693, 1756, 1819, 1882, 1945, 2008, 56, 119, 182, 245,
- 308, 371, 434, 497, 560, 623, 686, 749, 812, 875, 938, 1001, 1064,
- 1127, 1190, 1253, 1316, 1379, 1442, 1505, 1568, 1631, 1694, 1757, 1820, 1883,
- 1946, 2009, 57, 120, 183, 246, 309, 372, 435, 498, 561, 624, 687,
- 750, 813, 876, 939, 1002, 1065, 1128, 1191, 1254, 1317, 1380, 1443, 1506,
- 1569, 1632, 1695, 1758, 1821, 1884, 1947, 2010, 58, 121, 184, 247, 310,
- 373, 436, 499, 562, 625, 688, 751, 814, 877, 940, 1003, 1066, 1129,
- 1192, 1255, 1318, 1381, 1444, 1507, 1570, 1633, 1696, 1759, 1822, 1885, 1948,
- 2011, 59, 122, 185, 248, 311, 374, 437, 500, 563, 626, 689, 752,
- 815, 878, 941, 1004, 1067, 1130, 1193, 1256, 1319, 1382, 1445, 1508, 1571,
- 1634, 1697, 1760, 1823, 1886, 1949, 2012, 60, 123, 186, 249, 312, 375,
- 438, 501, 564, 627, 690, 753, 816, 879, 942, 1005, 1068, 1131, 1194,
- 1257, 1320, 1383, 1446, 1509, 1572, 1635, 1698, 1761, 1824, 1887, 1950, 2013,
- 61, 124, 187, 250, 313, 376, 439, 502, 565, 628, 691, 754, 817,
- 880, 943, 1006, 1069, 1132, 1195, 1258, 1321, 1384, 1447, 1510, 1573, 1636,
- 1699, 1762, 1825, 1888, 1951, 2014, 62, 125, 188, 251, 314, 377, 440,
- 503, 566, 629, 692, 755, 818, 881, 944, 1007, 1070, 1133, 1196, 1259,
- 1322, 1385, 1448, 1511, 1574, 1637, 1700, 1763, 1826, 1889, 1952, 2015, 63,
- 126, 189, 252, 315, 378, 441, 504, 567, 630, 693, 756, 819, 882,
- 945, 1008, 1071, 1134, 1197, 1260, 1323, 1386, 1449, 1512, 1575, 1638, 1701,
- 1764, 1827, 1890, 1953, 2016, 127, 190, 253, 316, 379, 442, 505, 568,
- 631, 694, 757, 820, 883, 946, 1009, 1072, 1135, 1198, 1261, 1324, 1387,
- 1450, 1513, 1576, 1639, 1702, 1765, 1828, 1891, 1954, 2017, 191, 254, 317,
- 380, 443, 506, 569, 632, 695, 758, 821, 884, 947, 1010, 1073, 1136,
- 1199, 1262, 1325, 1388, 1451, 1514, 1577, 1640, 1703, 1766, 1829, 1892, 1955,
- 2018, 255, 318, 381, 444, 507, 570, 633, 696, 759, 822, 885, 948,
- 1011, 1074, 1137, 1200, 1263, 1326, 1389, 1452, 1515, 1578, 1641, 1704, 1767,
- 1830, 1893, 1956, 2019, 319, 382, 445, 508, 571, 634, 697, 760, 823,
- 886, 949, 1012, 1075, 1138, 1201, 1264, 1327, 1390, 1453, 1516, 1579, 1642,
- 1705, 1768, 1831, 1894, 1957, 2020, 383, 446, 509, 572, 635, 698, 761,
- 824, 887, 950, 1013, 1076, 1139, 1202, 1265, 1328, 1391, 1454, 1517, 1580,
- 1643, 1706, 1769, 1832, 1895, 1958, 2021, 447, 510, 573, 636, 699, 762,
- 825, 888, 951, 1014, 1077, 1140, 1203, 1266, 1329, 1392, 1455, 1518, 1581,
- 1644, 1707, 1770, 1833, 1896, 1959, 2022, 511, 574, 637, 700, 763, 826,
- 889, 952, 1015, 1078, 1141, 1204, 1267, 1330, 1393, 1456, 1519, 1582, 1645,
- 1708, 1771, 1834, 1897, 1960, 2023, 575, 638, 701, 764, 827, 890, 953,
- 1016, 1079, 1142, 1205, 1268, 1331, 1394, 1457, 1520, 1583, 1646, 1709, 1772,
- 1835, 1898, 1961, 2024, 639, 702, 765, 828, 891, 954, 1017, 1080, 1143,
- 1206, 1269, 1332, 1395, 1458, 1521, 1584, 1647, 1710, 1773, 1836, 1899, 1962,
- 2025, 703, 766, 829, 892, 955, 1018, 1081, 1144, 1207, 1270, 1333, 1396,
- 1459, 1522, 1585, 1648, 1711, 1774, 1837, 1900, 1963, 2026, 767, 830, 893,
- 956, 1019, 1082, 1145, 1208, 1271, 1334, 1397, 1460, 1523, 1586, 1649, 1712,
- 1775, 1838, 1901, 1964, 2027, 831, 894, 957, 1020, 1083, 1146, 1209, 1272,
- 1335, 1398, 1461, 1524, 1587, 1650, 1713, 1776, 1839, 1902, 1965, 2028, 895,
- 958, 1021, 1084, 1147, 1210, 1273, 1336, 1399, 1462, 1525, 1588, 1651, 1714,
- 1777, 1840, 1903, 1966, 2029, 959, 1022, 1085, 1148, 1211, 1274, 1337, 1400,
- 1463, 1526, 1589, 1652, 1715, 1778, 1841, 1904, 1967, 2030, 1023, 1086, 1149,
- 1212, 1275, 1338, 1401, 1464, 1527, 1590, 1653, 1716, 1779, 1842, 1905, 1968,
- 2031, 1087, 1150, 1213, 1276, 1339, 1402, 1465, 1528, 1591, 1654, 1717, 1780,
- 1843, 1906, 1969, 2032, 1151, 1214, 1277, 1340, 1403, 1466, 1529, 1592, 1655,
- 1718, 1781, 1844, 1907, 1970, 2033, 1215, 1278, 1341, 1404, 1467, 1530, 1593,
- 1656, 1719, 1782, 1845, 1908, 1971, 2034, 1279, 1342, 1405, 1468, 1531, 1594,
- 1657, 1720, 1783, 1846, 1909, 1972, 2035, 1343, 1406, 1469, 1532, 1595, 1658,
- 1721, 1784, 1847, 1910, 1973, 2036, 1407, 1470, 1533, 1596, 1659, 1722, 1785,
- 1848, 1911, 1974, 2037, 1471, 1534, 1597, 1660, 1723, 1786, 1849, 1912, 1975,
- 2038, 1535, 1598, 1661, 1724, 1787, 1850, 1913, 1976, 2039, 1599, 1662, 1725,
- 1788, 1851, 1914, 1977, 2040, 1663, 1726, 1789, 1852, 1915, 1978, 2041, 1727,
- 1790, 1853, 1916, 1979, 2042, 1791, 1854, 1917, 1980, 2043, 1855, 1918, 1981,
- 2044, 1919, 1982, 2045, 1983, 2046, 2047,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_64x64[4096]) = {
- 0, 1, 64, 65, 2, 128, 66, 129, 130, 3, 192, 67, 193,
- 131, 194, 4, 256, 68, 257, 195, 132, 258, 5, 196, 259, 320,
- 69, 321, 133, 322, 260, 197, 323, 6, 384, 70, 385, 134, 386,
- 261, 324, 198, 387, 7, 448, 71, 325, 449, 262, 388, 135, 450,
- 199, 451, 326, 389, 8, 512, 72, 263, 452, 513, 136, 514, 390,
- 200, 515, 327, 453, 264, 516, 9, 576, 73, 577, 137, 391, 454,
- 578, 328, 517, 201, 579, 265, 580, 455, 10, 392, 518, 640, 74,
- 641, 138, 642, 329, 581, 202, 643, 456, 519, 266, 644, 393, 582,
- 11, 704, 75, 705, 139, 330, 645, 706, 520, 203, 457, 583, 707,
- 394, 646, 267, 708, 12, 768, 76, 521, 584, 769, 331, 709, 140,
- 770, 458, 647, 204, 771, 395, 710, 268, 772, 585, 522, 648, 13,
- 332, 773, 832, 77, 459, 711, 833, 141, 834, 205, 835, 396, 774,
- 586, 649, 269, 523, 712, 836, 460, 775, 333, 837, 14, 896, 78,
- 897, 142, 650, 898, 587, 713, 206, 397, 838, 899, 524, 776, 270,
- 900, 461, 839, 334, 651, 714, 901, 15, 588, 777, 960, 79, 961,
- 143, 962, 398, 902, 525, 840, 207, 963, 271, 964, 715, 652, 778,
- 462, 903, 335, 589, 841, 965, 16, 1024, 80, 1025, 144, 526, 904,
- 1026, 399, 966, 208, 716, 779, 1027, 653, 842, 272, 1028, 463, 967,
- 590, 905, 336, 1029, 780, 17, 527, 968, 1088, 81, 717, 843, 1089,
- 400, 1030, 145, 1090, 654, 906, 209, 1091, 273, 464, 1031, 1092, 591,
- 969, 781, 844, 337, 1093, 718, 907, 528, 1032, 18, 1152, 82, 401,
- 655, 970, 1094, 1153, 146, 1154, 210, 1155, 592, 1033, 465, 845, 1095,
- 274, 782, 908, 1156, 719, 971, 338, 1157, 529, 1096, 656, 1034, 402,
- 1158, 19, 1216, 83, 1217, 147, 846, 909, 1218, 783, 972, 211, 593,
- 1097, 1219, 466, 1159, 275, 720, 1035, 1220, 339, 1221, 530, 1160, 657,
- 1098, 910, 847, 973, 403, 1222, 20, 784, 1036, 1280, 84, 1281, 148,
- 1282, 594, 1161, 212, 1283, 467, 721, 1099, 1223, 276, 1284, 911, 974,
- 658, 1162, 340, 531, 848, 1037, 1224, 1285, 785, 1100, 404, 1286, 21,
- 1344, 85, 595, 1225, 1345, 149, 722, 1163, 1346, 468, 1287, 213, 975,
- 1347, 912, 1038, 277, 1348, 849, 1101, 659, 1226, 532, 1288, 341, 1349,
- 786, 1164, 405, 1350, 596, 976, 1039, 1289, 723, 1227, 22, 1408, 86,
- 913, 1102, 1409, 150, 1410, 469, 1351, 214, 850, 1165, 1411, 278, 660,
- 1290, 1412, 533, 787, 1228, 1352, 342, 1413, 1040, 977, 1103, 406, 914,
- 1166, 1414, 724, 1291, 597, 1353, 23, 1472, 87, 851, 1229, 1473, 151,
- 470, 1415, 1474, 215, 1475, 661, 1354, 788, 1292, 279, 1041, 1104, 1476,
- 534, 1416, 978, 1167, 343, 1477, 915, 1230, 725, 1355, 407, 598, 1417,
- 1478, 852, 1293, 24, 1536, 88, 1537, 471, 1105, 1479, 152, 1042, 1168,
- 1538, 662, 1418, 216, 789, 1356, 1539, 979, 1231, 280, 1540, 535, 1480,
- 916, 1294, 344, 1541, 726, 1419, 599, 853, 1357, 1481, 408, 1542, 1106,
- 1169, 1043, 1232, 25, 472, 980, 1295, 1543, 1600, 89, 1601, 790, 1420,
- 153, 663, 1482, 1602, 217, 1603, 917, 1358, 536, 1544, 281, 1604, 1170,
- 345, 727, 1107, 1233, 1483, 1605, 854, 1421, 1044, 1296, 600, 1545, 409,
- 1606, 981, 1359, 791, 1484, 473, 1607, 26, 664, 1546, 1664, 90, 1665,
- 154, 918, 1422, 1666, 218, 1171, 1234, 1667, 537, 1108, 1297, 1608, 282,
- 1668, 728, 1045, 1360, 1547, 855, 1485, 346, 1669, 601, 1609, 982, 1423,
- 410, 1670, 792, 1548, 1235, 1172, 1298, 474, 665, 919, 1486, 1610, 1671,
- 27, 1728, 91, 1109, 1361, 1729, 155, 1730, 219, 1731, 538, 1046, 1424,
- 1672, 283, 856, 1549, 1732, 729, 1611, 347, 983, 1487, 1733, 602, 1673,
- 1236, 1299, 411, 1173, 1362, 1734, 793, 1612, 920, 1550, 1110, 1425, 666,
- 1674, 475, 1735, 28, 1792, 92, 1047, 1488, 1793, 156, 1794, 220, 539,
- 1736, 1795, 857, 1613, 730, 1675, 284, 1300, 1796, 984, 1551, 1237, 1363,
- 1174, 1426, 348, 1797, 603, 1737, 1111, 1489, 412, 794, 1676, 1798, 921,
- 1614, 667, 1738, 1048, 1552, 476, 1799, 29, 1301, 1364, 1856, 93, 1857,
- 157, 858, 1238, 1427, 1677, 1858, 540, 1800, 221, 731, 985, 1615, 1739,
- 1859, 1175, 1490, 285, 1860, 604, 1112, 1553, 1801, 349, 1861, 922, 1678,
- 795, 1740, 413, 1862, 1049, 1616, 1365, 668, 1302, 1428, 1802, 477, 1239,
- 1491, 1863, 859, 1741, 30, 1176, 1554, 1920, 94, 986, 1679, 1921, 158,
- 1922, 541, 732, 1803, 1864, 222, 1923, 1113, 1617, 286, 1924, 605, 1865,
- 350, 923, 1366, 1429, 1742, 1925, 796, 1804, 1303, 1492, 1050, 1680, 414,
- 1926, 1240, 1555, 669, 1866, 478, 1177, 1618, 1927, 860, 1805, 987, 1743,
- 31, 1984, 95, 733, 1867, 1985, 542, 1928, 159, 1114, 1681, 1986, 1430,
- 223, 1367, 1493, 1987, 1304, 1556, 287, 1988, 924, 1806, 606, 1929, 797,
- 1051, 1744, 1868, 351, 1241, 1619, 1989, 415, 1990, 670, 1178, 1682, 1930,
- 988, 1807, 479, 861, 1869, 1991, 1431, 1494, 1368, 1557, 1115, 1745, 734,
- 1931, 32, 2048, 96, 543, 1305, 1620, 1992, 2049, 160, 2050, 224, 2051,
- 925, 1242, 1683, 1870, 288, 1052, 1808, 2052, 607, 1993, 798, 1932, 352,
- 2053, 1179, 1746, 1495, 416, 1432, 1558, 2054, 671, 1994, 989, 1369, 1621,
- 1871, 862, 1933, 480, 1116, 1809, 2055, 1306, 1684, 735, 1995, 544, 2056,
- 33, 2112, 97, 1243, 1747, 2113, 161, 2114, 926, 1934, 1053, 1872, 225,
- 2115, 289, 608, 799, 1496, 1559, 1996, 2057, 2116, 1180, 1810, 1433, 1622,
- 353, 2117, 1370, 1685, 672, 2058, 417, 990, 1935, 2118, 1307, 1748, 863,
- 1117, 1873, 1997, 481, 2119, 736, 1244, 1811, 2059, 1560, 545, 2120, 1497,
- 1623, 34, 1054, 1936, 2176, 98, 927, 1998, 2177, 162, 1434, 1686, 2178,
- 226, 1181, 1874, 2179, 800, 2060, 609, 1371, 1749, 2121, 290, 2180, 354,
- 2181, 1308, 1812, 991, 1999, 673, 1118, 1937, 2122, 418, 2182, 864, 2061,
- 1561, 1624, 1245, 1875, 482, 1498, 1687, 2183, 737, 2123, 1435, 1750, 1055,
- 2000, 546, 928, 2062, 2184, 1182, 1938, 35, 1372, 1813, 2240, 99, 2241,
- 163, 2242, 801, 2124, 227, 2243, 610, 2185, 291, 1309, 1876, 2244, 992,
- 2063, 355, 1119, 1625, 2001, 2245, 1562, 1688, 674, 2186, 865, 1499, 1751,
- 2125, 419, 1246, 1939, 2246, 1436, 1814, 483, 2247, 738, 2187, 1056, 2064,
- 1373, 1877, 929, 1183, 2002, 2126, 547, 2248, 36, 2304, 100, 2305, 164,
- 802, 1310, 1940, 2188, 2306, 1626, 1689, 228, 1563, 1752, 2307, 611, 2249,
- 292, 2308, 1120, 1500, 1815, 2065, 993, 2127, 356, 2309, 1247, 2003, 675,
- 866, 1437, 1878, 2189, 2250, 420, 2310, 1374, 1941, 484, 1057, 2128, 2311,
- 739, 2251, 1184, 2066, 930, 1690, 2190, 1627, 1753, 548, 1564, 1816, 2312,
- 1311, 2004, 37, 803, 2252, 2368, 101, 1501, 1879, 2369, 165, 2370, 612,
- 2313, 229, 1121, 2129, 2371, 994, 2191, 1438, 1942, 293, 1248, 2067, 2372,
- 357, 867, 2253, 2373, 676, 2314, 1375, 2005, 421, 1691, 1754, 2374, 1628,
- 1817, 1058, 2192, 1185, 2130, 740, 1565, 1880, 2315, 485, 2375, 931, 2254,
- 1312, 2068, 1502, 1943, 549, 2376, 804, 2316, 38, 2432, 102, 1122, 1439,
- 2006, 2193, 2433, 166, 2434, 613, 995, 1249, 2131, 2255, 2377, 230, 2435,
- 1755, 294, 1692, 1818, 2436, 868, 1376, 2069, 2317, 1629, 1881, 358, 677,
- 2378, 2437, 1566, 1944, 422, 1186, 2194, 2438, 1059, 2256, 1313, 2132, 741,
- 1503, 2007, 2379, 932, 2318, 486, 2439, 550, 1440, 2070, 2440, 805, 1756,
- 1819, 2380, 1123, 2257, 1250, 1693, 1882, 2195, 39, 996, 2319, 2496, 103,
- 2497, 167, 614, 1630, 1945, 2441, 2498, 231, 1377, 2133, 2499, 295, 1567,
- 2008, 2500, 869, 2381, 678, 2442, 359, 2501, 1187, 2258, 1060, 2320, 1504,
- 2071, 1314, 2196, 423, 2502, 742, 933, 2382, 2443, 1820, 487, 1757, 1883,
- 2503, 1441, 2134, 1694, 1946, 551, 1124, 2321, 2504, 1251, 1631, 2009, 2259,
- 806, 2444, 997, 2383, 1378, 2197, 40, 1568, 2072, 2560, 104, 2561, 615,
- 2505, 168, 2562, 232, 2563, 870, 2445, 296, 2564, 1505, 2135, 1188, 2322,
- 679, 2506, 360, 1061, 1315, 1821, 1884, 2260, 2384, 2565, 1758, 1947, 424,
- 2566, 1695, 2010, 934, 1442, 2198, 2446, 743, 2507, 488, 1632, 2073, 2567,
- 1252, 2323, 1125, 2385, 552, 2568, 807, 1569, 2136, 2508, 1379, 2261, 998,
- 2447, 41, 616, 2569, 2624, 105, 1885, 2625, 1822, 1948, 169, 1506, 2199,
- 2626, 233, 871, 1759, 2011, 2509, 2627, 1189, 2386, 1316, 2324, 297, 2628,
- 680, 1062, 1696, 2074, 2448, 2570, 361, 2629, 1443, 2262, 1633, 2137, 425,
- 935, 2510, 2630, 744, 2571, 489, 1253, 2387, 2631, 1570, 2200, 1126, 2449,
- 1380, 2325, 1886, 1949, 808, 2572, 553, 1823, 2012, 2632, 999, 2511, 1760,
- 2075, 1507, 2263, 617, 2633, 42, 2688, 106, 1697, 2138, 2689, 170, 1190,
- 2450, 2690, 872, 1317, 2388, 2573, 234, 2691, 1063, 2512, 298, 1444, 2326,
- 2692, 681, 1634, 2201, 2634, 362, 2693, 936, 2574, 426, 1950, 2694, 1571,
- 2264, 745, 1887, 2013, 2635, 1254, 2451, 1824, 2076, 1127, 1381, 2389, 2513,
- 490, 2695, 1761, 2139, 809, 1000, 1508, 2327, 2575, 2636, 554, 2696, 1698,
- 2202, 1318, 2452, 618, 1191, 2514, 2697, 43, 2752, 107, 873, 1635, 2265,
- 2637, 2753, 171, 1445, 2390, 2754, 1064, 2576, 235, 2755, 1951, 2014, 682,
- 2698, 299, 1888, 2077, 2756, 1572, 2328, 1825, 2140, 363, 2757, 937, 2638,
- 1255, 2515, 427, 746, 1382, 1762, 2203, 2453, 2699, 2758, 1128, 2577, 491,
- 1509, 2391, 2759, 1699, 2266, 1001, 2639, 810, 2700, 555, 2760, 1319, 1636,
- 2329, 2516, 2015, 1192, 1952, 2078, 2578, 1446, 2454, 619, 1889, 2141, 2761,
- 874, 2701, 44, 2816, 108, 1065, 2640, 2817, 172, 1826, 2204, 2818, 236,
- 1573, 2392, 2819, 683, 2762, 300, 2820, 1763, 2267, 938, 2702, 364, 1256,
- 2579, 2821, 1383, 2517, 747, 1129, 2641, 2763, 428, 1700, 2330, 2822, 1510,
- 2455, 492, 2016, 2079, 2823, 1002, 1953, 2142, 2703, 811, 2764, 1637, 2393,
- 1890, 2205, 556, 1320, 2580, 2824, 1193, 1447, 2518, 2642, 1827, 2268, 620,
- 2825, 875, 2765, 1066, 1574, 2456, 2704, 45, 1764, 2331, 2880, 109, 2881,
- 173, 2882, 237, 2883, 684, 2826, 301, 1384, 2581, 2884, 1257, 2643, 939,
- 1701, 2394, 2766, 2080, 365, 1511, 2017, 2143, 2519, 2885, 1130, 2705, 1954,
- 2206, 748, 2827, 429, 2886, 1891, 2269, 1638, 2457, 493, 1003, 2767, 2887,
- 812, 1828, 2332, 2828, 1321, 2644, 1448, 2582, 1194, 2706, 557, 2888, 1575,
- 2520, 1765, 2395, 876, 1067, 2768, 2829, 621, 2889, 2081, 2144, 46, 2944,
- 110, 2018, 2207, 2945, 174, 1702, 2458, 2946, 1385, 2645, 238, 685, 1258,
- 1955, 2270, 2707, 2890, 2947, 1512, 2583, 302, 940, 2830, 2948, 1892, 2333,
- 1131, 2769, 366, 2949, 749, 1639, 2521, 2891, 430, 2950, 1829, 2396, 1004,
- 2831, 1322, 2708, 494, 1449, 2646, 2951, 813, 2892, 1195, 1766, 2459, 2770,
- 1576, 2584, 2145, 558, 2082, 2208, 2952, 2019, 2271, 1068, 2832, 877, 2893,
- 1956, 2334, 622, 1703, 2522, 2953, 1386, 2709, 47, 3008, 111, 1259, 1513,
- 1893, 2397, 2647, 2771, 3009, 175, 3010, 686, 2954, 239, 3011, 941, 2894,
- 303, 1132, 1640, 2585, 2833, 3012, 1830, 2460, 367, 3013, 750, 2955, 431,
- 2146, 2209, 3014, 1450, 2710, 1323, 2083, 2272, 2772, 1005, 1767, 2523, 2895,
- 1577, 2020, 2335, 2648, 495, 3015, 814, 1196, 2834, 2956, 1957, 2398, 559,
- 3016, 1704, 2586, 1069, 2896, 878, 1894, 2461, 2957, 623, 1387, 2773, 3017,
- 1514, 2711, 1260, 2835, 48, 3072, 112, 1831, 2524, 3073, 1641, 2649, 176,
- 3074, 687, 3018, 942, 2210, 2958, 240, 3075, 1133, 2147, 2273, 2897, 304,
- 2084, 2336, 3076, 368, 1768, 2587, 3077, 751, 2021, 2399, 3019, 1451, 2774,
- 1324, 2836, 432, 1578, 2712, 3078, 1006, 2959, 1958, 2462, 1197, 2898, 496,
- 815, 3020, 3079, 1705, 2650, 1895, 2525, 560, 3080, 1070, 2960, 1388, 2837,
- 879, 1515, 2775, 3021, 2211, 2274, 1832, 2588, 624, 2148, 2337, 3081, 1261,
- 2899, 1642, 2713, 2085, 2400, 49, 3136, 113, 3137, 688, 3082, 177, 943,
- 1134, 2022, 2463, 2961, 3022, 3138, 241, 1769, 2651, 3139, 305, 3140, 1452,
- 2838, 1959, 2526, 752, 1325, 1579, 2776, 2900, 3083, 369, 3141, 1007, 3023,
- 433, 3142, 1198, 1706, 2714, 2962, 1896, 2589, 816, 3084, 497, 2275, 3143,
- 2212, 2338, 2149, 2401, 561, 1071, 1516, 1833, 2652, 2839, 3024, 3144, 1389,
- 2901, 2086, 2464, 880, 3085, 1643, 2777, 1262, 2963, 625, 2023, 2527, 3145,
- 1770, 2715, 1135, 3025, 50, 944, 1960, 2590, 3086, 3200, 114, 689, 3146,
- 3201, 178, 3202, 242, 1453, 2902, 3203, 1580, 2840, 306, 1326, 2964, 3204,
- 2276, 2339, 753, 1897, 2653, 3147, 370, 1707, 2213, 2402, 2778, 3205, 1008,
- 3087, 1199, 2150, 2465, 3026, 434, 3206, 817, 2087, 2528, 3148, 1834, 2716,
- 498, 3207, 1517, 2903, 1390, 2965, 1072, 3088, 1644, 2024, 2591, 2841, 562,
- 3208, 881, 1263, 3027, 3149, 1771, 2779, 626, 1961, 2654, 3209, 2340, 1136,
- 3089, 2277, 2403, 945, 3150, 690, 1454, 2214, 2466, 2966, 3210, 51, 1581,
- 2904, 3264, 115, 3265, 179, 1898, 2717, 3266, 1327, 3028, 243, 2151, 2529,
- 3267, 1708, 2842, 307, 3268, 754, 3211, 2088, 2592, 371, 1009, 3151, 3269,
- 1200, 3090, 1835, 2780, 435, 3270, 2025, 2655, 818, 3212, 1518, 2967, 499,
- 1391, 1645, 2905, 3029, 3271, 1073, 3152, 1962, 2718, 563, 1264, 1772, 2341,
- 2404, 2843, 3091, 3272, 882, 2278, 2467, 3213, 2215, 2530, 627, 3273, 2152,
- 2593, 1137, 1899, 2781, 3153, 1582, 2968, 1455, 3030, 946, 3214, 691, 1709,
- 2906, 3274, 52, 1328, 3092, 3328, 116, 2089, 2656, 3329, 180, 3330, 244,
- 3331, 308, 1836, 2844, 3332, 755, 3275, 1010, 1201, 2026, 2719, 3154, 3215,
- 372, 3333, 1519, 2405, 3031, 436, 2342, 2468, 3334, 1646, 2969, 819, 1392,
- 3093, 3276, 2279, 2531, 1963, 2782, 500, 3335, 1773, 2907, 1074, 2216, 2594,
- 3216, 1265, 3155, 564, 3336, 883, 2153, 2657, 3277, 1900, 2845, 628, 1583,
- 3032, 3337, 1456, 2090, 2720, 3094, 1138, 3217, 1710, 2970, 947, 3278, 1329,
- 3156, 692, 3338, 53, 1837, 2908, 3392, 117, 2027, 2783, 3393, 181, 2406,
- 2469, 3394, 2343, 2532, 245, 3395, 1202, 3218, 309, 756, 2280, 2595, 3339,
- 3396, 1011, 3279, 1520, 3095, 373, 1647, 3033, 3397, 1964, 2846, 2217, 2658,
- 1393, 3157, 437, 1774, 2971, 3398, 820, 3340, 2154, 2721, 1075, 3280, 501,
- 3399, 1266, 3219, 1901, 2909, 565, 884, 2091, 2784, 3341, 3400, 1584, 3096,
- 1457, 1711, 3034, 3158, 2470, 629, 1139, 2407, 2533, 3281, 3401, 2344, 2596,
- 2028, 2847, 948, 1330, 1838, 2972, 3220, 3342, 2281, 2659, 693, 3402, 54,
- 3456, 118, 3457, 182, 2218, 2722, 3458, 246, 1203, 1965, 2910, 3282, 3459,
- 1012, 1648, 3097, 3343, 757, 1521, 3159, 3403, 310, 3460, 1775, 2155, 2785,
- 3035, 374, 1394, 3221, 3461, 438, 3462, 821, 3404, 1902, 2973, 1076, 2092,
- 2848, 3344, 1267, 3283, 502, 2471, 2534, 3463, 2408, 2597, 1585, 2345, 2660,
- 3160, 885, 3405, 566, 1712, 3098, 3464, 1458, 3222, 2029, 2911, 2282, 2723,
- 1140, 1839, 3036, 3345, 630, 3465, 1331, 3284, 949, 2219, 2786, 3406, 694,
- 1966, 2974, 3466, 55, 2156, 2849, 3520, 119, 1649, 3161, 3521, 1204, 3346,
- 183, 1522, 3223, 3522, 1776, 3099, 247, 1013, 3407, 3523, 758, 3467, 311,
- 3524, 1395, 2535, 3285, 2472, 2598, 2093, 2912, 375, 1903, 2409, 2661, 3037,
- 3525, 822, 2346, 2724, 3468, 439, 3526, 1077, 1268, 3347, 3408, 503, 2283,
- 2787, 3527, 1586, 3224, 1713, 2030, 2975, 3162, 886, 1459, 3286, 3469, 1840,
- 3100, 567, 3528, 2220, 2850, 1141, 3409, 1332, 3348, 631, 3529, 1967, 3038,
- 950, 3470, 2157, 2913, 2536, 2599, 695, 1650, 2473, 2662, 3225, 3530, 1523,
- 1777, 3163, 3287, 1205, 2410, 2725, 3410, 56, 3584, 120, 3585, 184, 2094,
- 2976, 3586, 1014, 3471, 248, 1396, 1904, 2347, 2788, 3101, 3349, 3587, 759,
- 3531, 312, 3588, 376, 2284, 2851, 3589, 823, 3532, 1269, 2031, 3039, 3411,
- 440, 1078, 3472, 3590, 1714, 3226, 1587, 3288, 2221, 2914, 504, 1841, 3164,
- 3591, 1460, 3350, 887, 3533, 568, 2600, 3592, 2537, 2663, 1968, 3102, 1142,
- 2158, 2977, 3473, 2474, 2726, 1333, 3412, 632, 3593, 2411, 2789, 951, 3534,
- 1651, 3289, 1778, 3227, 2348, 2852, 1524, 2095, 3040, 3351, 696, 3594, 1206,
- 3474, 1905, 3165, 57, 3648, 121, 1015, 1397, 2285, 2915, 3413, 3535, 3649,
- 185, 3650, 760, 3595, 249, 3651, 313, 2032, 3103, 3652, 2222, 2978, 377,
- 3653, 1270, 1715, 3290, 3475, 824, 1588, 3352, 3596, 1079, 2601, 2664, 3536,
- 1842, 3228, 441, 2538, 2727, 3654, 1461, 2475, 2790, 3414, 505, 2159, 3041,
- 3655, 1969, 3166, 888, 2412, 2853, 3597, 569, 3656, 1143, 3537, 1334, 3476,
- 2349, 2916, 2096, 3104, 1652, 3353, 633, 1779, 3291, 3657, 952, 3598, 1525,
- 3415, 1906, 2286, 2979, 3229, 697, 1207, 3538, 3658, 1398, 3477, 1016, 3599,
- 2033, 2665, 3167, 58, 2602, 2728, 3712, 122, 2223, 3042, 3713, 186, 3714,
- 761, 2539, 2791, 3659, 250, 3715, 314, 1716, 2476, 2854, 3354, 3716, 1589,
- 1843, 3292, 3416, 1271, 3539, 378, 3717, 1080, 3600, 825, 2160, 3105, 3660,
- 2413, 2917, 442, 1462, 1970, 3230, 3478, 3718, 2350, 2980, 506, 3719, 889,
- 3661, 1144, 1335, 2097, 3168, 3540, 3601, 570, 3720, 1780, 3355, 1653, 2287,
- 3043, 3417, 1907, 3293, 634, 953, 1526, 2666, 2729, 3479, 3662, 3721, 2603,
- 2792, 2540, 2855, 1208, 2224, 3106, 3602, 2034, 3231, 698, 3722, 1399, 3541,
- 2477, 2918, 1017, 3663, 59, 3776, 123, 3777, 187, 762, 1717, 2414, 2981,
- 3418, 3723, 3778, 1844, 3356, 251, 2161, 3169, 3779, 1590, 3480, 315, 1272,
- 3603, 3780, 1971, 3294, 1081, 2351, 3044, 3664, 379, 3781, 826, 3724, 1463,
- 3542, 443, 3782, 2098, 3232, 2730, 2288, 3107, 507, 2667, 2793, 3783, 890,
- 3725, 1336, 2604, 2856, 3604, 1145, 1781, 3419, 3665, 1654, 3481, 571, 1908,
- 3357, 3784, 2541, 2919, 1527, 3543, 2225, 3170, 954, 2478, 2982, 3726, 635,
- 2035, 3295, 3785, 1209, 3666, 1400, 3605, 2415, 3045, 699, 3786, 1018, 2162,
- 3233, 3727, 1718, 3482, 1845, 3420, 60, 2352, 3108, 3840, 124, 1591, 3544,
- 3841, 763, 3787, 188, 1972, 3358, 3842, 252, 3843, 1273, 3667, 2731, 2794,
- 316, 3844, 2668, 2857, 1082, 1464, 3606, 3728, 380, 827, 2099, 2605, 2920,
- 3296, 3788, 3845, 2289, 3171, 444, 3846, 2542, 2983, 1782, 3483, 508, 1337,
- 3668, 3847, 891, 1655, 1909, 3421, 3545, 3789, 1146, 2479, 3046, 3729, 2226,
- 3234, 572, 3848, 1528, 2036, 3359, 3607, 2416, 3109, 955, 3790, 636, 3849,
- 1210, 3730, 1401, 2163, 3297, 3669, 2353, 3172, 2795, 700, 1846, 2732, 2858,
- 3484, 3850, 1719, 3546, 1019, 2669, 2921, 3791, 1973, 3422, 1592, 3608, 2606,
- 2984, 61, 764, 3851, 3904, 125, 3905, 189, 1274, 2290, 3235, 3731, 3906,
- 2100, 3360, 253, 2543, 3047, 3907, 1465, 3670, 317, 1083, 3792, 3908, 828,
- 3852, 381, 3909, 2480, 3110, 1783, 3547, 445, 1910, 2227, 3298, 3485, 3910,
- 1656, 3609, 1338, 3732, 892, 3853, 509, 1147, 2037, 2417, 3173, 3423, 3793,
- 3911, 1529, 3671, 573, 2796, 2859, 3912, 2733, 2922, 2164, 3361, 956, 2354,
- 3236, 3854, 2670, 2985, 637, 3913, 1211, 1402, 3733, 3794, 1847, 2607, 3048,
- 3548, 1720, 3610, 1974, 3486, 701, 3914, 1020, 1593, 2544, 3111, 3672, 3855,
- 2291, 3299, 2101, 3424, 765, 1275, 3795, 3915, 62, 3968, 126, 2481, 3174,
- 3969, 190, 1466, 3734, 3970, 254, 3971, 1084, 3856, 318, 2228, 3362, 3972,
- 829, 1784, 3611, 3916, 1911, 3549, 382, 2418, 3237, 3973, 2860, 1657, 2797,
- 2923, 3673, 2038, 3487, 446, 2734, 2986, 3974, 1339, 3796, 1148, 3857, 893,
- 2671, 3049, 3917, 510, 1530, 3735, 3975, 2355, 3300, 2165, 3425, 2608, 3112,
- 574, 3976, 957, 3918, 1848, 3612, 1403, 2545, 3175, 3797, 1212, 3858, 638,
- 1721, 1975, 3550, 3674, 3977, 2292, 3363, 1594, 2102, 3488, 3736, 702, 2482,
- 3238, 3978, 1021, 3919, 1276, 2861, 2924, 3859, 766, 1467, 2229, 2798, 2987,
- 3426, 3798, 3979, 63, 4032, 127, 2419, 3301, 4033, 191, 2735, 3050, 4034,
- 1085, 1912, 3613, 3920, 255, 1785, 3675, 4035, 319, 2672, 3113, 4036, 2039,
- 3551, 830, 3980, 1658, 3737, 383, 4037, 1340, 2356, 3364, 3860, 2609, 3176,
- 447, 2166, 3489, 4038, 1149, 1531, 3799, 3921, 894, 3981, 511, 4039, 2546,
- 3239, 575, 1849, 3676, 4040, 2293, 3427, 1976, 3614, 958, 1722, 3738, 3982,
- 1404, 3861, 1213, 2483, 3302, 3922, 2103, 3552, 639, 2925, 4041, 2862, 2988,
- 1595, 3800, 2799, 3051, 2736, 3114, 703, 1022, 3983, 4042, 2230, 3490, 2420,
- 3365, 1277, 2673, 3177, 3923, 1468, 3862, 767, 1913, 3677, 4043, 1786, 3739,
- 2040, 3615, 1086, 2610, 3240, 3984, 2357, 3428, 1659, 3801, 831, 4044, 2167,
- 3553, 1341, 3924, 2547, 3303, 1532, 3863, 1150, 3985, 895, 4045, 2294, 2926,
- 2989, 3491, 2863, 3052, 1850, 2484, 3366, 3740, 1977, 3678, 2800, 3115, 1723,
- 3802, 2104, 3616, 1405, 3925, 959, 2737, 3178, 4046, 1214, 3986, 1596, 3864,
- 2421, 3429, 2231, 2674, 3241, 3554, 1023, 4047, 2611, 3304, 1278, 1469, 1914,
- 3741, 3926, 3987, 1787, 2041, 3679, 3803, 2358, 3492, 1087, 1660, 2168, 2548,
- 3367, 3617, 3865, 4048, 2990, 2927, 3053, 2864, 3116, 1342, 3988, 1533, 2295,
- 2801, 3179, 3555, 3927, 2485, 3430, 1151, 4049, 1978, 2738, 3242, 3742, 1851,
- 3804, 2105, 3680, 1724, 3866, 2675, 3305, 1406, 2422, 3493, 3989, 2232, 3618,
- 1215, 4050, 1597, 3928, 2612, 3368, 2359, 3556, 1915, 3805, 2042, 2991, 3054,
- 3743, 1470, 3990, 1788, 2928, 3117, 3867, 1279, 2549, 3431, 4051, 2865, 3180,
- 2169, 3681, 1661, 3929, 2802, 3243, 2486, 3494, 2296, 3619, 2739, 3306, 1343,
- 4052, 1534, 3991, 1979, 3806, 1852, 3868, 2676, 3369, 2106, 3744, 2423, 3557,
- 1725, 3930, 2233, 3682, 2613, 3432, 1407, 4053, 3055, 1598, 2992, 3118, 3992,
- 2929, 3181, 2360, 3620, 2866, 3244, 2550, 3495, 1916, 3869, 2043, 3807, 1789,
- 2803, 3307, 3931, 1471, 2170, 3745, 4054, 2740, 3370, 1662, 2487, 3558, 3993,
- 2297, 3683, 2677, 3433, 1535, 4055, 1980, 3870, 1853, 2107, 2424, 3621, 3808,
- 3932, 3056, 3119, 2614, 3496, 2993, 3182, 1726, 2234, 3746, 3994, 2930, 3245,
- 2867, 3308, 1599, 2361, 3684, 4056, 2551, 3559, 2804, 3371, 2044, 3871, 1917,
- 3933, 2171, 3809, 1790, 2741, 3434, 3995, 2488, 3622, 2298, 3747, 1663, 4057,
- 2678, 3497, 3120, 3057, 3183, 2994, 3246, 2425, 3685, 1981, 3934, 2108, 3872,
- 2615, 3560, 2931, 3309, 1854, 3996, 2235, 3810, 2868, 3372, 1727, 4058, 2552,
- 3623, 2805, 3435, 2362, 3748, 2742, 3498, 2045, 3935, 1918, 3997, 2172, 3873,
- 2489, 3686, 1791, 4059, 3121, 3184, 2299, 2679, 3561, 3811, 3058, 3247, 2995,
- 3310, 2932, 3373, 2426, 3749, 2616, 3624, 1982, 3998, 2109, 2869, 3436, 3936,
- 1855, 4060, 2236, 3874, 2806, 3499, 2553, 3687, 2363, 3812, 2743, 3562, 3185,
- 3122, 3248, 2046, 3999, 2490, 3750, 1919, 2173, 3059, 3311, 3937, 4061, 2680,
- 3625, 2996, 3374, 2300, 3875, 2933, 3437, 2617, 3688, 2427, 3813, 2870, 3500,
- 2110, 4000, 1983, 4062, 2807, 3563, 2237, 3938, 2554, 3751, 2364, 3876, 2744,
- 3626, 3186, 3249, 3123, 3312, 3060, 3375, 2491, 2997, 3438, 3814, 2047, 2681,
- 3689, 4063, 2174, 4001, 2934, 3501, 2301, 3939, 2871, 3564, 2618, 3752, 2428,
- 3877, 2808, 3627, 2111, 4064, 2238, 3250, 4002, 2555, 3187, 3313, 3815, 3124,
- 3376, 2745, 3690, 2365, 3940, 3061, 3439, 2998, 3502, 2492, 3878, 2682, 3753,
- 2935, 3565, 2175, 4065, 2302, 4003, 2872, 3628, 2619, 3816, 2429, 3941, 2809,
- 3691, 3251, 3314, 3188, 3377, 3125, 3440, 2556, 3879, 2239, 3062, 3503, 4066,
- 2746, 3754, 2366, 4004, 2999, 3566, 2936, 3629, 2683, 3817, 2493, 3942, 2873,
- 3692, 2303, 4067, 2620, 3880, 3315, 3252, 3378, 3189, 3441, 2430, 2810, 3755,
- 4005, 3126, 3504, 3063, 3567, 2557, 3943, 2747, 3818, 3000, 3630, 2367, 4068,
- 2937, 3693, 2684, 3881, 2494, 4006, 2874, 3756, 3316, 3379, 3253, 3442, 3190,
- 3505, 2621, 3944, 3127, 3568, 2811, 3819, 2431, 4069, 3064, 3631, 2748, 3882,
- 2558, 3001, 3694, 4007, 2938, 3757, 2685, 3945, 3380, 3317, 3443, 2495, 4070,
- 3254, 3506, 2875, 3820, 3191, 3569, 3128, 3632, 2622, 4008, 2812, 3883, 3065,
- 3695, 3002, 3758, 2749, 3946, 2559, 4071, 2939, 3821, 3381, 3444, 3318, 3507,
- 2686, 3255, 3570, 4009, 2876, 3884, 3192, 3633, 3129, 3696, 2623, 4072, 2813,
- 3947, 3066, 3759, 3003, 3822, 2750, 4010, 3445, 3382, 3508, 2940, 3885, 3319,
- 3571, 3256, 3634, 2687, 3193, 3697, 4073, 2877, 3948, 3130, 3760, 3067, 3823,
- 2814, 4011, 3004, 3886, 3446, 3509, 3383, 3572, 2751, 4074, 3320, 3635, 2941,
- 3949, 3257, 3698, 3194, 3761, 2878, 4012, 3131, 3824, 3068, 3887, 2815, 4075,
- 3510, 3447, 3573, 3005, 3950, 3384, 3636, 3321, 3699, 3258, 3762, 2942, 4013,
- 3195, 3825, 3132, 3888, 2879, 4076, 3069, 3951, 3511, 3574, 3448, 3637, 3385,
- 3700, 3006, 4014, 3322, 3763, 3259, 3826, 2943, 4077, 3196, 3889, 3133, 3952,
- 3575, 3512, 3638, 3070, 4015, 3449, 3701, 3386, 3764, 3323, 3827, 3007, 4078,
- 3260, 3890, 3197, 3953, 3134, 4016, 3576, 3639, 3513, 3702, 3450, 3765, 3071,
- 4079, 3387, 3828, 3324, 3891, 3261, 3954, 3198, 4017, 3640, 3135, 4080, 3577,
- 3703, 3514, 3766, 3451, 3829, 3388, 3892, 3325, 3955, 3262, 4018, 3199, 4081,
- 3641, 3704, 3578, 3767, 3515, 3830, 3452, 3893, 3389, 3956, 3326, 4019, 3263,
- 4082, 3705, 3642, 3768, 3579, 3831, 3516, 3894, 3453, 3957, 3390, 4020, 3327,
- 4083, 3706, 3769, 3643, 3832, 3580, 3895, 3517, 3958, 3454, 4021, 3391, 4084,
- 3770, 3707, 3833, 3644, 3896, 3581, 3959, 3518, 4022, 3455, 4085, 3771, 3834,
- 3708, 3897, 3645, 3960, 3582, 4023, 3519, 4086, 3835, 3772, 3898, 3709, 3961,
- 3646, 4024, 3583, 4087, 3836, 3899, 3773, 3962, 3710, 4025, 3647, 4088, 3900,
- 3837, 3963, 3774, 4026, 3711, 4089, 3901, 3964, 3838, 4027, 3775, 4090, 3965,
- 3902, 4028, 3839, 4091, 3966, 4029, 3903, 4092, 4030, 3967, 4093, 4031, 4094,
- 4095,
-};
-#endif // CONFIG_TX64X64
-
-#if CONFIG_CHROMA_2X2
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_2x2_neighbors[5 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 1, 1, 2, 0, 0,
+ 0, 1, 32, 64, 33, 2, 3, 34, 65, 96, 128, 97, 66,
+ 35, 4, 5, 36, 67, 98, 129, 160, 192, 161, 130, 99, 68,
+ 37, 6, 7, 38, 69, 100, 131, 162, 193, 224, 256, 225, 194,
+ 163, 132, 101, 70, 39, 8, 9, 40, 71, 102, 133, 164, 195,
+ 226, 257, 288, 320, 289, 258, 227, 196, 165, 134, 103, 72, 41,
+ 10, 11, 42, 73, 104, 135, 166, 197, 228, 259, 290, 321, 352,
+ 384, 353, 322, 291, 260, 229, 198, 167, 136, 105, 74, 43, 12,
+ 13, 44, 75, 106, 137, 168, 199, 230, 261, 292, 323, 354, 385,
+ 416, 448, 417, 386, 355, 324, 293, 262, 231, 200, 169, 138, 107,
+ 76, 45, 14, 15, 46, 77, 108, 139, 170, 201, 232, 263, 294,
+ 325, 356, 387, 418, 449, 480, 512, 481, 450, 419, 388, 357, 326,
+ 295, 264, 233, 202, 171, 140, 109, 78, 47, 16, 17, 48, 79,
+ 110, 141, 172, 203, 234, 265, 296, 327, 358, 389, 420, 451, 482,
+ 513, 544, 576, 545, 514, 483, 452, 421, 390, 359, 328, 297, 266,
+ 235, 204, 173, 142, 111, 80, 49, 18, 19, 50, 81, 112, 143,
+ 174, 205, 236, 267, 298, 329, 360, 391, 422, 453, 484, 515, 546,
+ 577, 608, 640, 609, 578, 547, 516, 485, 454, 423, 392, 361, 330,
+ 299, 268, 237, 206, 175, 144, 113, 82, 51, 20, 21, 52, 83,
+ 114, 145, 176, 207, 238, 269, 300, 331, 362, 393, 424, 455, 486,
+ 517, 548, 579, 610, 641, 672, 704, 673, 642, 611, 580, 549, 518,
+ 487, 456, 425, 394, 363, 332, 301, 270, 239, 208, 177, 146, 115,
+ 84, 53, 22, 23, 54, 85, 116, 147, 178, 209, 240, 271, 302,
+ 333, 364, 395, 426, 457, 488, 519, 550, 581, 612, 643, 674, 705,
+ 736, 768, 737, 706, 675, 644, 613, 582, 551, 520, 489, 458, 427,
+ 396, 365, 334, 303, 272, 241, 210, 179, 148, 117, 86, 55, 24,
+ 25, 56, 87, 118, 149, 180, 211, 242, 273, 304, 335, 366, 397,
+ 428, 459, 490, 521, 552, 583, 614, 645, 676, 707, 738, 769, 800,
+ 832, 801, 770, 739, 708, 677, 646, 615, 584, 553, 522, 491, 460,
+ 429, 398, 367, 336, 305, 274, 243, 212, 181, 150, 119, 88, 57,
+ 26, 27, 58, 89, 120, 151, 182, 213, 244, 275, 306, 337, 368,
+ 399, 430, 461, 492, 523, 554, 585, 616, 647, 678, 709, 740, 771,
+ 802, 833, 864, 896, 865, 834, 803, 772, 741, 710, 679, 648, 617,
+ 586, 555, 524, 493, 462, 431, 400, 369, 338, 307, 276, 245, 214,
+ 183, 152, 121, 90, 59, 28, 29, 60, 91, 122, 153, 184, 215,
+ 246, 277, 308, 339, 370, 401, 432, 463, 494, 525, 556, 587, 618,
+ 649, 680, 711, 742, 773, 804, 835, 866, 897, 928, 960, 929, 898,
+ 867, 836, 805, 774, 743, 712, 681, 650, 619, 588, 557, 526, 495,
+ 464, 433, 402, 371, 340, 309, 278, 247, 216, 185, 154, 123, 92,
+ 61, 30, 31, 62, 93, 124, 155, 186, 217, 248, 279, 310, 341,
+ 372, 403, 434, 465, 496, 527, 558, 589, 620, 651, 682, 713, 744,
+ 775, 806, 837, 868, 899, 930, 961, 992, 993, 962, 931, 900, 869,
+ 838, 807, 776, 745, 714, 683, 652, 621, 590, 559, 528, 497, 466,
+ 435, 404, 373, 342, 311, 280, 249, 218, 187, 156, 125, 94, 63,
+ 95, 126, 157, 188, 219, 250, 281, 312, 343, 374, 405, 436, 467,
+ 498, 529, 560, 591, 622, 653, 684, 715, 746, 777, 808, 839, 870,
+ 901, 932, 963, 994, 995, 964, 933, 902, 871, 840, 809, 778, 747,
+ 716, 685, 654, 623, 592, 561, 530, 499, 468, 437, 406, 375, 344,
+ 313, 282, 251, 220, 189, 158, 127, 159, 190, 221, 252, 283, 314,
+ 345, 376, 407, 438, 469, 500, 531, 562, 593, 624, 655, 686, 717,
+ 748, 779, 810, 841, 872, 903, 934, 965, 996, 997, 966, 935, 904,
+ 873, 842, 811, 780, 749, 718, 687, 656, 625, 594, 563, 532, 501,
+ 470, 439, 408, 377, 346, 315, 284, 253, 222, 191, 223, 254, 285,
+ 316, 347, 378, 409, 440, 471, 502, 533, 564, 595, 626, 657, 688,
+ 719, 750, 781, 812, 843, 874, 905, 936, 967, 998, 999, 968, 937,
+ 906, 875, 844, 813, 782, 751, 720, 689, 658, 627, 596, 565, 534,
+ 503, 472, 441, 410, 379, 348, 317, 286, 255, 287, 318, 349, 380,
+ 411, 442, 473, 504, 535, 566, 597, 628, 659, 690, 721, 752, 783,
+ 814, 845, 876, 907, 938, 969, 1000, 1001, 970, 939, 908, 877, 846,
+ 815, 784, 753, 722, 691, 660, 629, 598, 567, 536, 505, 474, 443,
+ 412, 381, 350, 319, 351, 382, 413, 444, 475, 506, 537, 568, 599,
+ 630, 661, 692, 723, 754, 785, 816, 847, 878, 909, 940, 971, 1002,
+ 1003, 972, 941, 910, 879, 848, 817, 786, 755, 724, 693, 662, 631,
+ 600, 569, 538, 507, 476, 445, 414, 383, 415, 446, 477, 508, 539,
+ 570, 601, 632, 663, 694, 725, 756, 787, 818, 849, 880, 911, 942,
+ 973, 1004, 1005, 974, 943, 912, 881, 850, 819, 788, 757, 726, 695,
+ 664, 633, 602, 571, 540, 509, 478, 447, 479, 510, 541, 572, 603,
+ 634, 665, 696, 727, 758, 789, 820, 851, 882, 913, 944, 975, 1006,
+ 1007, 976, 945, 914, 883, 852, 821, 790, 759, 728, 697, 666, 635,
+ 604, 573, 542, 511, 543, 574, 605, 636, 667, 698, 729, 760, 791,
+ 822, 853, 884, 915, 946, 977, 1008, 1009, 978, 947, 916, 885, 854,
+ 823, 792, 761, 730, 699, 668, 637, 606, 575, 607, 638, 669, 700,
+ 731, 762, 793, 824, 855, 886, 917, 948, 979, 1010, 1011, 980, 949,
+ 918, 887, 856, 825, 794, 763, 732, 701, 670, 639, 671, 702, 733,
+ 764, 795, 826, 857, 888, 919, 950, 981, 1012, 1013, 982, 951, 920,
+ 889, 858, 827, 796, 765, 734, 703, 735, 766, 797, 828, 859, 890,
+ 921, 952, 983, 1014, 1015, 984, 953, 922, 891, 860, 829, 798, 767,
+ 799, 830, 861, 892, 923, 954, 985, 1016, 1017, 986, 955, 924, 893,
+ 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895,
+ 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023
};
-#endif
// Neighborhood 2-tuples for various scans and blocksizes,
// in {top, left} order for each position in corresponding scan order.
DECLARE_ALIGNED(16, static const int16_t,
default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 4, 0, 1, 4, 4, 5, 5, 1, 8, 8, 5, 8, 2,
- 2, 2, 5, 9, 12, 6, 9, 3, 6, 10, 13, 7, 10, 11, 14, 0, 0,
+ 0, 0, 0, 0, 0, 0, 4, 4, 1, 4, 1, 1, 2, 2, 2, 5, 5,
+ 8, 8, 8, 9, 12, 6, 9, 3, 6, 7, 10, 10, 13, 11, 14, 0, 0
};
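
The neighbor tables above give, for each position in the matching scan order, a {top, left} pair of raster indices pointing at the two previously coded neighbours of that coefficient, so entry i occupies slots 2*i and 2*i + 1 (MAX_NEIGHBORS is 2). A minimal sketch of how such a table is typically consumed when deriving the coefficient entropy context follows; the function name, the token_cache argument and the rounding are illustrative assumptions rather than the exact libaom API, and the DC position (index 0) is normally special-cased.

#include <stdint.h>

#define MAX_NEIGHBORS 2 /* two neighbours ({top, left}) per scan position */

/* Hypothetical helper (sketch, not the library's function): average the
 * token-cache values of the two neighbours of the coefficient at scan
 * position 'pos', rounding up, to obtain its entropy-coding context.
 * 'neighbors' is one of the *_neighbors tables above; 'token_cache' is
 * assumed to hold the token class of already-coded positions. */
int neighbor_context(const int16_t *neighbors, const uint8_t *token_cache,
                     int pos) {
  return (1 + token_cache[neighbors[MAX_NEIGHBORS * pos + 0]] +
          token_cache[neighbors[MAX_NEIGHBORS * pos + 1]]) >> 1;
}
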
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
mcol_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 4, 4, 8, 8, 0, 0, 1, 4, 5, 8, 9, 12, 1,
@@ -1840,19 +858,6 @@ DECLARE_ALIGNED(16, static const int16_t,
0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, 4, 2, 5, 3, 6, 4,
4, 5, 8, 6, 9, 7, 10, 8, 8, 9, 12, 10, 13, 11, 14, 0, 0,
};
-#endif // CONFIG_EXT_TX
-
-DECLARE_ALIGNED(16, static const int16_t,
- col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 4, 4, 4, 0, 8, 8, 1, 4, 5, 8, 5, 1, 9,
- 12, 2, 5, 6, 9, 6, 2, 3, 6, 10, 13, 7, 10, 11, 14, 0, 0,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 1, 1, 1, 1, 4, 2, 2, 2, 5, 4, 5, 5,
- 8, 3, 6, 8, 9, 6, 9, 9, 12, 7, 10, 10, 13, 11, 14, 0, 0,
-};
DECLARE_ALIGNED(16, static const int16_t,
default_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {
@@ -1924,7 +929,6 @@ DECLARE_ALIGNED(16, static const int16_t,
14, 29, 30, 45, 46, 61, 14, 14, 15, 30, 31, 46, 47, 62, 0, 0
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
mrow_scan_4x16_neighbors[65 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, 4, 2, 5, 3, 6, 4, 4, 5,
@@ -1968,7 +972,6 @@ DECLARE_ALIGNED(16, static const int16_t,
58, 11, 11, 12, 27, 28, 43, 44, 59, 12, 12, 13, 28, 29, 44, 45, 60, 13, 13,
14, 29, 30, 45, 46, 61, 14, 14, 15, 30, 31, 46, 47, 62, 0, 0
};
-#endif // CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
default_scan_8x32_neighbors[257 * MAX_NEIGHBORS]) = {
@@ -2048,7 +1051,6 @@ DECLARE_ALIGNED(16, static const int16_t,
223, 254, 0, 0
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
mrow_scan_8x32_neighbors[257 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
@@ -2202,20 +1204,7 @@ DECLARE_ALIGNED(16, static const int16_t,
30, 30, 31, 62, 63, 94, 95, 126, 127, 158, 159, 190, 191, 222, 223, 254,
0, 0
};
-#endif // CONFIG_EXT_TX
-
-DECLARE_ALIGNED(16, static const int16_t,
- col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 8, 8, 8, 0, 16, 16, 1, 8, 24, 24, 9, 16, 9, 1, 32,
- 32, 17, 24, 2, 9, 25, 32, 10, 17, 40, 40, 10, 2, 18, 25, 33, 40, 3, 10,
- 48, 48, 11, 18, 26, 33, 11, 3, 41, 48, 19, 26, 34, 41, 4, 11, 27, 34, 12,
- 19, 49, 56, 42, 49, 20, 27, 12, 4, 35, 42, 5, 12, 28, 35, 50, 57, 43, 50,
- 13, 20, 36, 43, 13, 5, 21, 28, 51, 58, 29, 36, 6, 13, 44, 51, 14, 21, 14,
- 6, 37, 44, 52, 59, 22, 29, 7, 14, 30, 37, 45, 52, 15, 22, 38, 45, 23, 30,
- 53, 60, 31, 38, 46, 53, 39, 46, 54, 61, 47, 54, 55, 62, 0, 0,
-};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
mcol_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 8, 8, 16, 16, 24, 24, 32, 32, 40, 40, 48, 48, 0, 0, 1,
@@ -2237,28 +1226,16 @@ DECLARE_ALIGNED(16, static const int16_t,
46, 40, 40, 41, 48, 42, 49, 43, 50, 44, 51, 45, 52, 46, 53, 47, 54, 48, 48,
49, 56, 50, 57, 51, 58, 52, 59, 53, 60, 54, 61, 55, 62, 0, 0,
};
-#endif // CONFIG_EXT_TX
-
-DECLARE_ALIGNED(16, static const int16_t,
- row_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 0, 1, 1, 8, 2, 2, 8, 9, 2, 9, 3, 3, 9,
- 16, 3, 10, 16, 17, 4, 4, 10, 17, 17, 24, 4, 11, 11, 18, 18, 25, 24, 25,
- 5, 5, 5, 12, 12, 19, 25, 32, 19, 26, 6, 6, 26, 33, 32, 33, 13, 20, 20,
- 27, 33, 40, 6, 13, 27, 34, 40, 41, 34, 41, 21, 28, 28, 35, 41, 48, 14, 21,
- 35, 42, 7, 14, 48, 49, 29, 36, 42, 49, 36, 43, 22, 29, 49, 56, 15, 22, 43,
- 50, 50, 57, 37, 44, 30, 37, 44, 51, 23, 30, 51, 58, 45, 52, 38, 45, 52, 59,
- 31, 38, 53, 60, 39, 46, 46, 53, 47, 54, 54, 61, 55, 62, 0, 0,
-};
DECLARE_ALIGNED(16, static const int16_t,
default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 8, 0, 8, 8, 1, 8, 9, 1, 9, 16, 16, 17, 2, 9, 10,
- 2, 10, 17, 17, 24, 24, 25, 3, 10, 11, 3, 18, 25, 25, 32, 11, 18, 32, 33,
- 4, 11, 26, 33, 19, 26, 12, 4, 33, 40, 12, 19, 40, 41, 5, 12, 27, 34, 34,
- 41, 20, 27, 13, 20, 13, 5, 41, 48, 48, 49, 28, 35, 35, 42, 21, 28, 6, 6,
- 6, 13, 42, 49, 49, 56, 36, 43, 14, 21, 29, 36, 7, 14, 43, 50, 50, 57, 22,
- 29, 37, 44, 15, 22, 44, 51, 51, 58, 30, 37, 23, 30, 52, 59, 45, 52, 38, 45,
- 31, 38, 53, 60, 46, 53, 39, 46, 54, 61, 47, 54, 55, 62, 0, 0,
+ 0, 0, 0, 0, 0, 0, 8, 8, 1, 8, 1, 1, 2, 2, 2, 9, 9, 16, 16,
+ 16, 24, 24, 17, 24, 10, 17, 3, 10, 3, 3, 4, 4, 4, 11, 11, 18, 18, 25,
+ 25, 32, 32, 32, 40, 40, 33, 40, 26, 33, 19, 26, 12, 19, 5, 12, 5, 5, 6,
+ 6, 6, 13, 13, 20, 20, 27, 27, 34, 34, 41, 41, 48, 48, 48, 49, 56, 42, 49,
+ 35, 42, 28, 35, 21, 28, 14, 21, 7, 14, 15, 22, 22, 29, 29, 36, 36, 43, 43,
+ 50, 50, 57, 51, 58, 44, 51, 37, 44, 30, 37, 23, 30, 31, 38, 38, 45, 45, 52,
+ 52, 59, 53, 60, 46, 53, 39, 46, 47, 54, 54, 61, 55, 62, 0, 0
};
DECLARE_ALIGNED(16, static const int16_t,
@@ -2829,7 +1806,6 @@ DECLARE_ALIGNED(16, static const int16_t,
478, 509, 479, 510, 0, 0
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
mcol_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 16, 16, 32, 32, 48, 48, 64, 64, 80, 80, 96,
@@ -2907,126 +1883,46 @@ DECLARE_ALIGNED(16, static const int16_t,
246, 232, 247, 233, 248, 234, 249, 235, 250, 236, 251, 237, 252, 238, 253,
239, 254, 0, 0,
};
-#endif // CONFIG_EXT_TX
-
-DECLARE_ALIGNED(16, static const int16_t,
- col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 16, 16, 32, 32, 16, 0, 48, 48, 1, 16, 64,
- 64, 17, 32, 80, 80, 33, 48, 17, 1, 49, 64, 96, 96, 2, 17,
- 65, 80, 18, 33, 112, 112, 34, 49, 81, 96, 18, 2, 50, 65, 128,
- 128, 3, 18, 97, 112, 19, 34, 66, 81, 144, 144, 82, 97, 35, 50,
- 113, 128, 19, 3, 51, 66, 160, 160, 4, 19, 98, 113, 129, 144, 67,
- 82, 20, 35, 83, 98, 114, 129, 36, 51, 176, 176, 20, 4, 145, 160,
- 52, 67, 99, 114, 5, 20, 130, 145, 68, 83, 192, 192, 161, 176, 21,
- 36, 115, 130, 84, 99, 37, 52, 146, 161, 208, 208, 53, 68, 21, 5,
- 100, 115, 177, 192, 131, 146, 69, 84, 6, 21, 224, 224, 116, 131, 22,
- 37, 162, 177, 85, 100, 147, 162, 38, 53, 193, 208, 101, 116, 54, 69,
- 22, 6, 132, 147, 178, 193, 70, 85, 163, 178, 209, 224, 7, 22, 117,
- 132, 23, 38, 148, 163, 23, 7, 86, 101, 194, 209, 225, 240, 39, 54,
- 179, 194, 102, 117, 133, 148, 55, 70, 164, 179, 8, 23, 71, 86, 210,
- 225, 118, 133, 149, 164, 195, 210, 24, 39, 87, 102, 40, 55, 56, 71,
- 134, 149, 180, 195, 226, 241, 103, 118, 24, 8, 165, 180, 211, 226, 72,
- 87, 150, 165, 9, 24, 119, 134, 25, 40, 88, 103, 196, 211, 41, 56,
- 135, 150, 181, 196, 104, 119, 57, 72, 227, 242, 166, 181, 120, 135, 151,
- 166, 197, 212, 73, 88, 25, 9, 212, 227, 89, 104, 136, 151, 182, 197,
- 10, 25, 26, 41, 105, 120, 167, 182, 228, 243, 152, 167, 42, 57, 121,
- 136, 213, 228, 58, 73, 198, 213, 74, 89, 137, 152, 183, 198, 168, 183,
- 26, 10, 90, 105, 229, 244, 11, 26, 106, 121, 214, 229, 153, 168, 27,
- 42, 199, 214, 43, 58, 184, 199, 122, 137, 169, 184, 230, 245, 59, 74,
- 27, 11, 75, 90, 138, 153, 200, 215, 215, 230, 91, 106, 12, 27, 28,
- 43, 185, 200, 107, 122, 154, 169, 44, 59, 231, 246, 216, 231, 60, 75,
- 123, 138, 28, 12, 76, 91, 201, 216, 170, 185, 232, 247, 139, 154, 92,
- 107, 13, 28, 108, 123, 29, 44, 186, 201, 217, 232, 155, 170, 45, 60,
- 29, 13, 61, 76, 124, 139, 14, 14, 233, 248, 77, 92, 14, 29, 171,
- 186, 140, 155, 202, 217, 30, 45, 93, 108, 109, 124, 46, 61, 156, 171,
- 62, 77, 187, 202, 15, 30, 125, 140, 218, 233, 78, 93, 31, 46, 172,
- 187, 47, 62, 141, 156, 94, 109, 234, 249, 203, 218, 63, 78, 110, 125,
- 188, 203, 157, 172, 126, 141, 79, 94, 173, 188, 95, 110, 219, 234, 142,
- 157, 204, 219, 235, 250, 111, 126, 158, 173, 127, 142, 189, 204, 220, 235,
- 143, 158, 174, 189, 205, 220, 236, 251, 159, 174, 190, 205, 221, 236, 175,
- 190, 237, 252, 206, 221, 222, 237, 191, 206, 238, 253, 207, 222, 223, 238,
- 239, 254, 0, 0,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 16, 3, 3, 2,
- 17, 16, 17, 4, 4, 17, 32, 3, 18, 5, 5, 18, 33, 32, 33,
- 4, 19, 33, 48, 6, 6, 19, 34, 5, 20, 34, 49, 48, 49, 7,
- 7, 20, 35, 49, 64, 6, 21, 35, 50, 21, 36, 64, 65, 8, 8,
- 50, 65, 36, 51, 7, 22, 22, 37, 65, 80, 51, 66, 9, 9, 37,
- 52, 8, 23, 66, 81, 52, 67, 80, 81, 23, 38, 10, 10, 38, 53,
- 67, 82, 81, 96, 53, 68, 9, 24, 82, 97, 68, 83, 24, 39, 96,
- 97, 39, 54, 11, 11, 54, 69, 83, 98, 97, 112, 69, 84, 10, 25,
- 25, 40, 40, 55, 98, 113, 84, 99, 12, 12, 55, 70, 112, 113, 70,
- 85, 11, 26, 99, 114, 85, 100, 113, 128, 26, 41, 41, 56, 56, 71,
- 100, 115, 13, 13, 71, 86, 114, 129, 86, 101, 128, 129, 57, 72, 115,
- 130, 101, 116, 12, 27, 42, 57, 14, 14, 72, 87, 27, 42, 129, 144,
- 87, 102, 116, 131, 130, 145, 102, 117, 58, 73, 144, 145, 73, 88, 117,
- 132, 88, 103, 13, 28, 43, 58, 131, 146, 103, 118, 28, 43, 145, 160,
- 132, 147, 74, 89, 89, 104, 118, 133, 146, 161, 104, 119, 160, 161, 59,
- 74, 119, 134, 133, 148, 14, 29, 44, 59, 147, 162, 161, 176, 29, 44,
- 105, 120, 75, 90, 90, 105, 148, 163, 162, 177, 134, 149, 176, 177, 120,
- 135, 149, 164, 163, 178, 15, 30, 135, 150, 177, 192, 60, 75, 106, 121,
- 45, 60, 121, 136, 178, 193, 91, 106, 136, 151, 164, 179, 192, 193, 30,
- 45, 150, 165, 151, 166, 179, 194, 76, 91, 165, 180, 122, 137, 193, 208,
- 107, 122, 137, 152, 208, 209, 180, 195, 61, 76, 152, 167, 194, 209, 166,
- 181, 224, 224, 92, 107, 181, 196, 46, 61, 138, 153, 209, 224, 167, 182,
- 153, 168, 195, 210, 31, 46, 123, 138, 77, 92, 168, 183, 210, 225, 196,
- 211, 225, 240, 182, 197, 154, 169, 108, 123, 139, 154, 183, 198, 62, 77,
- 197, 212, 169, 184, 93, 108, 211, 226, 184, 199, 47, 62, 212, 227, 226,
- 241, 124, 139, 198, 213, 155, 170, 170, 185, 140, 155, 213, 228, 227, 242,
- 109, 124, 78, 93, 185, 200, 228, 243, 199, 214, 200, 215, 214, 229, 125,
- 140, 171, 186, 186, 201, 63, 78, 156, 171, 94, 109, 141, 156, 229, 244,
- 201, 216, 215, 230, 79, 94, 230, 245, 216, 231, 110, 125, 187, 202, 231,
- 246, 217, 232, 157, 172, 202, 217, 126, 141, 95, 110, 142, 157, 172, 187,
- 232, 247, 111, 126, 218, 233, 203, 218, 233, 248, 173, 188, 188, 203, 127,
- 142, 158, 173, 143, 158, 234, 249, 219, 234, 189, 204, 204, 219, 159, 174,
- 174, 189, 235, 250, 205, 220, 175, 190, 190, 205, 220, 235, 191, 206, 221,
- 236, 236, 251, 206, 221, 237, 252, 207, 222, 222, 237, 223, 238, 238, 253,
- 239, 254, 0, 0,
-};
DECLARE_ALIGNED(16, static const int16_t,
default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 16, 0, 16, 16, 1, 16, 17, 1, 32, 32, 17,
- 32, 2, 17, 18, 2, 48, 48, 18, 33, 33, 48, 3, 18, 49, 64,
- 64, 65, 34, 49, 19, 3, 19, 34, 50, 65, 4, 19, 65, 80, 80,
- 81, 35, 50, 20, 4, 20, 35, 66, 81, 81, 96, 51, 66, 96, 97,
- 5, 20, 36, 51, 82, 97, 21, 36, 67, 82, 97, 112, 21, 5, 52,
- 67, 112, 113, 37, 52, 6, 21, 83, 98, 98, 113, 68, 83, 22, 6,
- 113, 128, 22, 37, 53, 68, 84, 99, 99, 114, 128, 129, 114, 129, 69,
- 84, 38, 53, 7, 22, 23, 7, 129, 144, 23, 38, 54, 69, 100, 115,
- 85, 100, 115, 130, 144, 145, 130, 145, 39, 54, 70, 85, 8, 23, 55,
- 70, 116, 131, 101, 116, 145, 160, 24, 39, 24, 8, 86, 101, 131, 146,
- 160, 161, 146, 161, 71, 86, 40, 55, 9, 24, 117, 132, 102, 117, 161,
- 176, 132, 147, 56, 71, 87, 102, 25, 40, 147, 162, 25, 9, 176, 177,
- 162, 177, 72, 87, 41, 56, 118, 133, 133, 148, 103, 118, 10, 25, 148,
- 163, 57, 72, 88, 103, 177, 192, 26, 41, 163, 178, 192, 193, 26, 10,
- 119, 134, 73, 88, 149, 164, 104, 119, 134, 149, 42, 57, 178, 193, 164,
- 179, 11, 26, 58, 73, 193, 208, 89, 104, 135, 150, 120, 135, 27, 42,
- 74, 89, 208, 209, 150, 165, 179, 194, 165, 180, 105, 120, 194, 209, 43,
- 58, 27, 11, 136, 151, 90, 105, 151, 166, 180, 195, 59, 74, 121, 136,
- 209, 224, 195, 210, 224, 225, 166, 181, 106, 121, 75, 90, 12, 27, 181,
- 196, 28, 12, 210, 225, 152, 167, 167, 182, 137, 152, 28, 43, 196, 211,
- 122, 137, 91, 106, 225, 240, 44, 59, 13, 28, 107, 122, 182, 197, 168,
- 183, 211, 226, 153, 168, 226, 241, 60, 75, 197, 212, 138, 153, 29, 44,
- 76, 91, 29, 13, 183, 198, 123, 138, 45, 60, 212, 227, 198, 213, 154,
- 169, 169, 184, 227, 242, 92, 107, 61, 76, 139, 154, 14, 29, 30, 14,
- 184, 199, 213, 228, 108, 123, 199, 214, 228, 243, 77, 92, 30, 45, 170,
- 185, 155, 170, 185, 200, 93, 108, 124, 139, 214, 229, 46, 61, 200, 215,
- 229, 244, 15, 30, 109, 124, 62, 77, 140, 155, 215, 230, 31, 46, 171,
- 186, 186, 201, 201, 216, 78, 93, 230, 245, 125, 140, 47, 62, 216, 231,
- 156, 171, 94, 109, 231, 246, 141, 156, 63, 78, 202, 217, 187, 202, 110,
- 125, 217, 232, 172, 187, 232, 247, 79, 94, 157, 172, 126, 141, 203, 218,
- 95, 110, 233, 248, 218, 233, 142, 157, 111, 126, 173, 188, 188, 203, 234,
- 249, 219, 234, 127, 142, 158, 173, 204, 219, 189, 204, 143, 158, 235, 250,
- 174, 189, 205, 220, 159, 174, 220, 235, 221, 236, 175, 190, 190, 205, 236,
- 251, 206, 221, 237, 252, 191, 206, 222, 237, 207, 222, 238, 253, 223, 238,
- 239, 254, 0, 0,
+ 0, 0, 0, 0, 0, 0, 16, 16, 1, 16, 1, 1, 2, 2, 2,
+ 17, 17, 32, 32, 32, 48, 48, 33, 48, 18, 33, 3, 18, 3, 3,
+ 4, 4, 4, 19, 19, 34, 34, 49, 49, 64, 64, 64, 80, 80, 65,
+ 80, 50, 65, 35, 50, 20, 35, 5, 20, 5, 5, 6, 6, 6, 21,
+ 21, 36, 36, 51, 51, 66, 66, 81, 81, 96, 96, 96, 112, 112, 97,
+ 112, 82, 97, 67, 82, 52, 67, 37, 52, 22, 37, 7, 22, 7, 7,
+ 8, 8, 8, 23, 23, 38, 38, 53, 53, 68, 68, 83, 83, 98, 98,
+ 113, 113, 128, 128, 128, 144, 144, 129, 144, 114, 129, 99, 114, 84, 99,
+ 69, 84, 54, 69, 39, 54, 24, 39, 9, 24, 9, 9, 10, 10, 10,
+ 25, 25, 40, 40, 55, 55, 70, 70, 85, 85, 100, 100, 115, 115, 130,
+ 130, 145, 145, 160, 160, 160, 176, 176, 161, 176, 146, 161, 131, 146, 116,
+ 131, 101, 116, 86, 101, 71, 86, 56, 71, 41, 56, 26, 41, 11, 26,
+ 11, 11, 12, 12, 12, 27, 27, 42, 42, 57, 57, 72, 72, 87, 87,
+ 102, 102, 117, 117, 132, 132, 147, 147, 162, 162, 177, 177, 192, 192, 192,
+ 208, 208, 193, 208, 178, 193, 163, 178, 148, 163, 133, 148, 118, 133, 103,
+ 118, 88, 103, 73, 88, 58, 73, 43, 58, 28, 43, 13, 28, 13, 13,
+ 14, 14, 14, 29, 29, 44, 44, 59, 59, 74, 74, 89, 89, 104, 104,
+ 119, 119, 134, 134, 149, 149, 164, 164, 179, 179, 194, 194, 209, 209, 224,
+ 224, 224, 225, 240, 210, 225, 195, 210, 180, 195, 165, 180, 150, 165, 135,
+ 150, 120, 135, 105, 120, 90, 105, 75, 90, 60, 75, 45, 60, 30, 45,
+ 15, 30, 31, 46, 46, 61, 61, 76, 76, 91, 91, 106, 106, 121, 121,
+ 136, 136, 151, 151, 166, 166, 181, 181, 196, 196, 211, 211, 226, 226, 241,
+ 227, 242, 212, 227, 197, 212, 182, 197, 167, 182, 152, 167, 137, 152, 122,
+ 137, 107, 122, 92, 107, 77, 92, 62, 77, 47, 62, 63, 78, 78, 93,
+ 93, 108, 108, 123, 123, 138, 138, 153, 153, 168, 168, 183, 183, 198, 198,
+ 213, 213, 228, 228, 243, 229, 244, 214, 229, 199, 214, 184, 199, 169, 184,
+ 154, 169, 139, 154, 124, 139, 109, 124, 94, 109, 79, 94, 95, 110, 110,
+ 125, 125, 140, 140, 155, 155, 170, 170, 185, 185, 200, 200, 215, 215, 230,
+ 230, 245, 231, 246, 216, 231, 201, 216, 186, 201, 171, 186, 156, 171, 141,
+ 156, 126, 141, 111, 126, 127, 142, 142, 157, 157, 172, 172, 187, 187, 202,
+ 202, 217, 217, 232, 232, 247, 233, 248, 218, 233, 203, 218, 188, 203, 173,
+ 188, 158, 173, 143, 158, 159, 174, 174, 189, 189, 204, 204, 219, 219, 234,
+ 234, 249, 235, 250, 220, 235, 205, 220, 190, 205, 175, 190, 191, 206, 206,
+ 221, 221, 236, 236, 251, 237, 252, 222, 237, 207, 222, 223, 238, 238, 253,
+ 239, 254, 0, 0
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
mcol_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 32, 32, 64, 64, 96, 96, 128, 128, 160, 160,
@@ -3328,1899 +2224,162 @@ DECLARE_ALIGNED(16, static const int16_t,
983, 1014, 984, 1015, 985, 1016, 986, 1017, 987, 1018, 988, 1019, 989, 1020,
990, 1021, 991, 1022, 0, 0,
};
-#endif // CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t,
default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 32, 0, 32, 32, 1, 32, 33, 1, 64, 64,
- 33, 64, 2, 33, 96, 96, 34, 2, 65, 96, 34, 65, 128, 128,
- 97, 128, 3, 34, 66, 97, 35, 3, 35, 66, 98, 129, 129, 160,
- 160, 161, 4, 35, 67, 98, 192, 192, 36, 4, 130, 161, 161, 192,
- 36, 67, 99, 130, 5, 36, 68, 99, 193, 224, 162, 193, 224, 225,
- 131, 162, 37, 68, 100, 131, 37, 5, 194, 225, 225, 256, 256, 257,
- 163, 194, 69, 100, 132, 163, 6, 37, 226, 257, 38, 6, 195, 226,
- 257, 288, 101, 132, 288, 289, 38, 69, 164, 195, 133, 164, 258, 289,
- 227, 258, 196, 227, 7, 38, 289, 320, 70, 101, 320, 321, 39, 7,
- 165, 196, 39, 70, 102, 133, 290, 321, 259, 290, 228, 259, 321, 352,
- 352, 353, 197, 228, 134, 165, 71, 102, 8, 39, 322, 353, 291, 322,
- 260, 291, 103, 134, 353, 384, 166, 197, 229, 260, 40, 71, 40, 8,
- 384, 385, 135, 166, 354, 385, 323, 354, 198, 229, 292, 323, 72, 103,
- 261, 292, 9, 40, 385, 416, 167, 198, 104, 135, 230, 261, 355, 386,
- 416, 417, 293, 324, 324, 355, 41, 9, 41, 72, 386, 417, 199, 230,
- 136, 167, 417, 448, 262, 293, 356, 387, 73, 104, 387, 418, 231, 262,
- 10, 41, 168, 199, 325, 356, 418, 449, 105, 136, 448, 449, 42, 73,
- 294, 325, 200, 231, 42, 10, 357, 388, 137, 168, 263, 294, 388, 419,
- 74, 105, 419, 450, 449, 480, 326, 357, 232, 263, 295, 326, 169, 200,
- 11, 42, 106, 137, 480, 481, 450, 481, 358, 389, 264, 295, 201, 232,
- 138, 169, 389, 420, 43, 74, 420, 451, 327, 358, 43, 11, 481, 512,
- 233, 264, 451, 482, 296, 327, 75, 106, 170, 201, 482, 513, 512, 513,
- 390, 421, 359, 390, 421, 452, 107, 138, 12, 43, 202, 233, 452, 483,
- 265, 296, 328, 359, 139, 170, 44, 75, 483, 514, 513, 544, 234, 265,
- 297, 328, 422, 453, 44, 12, 391, 422, 171, 202, 76, 107, 514, 545,
- 453, 484, 544, 545, 266, 297, 203, 234, 108, 139, 329, 360, 298, 329,
- 140, 171, 515, 546, 13, 44, 423, 454, 235, 266, 545, 576, 454, 485,
- 45, 76, 172, 203, 330, 361, 576, 577, 45, 13, 267, 298, 546, 577,
- 77, 108, 204, 235, 455, 486, 577, 608, 299, 330, 109, 140, 547, 578,
- 14, 45, 46, 14, 141, 172, 578, 609, 331, 362, 46, 77, 173, 204,
- 15, 15, 78, 109, 205, 236, 579, 610, 110, 141, 15, 46, 142, 173,
- 47, 78, 174, 205, 16, 16, 79, 110, 206, 237, 16, 47, 111, 142,
- 48, 79, 143, 174, 80, 111, 175, 206, 17, 48, 49, 17, 207, 238,
- 49, 80, 81, 112, 18, 18, 18, 49, 50, 81, 82, 113, 19, 50,
- 51, 82, 83, 114, 608, 609, 484, 515, 360, 391, 236, 267, 112, 143,
- 51, 19, 640, 640, 609, 640, 516, 547, 485, 516, 392, 423, 361, 392,
- 268, 299, 237, 268, 144, 175, 113, 144, 20, 51, 52, 20, 672, 672,
- 641, 672, 610, 641, 548, 579, 517, 548, 486, 517, 424, 455, 393, 424,
- 362, 393, 300, 331, 269, 300, 238, 269, 176, 207, 145, 176, 114, 145,
- 52, 83, 21, 52, 53, 21, 704, 704, 673, 704, 642, 673, 611, 642,
- 580, 611, 549, 580, 518, 549, 487, 518, 456, 487, 425, 456, 394, 425,
- 363, 394, 332, 363, 301, 332, 270, 301, 239, 270, 208, 239, 177, 208,
- 146, 177, 115, 146, 84, 115, 53, 84, 22, 53, 54, 22, 705, 736,
- 674, 705, 643, 674, 581, 612, 550, 581, 519, 550, 457, 488, 426, 457,
- 395, 426, 333, 364, 302, 333, 271, 302, 209, 240, 178, 209, 147, 178,
- 85, 116, 54, 85, 23, 54, 706, 737, 675, 706, 582, 613, 551, 582,
- 458, 489, 427, 458, 334, 365, 303, 334, 210, 241, 179, 210, 86, 117,
- 55, 86, 707, 738, 583, 614, 459, 490, 335, 366, 211, 242, 87, 118,
- 736, 737, 612, 643, 488, 519, 364, 395, 240, 271, 116, 147, 55, 23,
- 768, 768, 737, 768, 644, 675, 613, 644, 520, 551, 489, 520, 396, 427,
- 365, 396, 272, 303, 241, 272, 148, 179, 117, 148, 24, 55, 56, 24,
- 800, 800, 769, 800, 738, 769, 676, 707, 645, 676, 614, 645, 552, 583,
- 521, 552, 490, 521, 428, 459, 397, 428, 366, 397, 304, 335, 273, 304,
- 242, 273, 180, 211, 149, 180, 118, 149, 56, 87, 25, 56, 57, 25,
- 832, 832, 801, 832, 770, 801, 739, 770, 708, 739, 677, 708, 646, 677,
- 615, 646, 584, 615, 553, 584, 522, 553, 491, 522, 460, 491, 429, 460,
- 398, 429, 367, 398, 336, 367, 305, 336, 274, 305, 243, 274, 212, 243,
- 181, 212, 150, 181, 119, 150, 88, 119, 57, 88, 26, 57, 58, 26,
- 833, 864, 802, 833, 771, 802, 709, 740, 678, 709, 647, 678, 585, 616,
- 554, 585, 523, 554, 461, 492, 430, 461, 399, 430, 337, 368, 306, 337,
- 275, 306, 213, 244, 182, 213, 151, 182, 89, 120, 58, 89, 27, 58,
- 834, 865, 803, 834, 710, 741, 679, 710, 586, 617, 555, 586, 462, 493,
- 431, 462, 338, 369, 307, 338, 214, 245, 183, 214, 90, 121, 59, 90,
- 835, 866, 711, 742, 587, 618, 463, 494, 339, 370, 215, 246, 91, 122,
- 864, 865, 740, 771, 616, 647, 492, 523, 368, 399, 244, 275, 120, 151,
- 59, 27, 896, 896, 865, 896, 772, 803, 741, 772, 648, 679, 617, 648,
- 524, 555, 493, 524, 400, 431, 369, 400, 276, 307, 245, 276, 152, 183,
- 121, 152, 28, 59, 60, 28, 928, 928, 897, 928, 866, 897, 804, 835,
- 773, 804, 742, 773, 680, 711, 649, 680, 618, 649, 556, 587, 525, 556,
- 494, 525, 432, 463, 401, 432, 370, 401, 308, 339, 277, 308, 246, 277,
- 184, 215, 153, 184, 122, 153, 60, 91, 29, 60, 61, 29, 960, 960,
- 929, 960, 898, 929, 867, 898, 836, 867, 805, 836, 774, 805, 743, 774,
- 712, 743, 681, 712, 650, 681, 619, 650, 588, 619, 557, 588, 526, 557,
- 495, 526, 464, 495, 433, 464, 402, 433, 371, 402, 340, 371, 309, 340,
- 278, 309, 247, 278, 216, 247, 185, 216, 154, 185, 123, 154, 92, 123,
- 61, 92, 30, 61, 62, 30, 961, 992, 930, 961, 899, 930, 837, 868,
- 806, 837, 775, 806, 713, 744, 682, 713, 651, 682, 589, 620, 558, 589,
- 527, 558, 465, 496, 434, 465, 403, 434, 341, 372, 310, 341, 279, 310,
- 217, 248, 186, 217, 155, 186, 93, 124, 62, 93, 31, 62, 962, 993,
- 931, 962, 838, 869, 807, 838, 714, 745, 683, 714, 590, 621, 559, 590,
- 466, 497, 435, 466, 342, 373, 311, 342, 218, 249, 187, 218, 94, 125,
- 63, 94, 963, 994, 839, 870, 715, 746, 591, 622, 467, 498, 343, 374,
- 219, 250, 95, 126, 868, 899, 744, 775, 620, 651, 496, 527, 372, 403,
- 248, 279, 124, 155, 900, 931, 869, 900, 776, 807, 745, 776, 652, 683,
- 621, 652, 528, 559, 497, 528, 404, 435, 373, 404, 280, 311, 249, 280,
- 156, 187, 125, 156, 932, 963, 901, 932, 870, 901, 808, 839, 777, 808,
- 746, 777, 684, 715, 653, 684, 622, 653, 560, 591, 529, 560, 498, 529,
- 436, 467, 405, 436, 374, 405, 312, 343, 281, 312, 250, 281, 188, 219,
- 157, 188, 126, 157, 964, 995, 933, 964, 902, 933, 871, 902, 840, 871,
- 809, 840, 778, 809, 747, 778, 716, 747, 685, 716, 654, 685, 623, 654,
- 592, 623, 561, 592, 530, 561, 499, 530, 468, 499, 437, 468, 406, 437,
- 375, 406, 344, 375, 313, 344, 282, 313, 251, 282, 220, 251, 189, 220,
- 158, 189, 127, 158, 965, 996, 934, 965, 903, 934, 841, 872, 810, 841,
- 779, 810, 717, 748, 686, 717, 655, 686, 593, 624, 562, 593, 531, 562,
- 469, 500, 438, 469, 407, 438, 345, 376, 314, 345, 283, 314, 221, 252,
- 190, 221, 159, 190, 966, 997, 935, 966, 842, 873, 811, 842, 718, 749,
- 687, 718, 594, 625, 563, 594, 470, 501, 439, 470, 346, 377, 315, 346,
- 222, 253, 191, 222, 967, 998, 843, 874, 719, 750, 595, 626, 471, 502,
- 347, 378, 223, 254, 872, 903, 748, 779, 624, 655, 500, 531, 376, 407,
- 252, 283, 904, 935, 873, 904, 780, 811, 749, 780, 656, 687, 625, 656,
- 532, 563, 501, 532, 408, 439, 377, 408, 284, 315, 253, 284, 936, 967,
- 905, 936, 874, 905, 812, 843, 781, 812, 750, 781, 688, 719, 657, 688,
- 626, 657, 564, 595, 533, 564, 502, 533, 440, 471, 409, 440, 378, 409,
- 316, 347, 285, 316, 254, 285, 968, 999, 937, 968, 906, 937, 875, 906,
- 844, 875, 813, 844, 782, 813, 751, 782, 720, 751, 689, 720, 658, 689,
- 627, 658, 596, 627, 565, 596, 534, 565, 503, 534, 472, 503, 441, 472,
- 410, 441, 379, 410, 348, 379, 317, 348, 286, 317, 255, 286, 969, 1000,
- 938, 969, 907, 938, 845, 876, 814, 845, 783, 814, 721, 752, 690, 721,
- 659, 690, 597, 628, 566, 597, 535, 566, 473, 504, 442, 473, 411, 442,
- 349, 380, 318, 349, 287, 318, 970, 1001, 939, 970, 846, 877, 815, 846,
- 722, 753, 691, 722, 598, 629, 567, 598, 474, 505, 443, 474, 350, 381,
- 319, 350, 971, 1002, 847, 878, 723, 754, 599, 630, 475, 506, 351, 382,
- 876, 907, 752, 783, 628, 659, 504, 535, 380, 411, 908, 939, 877, 908,
- 784, 815, 753, 784, 660, 691, 629, 660, 536, 567, 505, 536, 412, 443,
- 381, 412, 940, 971, 909, 940, 878, 909, 816, 847, 785, 816, 754, 785,
- 692, 723, 661, 692, 630, 661, 568, 599, 537, 568, 506, 537, 444, 475,
- 413, 444, 382, 413, 972, 1003, 941, 972, 910, 941, 879, 910, 848, 879,
- 817, 848, 786, 817, 755, 786, 724, 755, 693, 724, 662, 693, 631, 662,
- 600, 631, 569, 600, 538, 569, 507, 538, 476, 507, 445, 476, 414, 445,
- 383, 414, 973, 1004, 942, 973, 911, 942, 849, 880, 818, 849, 787, 818,
- 725, 756, 694, 725, 663, 694, 601, 632, 570, 601, 539, 570, 477, 508,
- 446, 477, 415, 446, 974, 1005, 943, 974, 850, 881, 819, 850, 726, 757,
- 695, 726, 602, 633, 571, 602, 478, 509, 447, 478, 975, 1006, 851, 882,
- 727, 758, 603, 634, 479, 510, 880, 911, 756, 787, 632, 663, 508, 539,
- 912, 943, 881, 912, 788, 819, 757, 788, 664, 695, 633, 664, 540, 571,
- 509, 540, 944, 975, 913, 944, 882, 913, 820, 851, 789, 820, 758, 789,
- 696, 727, 665, 696, 634, 665, 572, 603, 541, 572, 510, 541, 976, 1007,
- 945, 976, 914, 945, 883, 914, 852, 883, 821, 852, 790, 821, 759, 790,
- 728, 759, 697, 728, 666, 697, 635, 666, 604, 635, 573, 604, 542, 573,
- 511, 542, 977, 1008, 946, 977, 915, 946, 853, 884, 822, 853, 791, 822,
- 729, 760, 698, 729, 667, 698, 605, 636, 574, 605, 543, 574, 978, 1009,
- 947, 978, 854, 885, 823, 854, 730, 761, 699, 730, 606, 637, 575, 606,
- 979, 1010, 855, 886, 731, 762, 607, 638, 884, 915, 760, 791, 636, 667,
- 916, 947, 885, 916, 792, 823, 761, 792, 668, 699, 637, 668, 948, 979,
- 917, 948, 886, 917, 824, 855, 793, 824, 762, 793, 700, 731, 669, 700,
- 638, 669, 980, 1011, 949, 980, 918, 949, 887, 918, 856, 887, 825, 856,
- 794, 825, 763, 794, 732, 763, 701, 732, 670, 701, 639, 670, 981, 1012,
- 950, 981, 919, 950, 857, 888, 826, 857, 795, 826, 733, 764, 702, 733,
- 671, 702, 982, 1013, 951, 982, 858, 889, 827, 858, 734, 765, 703, 734,
- 983, 1014, 859, 890, 735, 766, 888, 919, 764, 795, 920, 951, 889, 920,
- 796, 827, 765, 796, 952, 983, 921, 952, 890, 921, 828, 859, 797, 828,
- 766, 797, 984, 1015, 953, 984, 922, 953, 891, 922, 860, 891, 829, 860,
- 798, 829, 767, 798, 985, 1016, 954, 985, 923, 954, 861, 892, 830, 861,
- 799, 830, 986, 1017, 955, 986, 862, 893, 831, 862, 987, 1018, 863, 894,
- 892, 923, 924, 955, 893, 924, 956, 987, 925, 956, 894, 925, 988, 1019,
- 957, 988, 926, 957, 895, 926, 989, 1020, 958, 989, 927, 958, 990, 1021,
- 959, 990, 991, 1022, 0, 0,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- v2_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 32, 1, 1, 32, 32, 2, 33,
- 33, 64, 34, 65, 2, 2, 64, 64, 3, 34, 65, 96, 35, 66,
- 66, 97, 3, 3, 96, 96, 4, 35, 97, 128, 67, 98, 36, 67,
- 98, 129, 4, 4, 68, 99, 99, 130, 128, 128, 5, 36, 129, 160,
- 37, 68, 130, 161, 100, 131, 69, 100, 131, 162, 5, 5, 160, 160,
- 6, 37, 161, 192, 38, 69, 162, 193, 101, 132, 132, 163, 70, 101,
- 163, 194, 6, 6, 192, 192, 7, 38, 133, 164, 193, 224, 102, 133,
- 164, 195, 39, 70, 194, 225, 71, 102, 195, 226, 134, 165, 165, 196,
- 7, 7, 224, 224, 8, 39, 103, 134, 196, 227, 225, 256, 40, 71,
- 226, 257, 166, 197, 72, 103, 227, 258, 135, 166, 197, 228, 104, 135,
- 228, 259, 8, 8, 256, 256, 9, 40, 257, 288, 41, 72, 167, 198,
- 198, 229, 258, 289, 136, 167, 229, 260, 73, 104, 259, 290, 105, 136,
- 260, 291, 199, 230, 9, 9, 168, 199, 230, 261, 288, 288, 10, 41,
- 289, 320, 42, 73, 290, 321, 137, 168, 261, 292, 74, 105, 291, 322,
- 200, 231, 231, 262, 106, 137, 292, 323, 169, 200, 262, 293, 10, 10,
- 320, 320, 11, 42, 321, 352, 43, 74, 138, 169, 293, 324, 322, 353,
- 232, 263, 75, 106, 201, 232, 263, 294, 323, 354, 170, 201, 294, 325,
- 107, 138, 324, 355, 11, 11, 352, 352, 12, 43, 233, 264, 264, 295,
- 353, 384, 139, 170, 325, 356, 44, 75, 354, 385, 202, 233, 295, 326,
- 76, 107, 355, 386, 171, 202, 326, 357, 108, 139, 356, 387, 265, 296,
- 234, 265, 296, 327, 12, 12, 140, 171, 357, 388, 384, 384, 13, 44,
- 203, 234, 327, 358, 385, 416, 45, 76, 386, 417, 77, 108, 387, 418,
- 172, 203, 358, 389, 266, 297, 297, 328, 109, 140, 235, 266, 328, 359,
- 388, 419, 204, 235, 359, 390, 141, 172, 389, 420, 13, 13, 416, 416,
- 14, 45, 417, 448, 46, 77, 298, 329, 418, 449, 267, 298, 329, 360,
- 78, 109, 173, 204, 390, 421, 419, 450, 236, 267, 360, 391, 110, 141,
- 420, 451, 205, 236, 391, 422, 142, 173, 299, 330, 330, 361, 421, 452,
- 14, 14, 268, 299, 361, 392, 448, 448, 15, 46, 449, 480, 47, 78,
- 450, 481, 174, 205, 422, 453, 237, 268, 392, 423, 79, 110, 451, 482,
- 111, 142, 452, 483, 331, 362, 300, 331, 362, 393, 206, 237, 423, 454,
- 143, 174, 269, 300, 393, 424, 453, 484, 480, 480, 481, 512, 238, 269,
- 424, 455, 482, 513, 175, 206, 454, 485, 332, 363, 363, 394, 483, 514,
- 301, 332, 394, 425, 484, 515, 207, 238, 455, 486, 270, 301, 425, 456,
- 485, 516, 364, 395, 239, 270, 456, 487, 512, 512, 333, 364, 395, 426,
- 513, 544, 486, 517, 514, 545, 302, 333, 426, 457, 515, 546, 487, 518,
- 516, 547, 271, 302, 457, 488, 365, 396, 396, 427, 517, 548, 334, 365,
- 427, 458, 488, 519, 544, 544, 303, 334, 458, 489, 518, 549, 545, 576,
- 546, 577, 547, 578, 489, 520, 397, 428, 519, 550, 366, 397, 428, 459,
- 548, 579, 335, 366, 459, 490, 549, 580, 520, 551, 490, 521, 550, 581,
- 576, 576, 577, 608, 398, 429, 429, 460, 578, 609, 367, 398, 460, 491,
- 521, 552, 579, 610, 551, 582, 491, 522, 580, 611, 581, 612, 552, 583,
- 522, 553, 430, 461, 399, 430, 461, 492, 582, 613, 492, 523, 608, 608,
- 609, 640, 610, 641, 553, 584, 611, 642, 523, 554, 583, 614, 612, 643,
- 431, 462, 462, 493, 554, 585, 493, 524, 584, 615, 613, 644, 524, 555,
- 614, 645, 640, 640, 585, 616, 641, 672, 555, 586, 642, 673, 615, 646,
- 463, 494, 643, 674, 494, 525, 644, 675, 525, 556, 586, 617, 616, 647,
- 645, 676, 556, 587, 646, 677, 495, 526, 617, 648, 587, 618, 672, 672,
- 526, 557, 673, 704, 674, 705, 647, 678, 557, 588, 675, 706, 618, 649,
- 676, 707, 588, 619, 648, 679, 677, 708, 527, 558, 558, 589, 678, 709,
- 619, 650, 649, 680, 704, 704, 589, 620, 705, 736, 679, 710, 706, 737,
- 707, 738, 650, 681, 620, 651, 708, 739, 680, 711, 559, 590, 709, 740,
- 590, 621, 651, 682, 681, 712, 710, 741, 621, 652, 736, 736, 737, 768,
- 711, 742, 738, 769, 682, 713, 652, 683, 739, 770, 591, 622, 740, 771,
- 712, 743, 622, 653, 741, 772, 683, 714, 653, 684, 713, 744, 742, 773,
- 623, 654, 743, 774, 768, 768, 769, 800, 684, 715, 714, 745, 770, 801,
- 771, 802, 654, 685, 744, 775, 772, 803, 715, 746, 773, 804, 685, 716,
- 745, 776, 774, 805, 655, 686, 716, 747, 775, 806, 746, 777, 800, 800,
- 801, 832, 686, 717, 802, 833, 803, 834, 776, 807, 804, 835, 747, 778,
- 717, 748, 805, 836, 777, 808, 687, 718, 806, 837, 748, 779, 718, 749,
- 778, 809, 807, 838, 832, 832, 833, 864, 834, 865, 835, 866, 808, 839,
- 749, 780, 836, 867, 779, 810, 719, 750, 837, 868, 809, 840, 838, 869,
- 780, 811, 750, 781, 810, 841, 839, 870, 864, 864, 865, 896, 866, 897,
- 840, 871, 867, 898, 781, 812, 811, 842, 868, 899, 751, 782, 869, 900,
- 841, 872, 812, 843, 870, 901, 782, 813, 842, 873, 871, 902, 896, 896,
- 897, 928, 813, 844, 898, 929, 872, 903, 783, 814, 843, 874, 899, 930,
- 900, 931, 873, 904, 901, 932, 814, 845, 844, 875, 902, 933, 874, 905,
- 903, 934, 845, 876, 928, 928, 815, 846, 929, 960, 930, 961, 875, 906,
- 904, 935, 931, 962, 932, 963, 905, 936, 846, 877, 933, 964, 876, 907,
- 934, 965, 906, 937, 935, 966, 877, 908, 847, 878, 960, 960, 907, 938,
- 961, 992, 936, 967, 962, 993, 963, 994, 964, 995, 878, 909, 937, 968,
- 908, 939, 965, 996, 966, 997, 938, 969, 879, 910, 909, 940, 967, 998,
- 939, 970, 968, 999, 910, 941, 969, 1000, 940, 971, 970, 1001, 911, 942,
- 941, 972, 971, 1002, 942, 973, 972, 1003, 943, 974, 973, 1004, 974, 1005,
- 975, 1006, 15, 15, 16, 47, 48, 79, 80, 111, 112, 143, 144, 175,
- 16, 16, 17, 48, 176, 207, 49, 80, 81, 112, 113, 144, 208, 239,
- 145, 176, 240, 271, 17, 17, 18, 49, 177, 208, 50, 81, 82, 113,
- 272, 303, 209, 240, 114, 145, 146, 177, 241, 272, 304, 335, 178, 209,
- 18, 18, 19, 50, 51, 82, 83, 114, 273, 304, 210, 241, 115, 146,
- 336, 367, 147, 178, 242, 273, 305, 336, 179, 210, 19, 19, 368, 399,
- 20, 51, 52, 83, 274, 305, 84, 115, 211, 242, 337, 368, 116, 147,
- 306, 337, 148, 179, 243, 274, 400, 431, 369, 400, 180, 211, 20, 20,
- 21, 52, 275, 306, 53, 84, 338, 369, 212, 243, 85, 116, 432, 463,
- 117, 148, 401, 432, 307, 338, 244, 275, 149, 180, 370, 401, 181, 212,
- 276, 307, 464, 495, 339, 370, 21, 21, 22, 53, 433, 464, 54, 85,
- 213, 244, 86, 117, 402, 433, 118, 149, 308, 339, 245, 276, 371, 402,
- 150, 181, 496, 527, 465, 496, 182, 213, 434, 465, 340, 371, 277, 308,
- 22, 22, 23, 54, 403, 434, 55, 86, 214, 245, 87, 118, 309, 340,
- 372, 403, 119, 150, 497, 528, 528, 559, 246, 277, 466, 497, 151, 182,
- 435, 466, 341, 372, 183, 214, 278, 309, 404, 435, 23, 23, 24, 55,
- 215, 246, 529, 560, 56, 87, 498, 529, 560, 591, 310, 341, 88, 119,
- 373, 404, 467, 498, 120, 151, 247, 278, 436, 467, 152, 183, 342, 373,
- 279, 310, 405, 436, 184, 215, 530, 561, 561, 592, 499, 530, 592, 623,
- 24, 24, 216, 247, 468, 499, 25, 56, 374, 405, 57, 88, 311, 342,
- 89, 120, 437, 468, 248, 279, 121, 152, 562, 593, 153, 184, 343, 374,
- 531, 562, 593, 624, 406, 437, 500, 531, 624, 655, 280, 311, 185, 216,
- 469, 500, 375, 406, 217, 248, 25, 25, 312, 343, 26, 57, 58, 89,
- 438, 469, 90, 121, 563, 594, 594, 625, 249, 280, 532, 563, 625, 656,
- 122, 153, 344, 375, 501, 532, 656, 687, 407, 438, 154, 185, 281, 312,
- 470, 501, 186, 217, 376, 407, 595, 626, 564, 595, 626, 657, 218, 249,
- 313, 344, 439, 470, 26, 26, 27, 58, 533, 564, 657, 688, 59, 90,
- 91, 122, 250, 281, 502, 533, 688, 719, 123, 154, 408, 439, 345, 376,
- 155, 186, 471, 502, 282, 313, 596, 627, 627, 658, 187, 218, 565, 596,
- 658, 689, 377, 408, 440, 471, 534, 565, 689, 720, 314, 345, 219, 250,
- 27, 27, 28, 59, 503, 534, 720, 751, 60, 91, 92, 123, 251, 282,
- 409, 440, 346, 377, 124, 155, 628, 659, 472, 503, 597, 628, 659, 690,
- 566, 597, 690, 721, 156, 187, 283, 314, 535, 566, 721, 752, 188, 219,
- 378, 409, 441, 472, 315, 346, 504, 535, 752, 783, 220, 251, 28, 28,
- 629, 660, 660, 691, 29, 60, 61, 92, 410, 441, 598, 629, 691, 722,
- 252, 283, 93, 124, 347, 378, 473, 504, 567, 598, 722, 753, 125, 156,
- 284, 315, 536, 567, 753, 784, 157, 188, 442, 473, 379, 410, 189, 220,
- 505, 536, 784, 815, 661, 692, 316, 347, 630, 661, 692, 723, 221, 252,
- 599, 630, 723, 754, 411, 442, 29, 29, 568, 599, 754, 785, 30, 61,
- 474, 505, 62, 93, 253, 284, 348, 379, 94, 125, 537, 568, 785, 816,
- 126, 157, 285, 316, 158, 189, 443, 474, 662, 693, 693, 724, 380, 411,
- 631, 662, 724, 755, 506, 537, 816, 847, 190, 221, 600, 631, 755, 786,
- 317, 348, 222, 253, 569, 600, 786, 817, 412, 443, 475, 506, 30, 30,
- 31, 62, 349, 380, 254, 285, 63, 94, 538, 569, 817, 848, 694, 725,
- 95, 126, 663, 694, 725, 756, 632, 663, 756, 787, 127, 158, 444, 475,
- 286, 317, 381, 412, 507, 538, 848, 879, 159, 190, 601, 632, 787, 818,
- 191, 222, 318, 349, 570, 601, 818, 849, 476, 507, 223, 254, 413, 444,
- 695, 726, 726, 757, 664, 695, 757, 788, 539, 570, 849, 880, 350, 381,
- 255, 286, 633, 664, 788, 819, 445, 476, 602, 633, 819, 850, 508, 539,
- 880, 911, 287, 318, 382, 413, 571, 602, 850, 881, 727, 758, 696, 727,
- 758, 789, 319, 350, 477, 508, 665, 696, 789, 820, 414, 445, 540, 571,
- 881, 912, 634, 665, 820, 851, 351, 382, 603, 634, 851, 882, 446, 477,
- 509, 540, 912, 943, 383, 414, 728, 759, 759, 790, 572, 603, 882, 913,
- 697, 728, 790, 821, 666, 697, 821, 852, 478, 509, 635, 666, 852, 883,
- 415, 446, 541, 572, 913, 944, 604, 635, 883, 914, 760, 791, 729, 760,
- 791, 822, 510, 541, 944, 975, 447, 478, 698, 729, 822, 853, 573, 604,
- 914, 945, 667, 698, 853, 884, 636, 667, 884, 915, 479, 510, 542, 573,
- 945, 976, 761, 792, 792, 823, 605, 636, 915, 946, 730, 761, 823, 854,
- 699, 730, 854, 885, 511, 542, 976, 1007, 574, 605, 946, 977, 668, 699,
- 885, 916, 637, 668, 916, 947, 543, 574, 793, 824, 977, 1008, 762, 793,
- 824, 855, 731, 762, 855, 886, 606, 637, 947, 978, 700, 731, 886, 917,
- 669, 700, 917, 948, 575, 606, 978, 1009, 638, 669, 948, 979, 794, 825,
- 825, 856, 763, 794, 856, 887, 732, 763, 887, 918, 607, 638, 979, 1010,
- 701, 732, 918, 949, 670, 701, 949, 980, 826, 857, 795, 826, 857, 888,
- 764, 795, 888, 919, 639, 670, 980, 1011, 733, 764, 919, 950, 702, 733,
- 950, 981, 671, 702, 981, 1012, 827, 858, 858, 889, 796, 827, 889, 920,
- 765, 796, 920, 951, 734, 765, 951, 982, 703, 734, 982, 1013, 859, 890,
- 828, 859, 890, 921, 797, 828, 921, 952, 766, 797, 952, 983, 735, 766,
- 983, 1014, 860, 891, 891, 922, 829, 860, 922, 953, 798, 829, 953, 984,
- 767, 798, 984, 1015, 892, 923, 861, 892, 923, 954, 830, 861, 954, 985,
- 799, 830, 985, 1016, 893, 924, 924, 955, 862, 893, 955, 986, 831, 862,
- 986, 1017, 925, 956, 894, 925, 956, 987, 863, 894, 987, 1018, 926, 957,
- 957, 988, 895, 926, 988, 1019, 958, 989, 927, 958, 989, 1020, 959, 990,
- 990, 1021, 991, 1022, 0, 0,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- h2_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 32, 1, 1, 32, 32, 2, 33,
- 33, 64, 34, 65, 2, 2, 64, 64, 3, 34, 65, 96, 35, 66,
- 66, 97, 3, 3, 96, 96, 4, 35, 97, 128, 67, 98, 36, 67,
- 98, 129, 4, 4, 68, 99, 99, 130, 128, 128, 5, 36, 129, 160,
- 37, 68, 130, 161, 100, 131, 69, 100, 131, 162, 5, 5, 160, 160,
- 6, 37, 161, 192, 38, 69, 162, 193, 101, 132, 132, 163, 70, 101,
- 163, 194, 6, 6, 192, 192, 7, 38, 133, 164, 193, 224, 102, 133,
- 164, 195, 39, 70, 194, 225, 71, 102, 195, 226, 134, 165, 165, 196,
- 7, 7, 224, 224, 8, 39, 103, 134, 196, 227, 225, 256, 40, 71,
- 226, 257, 166, 197, 72, 103, 227, 258, 135, 166, 197, 228, 104, 135,
- 228, 259, 8, 8, 256, 256, 9, 40, 257, 288, 41, 72, 167, 198,
- 198, 229, 258, 289, 136, 167, 229, 260, 73, 104, 259, 290, 105, 136,
- 260, 291, 199, 230, 9, 9, 168, 199, 230, 261, 288, 288, 10, 41,
- 289, 320, 42, 73, 290, 321, 137, 168, 261, 292, 74, 105, 291, 322,
- 200, 231, 231, 262, 106, 137, 292, 323, 169, 200, 262, 293, 10, 10,
- 320, 320, 11, 42, 321, 352, 43, 74, 138, 169, 293, 324, 322, 353,
- 232, 263, 75, 106, 201, 232, 263, 294, 323, 354, 170, 201, 294, 325,
- 107, 138, 324, 355, 11, 11, 352, 352, 12, 43, 233, 264, 264, 295,
- 353, 384, 139, 170, 325, 356, 44, 75, 354, 385, 202, 233, 295, 326,
- 76, 107, 355, 386, 171, 202, 326, 357, 108, 139, 356, 387, 265, 296,
- 234, 265, 296, 327, 12, 12, 140, 171, 357, 388, 384, 384, 13, 44,
- 203, 234, 327, 358, 385, 416, 45, 76, 386, 417, 77, 108, 387, 418,
- 172, 203, 358, 389, 266, 297, 297, 328, 109, 140, 235, 266, 328, 359,
- 388, 419, 204, 235, 359, 390, 141, 172, 389, 420, 13, 13, 416, 416,
- 14, 45, 417, 448, 46, 77, 298, 329, 418, 449, 267, 298, 329, 360,
- 78, 109, 173, 204, 390, 421, 419, 450, 236, 267, 360, 391, 110, 141,
- 420, 451, 205, 236, 391, 422, 142, 173, 299, 330, 330, 361, 421, 452,
- 14, 14, 268, 299, 361, 392, 448, 448, 15, 46, 449, 480, 47, 78,
- 450, 481, 174, 205, 422, 453, 237, 268, 392, 423, 79, 110, 451, 482,
- 111, 142, 452, 483, 331, 362, 300, 331, 362, 393, 206, 237, 423, 454,
- 143, 174, 269, 300, 393, 424, 453, 484, 15, 15, 16, 47, 48, 79,
- 238, 269, 424, 455, 175, 206, 454, 485, 80, 111, 332, 363, 363, 394,
- 301, 332, 394, 425, 112, 143, 207, 238, 455, 486, 270, 301, 425, 456,
- 144, 175, 364, 395, 16, 16, 239, 270, 456, 487, 17, 48, 333, 364,
- 395, 426, 176, 207, 49, 80, 302, 333, 426, 457, 81, 112, 113, 144,
- 208, 239, 271, 302, 457, 488, 365, 396, 396, 427, 145, 176, 334, 365,
- 427, 458, 240, 271, 17, 17, 18, 49, 177, 208, 303, 334, 458, 489,
- 50, 81, 82, 113, 272, 303, 209, 240, 397, 428, 114, 145, 366, 397,
- 428, 459, 335, 366, 459, 490, 146, 177, 241, 272, 304, 335, 178, 209,
- 18, 18, 19, 50, 51, 82, 398, 429, 429, 460, 367, 398, 460, 491,
- 83, 114, 273, 304, 210, 241, 115, 146, 336, 367, 147, 178, 242, 273,
- 305, 336, 430, 461, 399, 430, 461, 492, 179, 210, 19, 19, 368, 399,
- 20, 51, 52, 83, 274, 305, 84, 115, 211, 242, 337, 368, 116, 147,
- 431, 462, 462, 493, 306, 337, 148, 179, 243, 274, 400, 431, 369, 400,
- 180, 211, 20, 20, 21, 52, 275, 306, 53, 84, 338, 369, 212, 243,
- 85, 116, 463, 494, 432, 463, 117, 148, 401, 432, 307, 338, 244, 275,
- 149, 180, 370, 401, 181, 212, 276, 307, 464, 495, 339, 370, 21, 21,
- 22, 53, 433, 464, 54, 85, 213, 244, 86, 117, 402, 433, 118, 149,
- 308, 339, 245, 276, 371, 402, 150, 181, 465, 496, 182, 213, 434, 465,
- 340, 371, 277, 308, 22, 22, 23, 54, 403, 434, 55, 86, 214, 245,
- 87, 118, 309, 340, 372, 403, 119, 150, 246, 277, 466, 497, 151, 182,
- 435, 466, 341, 372, 183, 214, 278, 309, 404, 435, 23, 23, 24, 55,
- 215, 246, 56, 87, 310, 341, 88, 119, 373, 404, 467, 498, 120, 151,
- 247, 278, 436, 467, 152, 183, 342, 373, 279, 310, 405, 436, 184, 215,
- 24, 24, 216, 247, 468, 499, 25, 56, 374, 405, 57, 88, 311, 342,
- 89, 120, 437, 468, 248, 279, 121, 152, 153, 184, 343, 374, 406, 437,
- 280, 311, 185, 216, 469, 500, 375, 406, 217, 248, 25, 25, 312, 343,
- 26, 57, 58, 89, 438, 469, 90, 121, 249, 280, 122, 153, 344, 375,
- 407, 438, 154, 185, 281, 312, 470, 501, 186, 217, 376, 407, 218, 249,
- 313, 344, 439, 470, 26, 26, 27, 58, 59, 90, 91, 122, 250, 281,
- 123, 154, 408, 439, 345, 376, 155, 186, 471, 502, 282, 313, 187, 218,
- 377, 408, 440, 471, 314, 345, 219, 250, 27, 27, 28, 59, 60, 91,
- 92, 123, 251, 282, 409, 440, 346, 377, 124, 155, 472, 503, 156, 187,
- 283, 314, 188, 219, 378, 409, 441, 472, 315, 346, 220, 251, 28, 28,
- 29, 60, 61, 92, 410, 441, 252, 283, 93, 124, 347, 378, 473, 504,
- 125, 156, 284, 315, 157, 188, 442, 473, 379, 410, 189, 220, 316, 347,
- 221, 252, 411, 442, 29, 29, 30, 61, 474, 505, 62, 93, 253, 284,
- 348, 379, 94, 125, 126, 157, 285, 316, 158, 189, 443, 474, 380, 411,
- 190, 221, 317, 348, 222, 253, 412, 443, 475, 506, 30, 30, 31, 62,
- 349, 380, 254, 285, 63, 94, 95, 126, 127, 158, 444, 475, 286, 317,
- 381, 412, 159, 190, 191, 222, 318, 349, 476, 507, 223, 254, 413, 444,
- 350, 381, 255, 286, 445, 476, 287, 318, 382, 413, 319, 350, 477, 508,
- 414, 445, 351, 382, 446, 477, 383, 414, 478, 509, 415, 446, 447, 478,
- 479, 510, 480, 480, 481, 512, 482, 513, 483, 514, 484, 515, 485, 516,
- 512, 512, 513, 544, 486, 517, 514, 545, 515, 546, 487, 518, 516, 547,
- 517, 548, 488, 519, 544, 544, 518, 549, 545, 576, 546, 577, 547, 578,
- 489, 520, 519, 550, 548, 579, 549, 580, 520, 551, 490, 521, 550, 581,
- 576, 576, 577, 608, 578, 609, 521, 552, 579, 610, 551, 582, 491, 522,
- 580, 611, 581, 612, 552, 583, 522, 553, 582, 613, 492, 523, 608, 608,
- 609, 640, 610, 641, 553, 584, 611, 642, 523, 554, 583, 614, 612, 643,
- 554, 585, 493, 524, 584, 615, 613, 644, 524, 555, 614, 645, 640, 640,
- 585, 616, 641, 672, 555, 586, 642, 673, 615, 646, 643, 674, 494, 525,
- 644, 675, 525, 556, 586, 617, 616, 647, 645, 676, 556, 587, 646, 677,
- 495, 526, 617, 648, 587, 618, 672, 672, 526, 557, 673, 704, 674, 705,
- 647, 678, 557, 588, 675, 706, 618, 649, 676, 707, 588, 619, 648, 679,
- 677, 708, 496, 527, 527, 558, 558, 589, 678, 709, 619, 650, 649, 680,
- 704, 704, 589, 620, 705, 736, 679, 710, 706, 737, 707, 738, 650, 681,
- 620, 651, 497, 528, 528, 559, 708, 739, 680, 711, 559, 590, 709, 740,
- 590, 621, 651, 682, 681, 712, 710, 741, 621, 652, 736, 736, 737, 768,
- 529, 560, 711, 742, 498, 529, 560, 591, 738, 769, 682, 713, 652, 683,
- 739, 770, 591, 622, 740, 771, 712, 743, 622, 653, 741, 772, 683, 714,
- 653, 684, 713, 744, 742, 773, 530, 561, 561, 592, 499, 530, 592, 623,
- 623, 654, 743, 774, 768, 768, 769, 800, 684, 715, 714, 745, 770, 801,
- 771, 802, 654, 685, 744, 775, 772, 803, 562, 593, 531, 562, 593, 624,
- 715, 746, 773, 804, 685, 716, 500, 531, 624, 655, 745, 776, 774, 805,
- 655, 686, 716, 747, 775, 806, 746, 777, 800, 800, 801, 832, 686, 717,
- 802, 833, 563, 594, 594, 625, 803, 834, 532, 563, 625, 656, 776, 807,
- 804, 835, 501, 532, 656, 687, 747, 778, 717, 748, 805, 836, 777, 808,
- 687, 718, 806, 837, 748, 779, 595, 626, 564, 595, 626, 657, 718, 749,
- 778, 809, 807, 838, 832, 832, 533, 564, 657, 688, 833, 864, 834, 865,
- 835, 866, 502, 533, 688, 719, 808, 839, 749, 780, 836, 867, 779, 810,
- 719, 750, 837, 868, 809, 840, 596, 627, 627, 658, 565, 596, 658, 689,
- 838, 869, 780, 811, 750, 781, 534, 565, 689, 720, 810, 841, 839, 870,
- 864, 864, 503, 534, 720, 751, 865, 896, 866, 897, 840, 871, 867, 898,
- 781, 812, 811, 842, 628, 659, 868, 899, 751, 782, 597, 628, 659, 690,
- 566, 597, 690, 721, 869, 900, 841, 872, 535, 566, 721, 752, 812, 843,
- 870, 901, 782, 813, 842, 873, 504, 535, 752, 783, 871, 902, 629, 660,
- 660, 691, 896, 896, 897, 928, 598, 629, 691, 722, 813, 844, 898, 929,
- 872, 903, 783, 814, 843, 874, 899, 930, 567, 598, 722, 753, 900, 931,
- 536, 567, 753, 784, 873, 904, 901, 932, 814, 845, 844, 875, 902, 933,
- 505, 536, 784, 815, 661, 692, 630, 661, 692, 723, 874, 905, 599, 630,
- 723, 754, 903, 934, 845, 876, 568, 599, 754, 785, 928, 928, 815, 846,
- 929, 960, 930, 961, 875, 906, 904, 935, 931, 962, 537, 568, 785, 816,
- 932, 963, 905, 936, 662, 693, 693, 724, 846, 877, 933, 964, 876, 907,
- 631, 662, 724, 755, 506, 537, 816, 847, 934, 965, 600, 631, 755, 786,
- 906, 937, 569, 600, 786, 817, 935, 966, 877, 908, 847, 878, 960, 960,
- 907, 938, 961, 992, 936, 967, 538, 569, 817, 848, 962, 993, 694, 725,
- 663, 694, 725, 756, 963, 994, 632, 663, 756, 787, 964, 995, 878, 909,
- 937, 968, 507, 538, 848, 879, 908, 939, 601, 632, 787, 818, 965, 996,
- 966, 997, 570, 601, 818, 849, 938, 969, 879, 910, 909, 940, 967, 998,
- 695, 726, 726, 757, 664, 695, 757, 788, 539, 570, 849, 880, 939, 970,
- 633, 664, 788, 819, 968, 999, 602, 633, 819, 850, 910, 941, 508, 539,
- 880, 911, 969, 1000, 940, 971, 571, 602, 850, 881, 727, 758, 696, 727,
- 758, 789, 970, 1001, 665, 696, 789, 820, 911, 942, 941, 972, 540, 571,
- 881, 912, 634, 665, 820, 851, 971, 1002, 603, 634, 851, 882, 942, 973,
- 509, 540, 912, 943, 728, 759, 759, 790, 972, 1003, 572, 603, 882, 913,
- 697, 728, 790, 821, 666, 697, 821, 852, 943, 974, 635, 666, 852, 883,
- 541, 572, 913, 944, 973, 1004, 604, 635, 883, 914, 760, 791, 729, 760,
- 791, 822, 510, 541, 944, 975, 974, 1005, 698, 729, 822, 853, 573, 604,
- 914, 945, 667, 698, 853, 884, 636, 667, 884, 915, 975, 1006, 542, 573,
- 945, 976, 761, 792, 792, 823, 605, 636, 915, 946, 730, 761, 823, 854,
- 699, 730, 854, 885, 511, 542, 976, 1007, 574, 605, 946, 977, 668, 699,
- 885, 916, 637, 668, 916, 947, 543, 574, 793, 824, 977, 1008, 762, 793,
- 824, 855, 731, 762, 855, 886, 606, 637, 947, 978, 700, 731, 886, 917,
- 669, 700, 917, 948, 575, 606, 978, 1009, 638, 669, 948, 979, 794, 825,
- 825, 856, 763, 794, 856, 887, 732, 763, 887, 918, 607, 638, 979, 1010,
- 701, 732, 918, 949, 670, 701, 949, 980, 826, 857, 795, 826, 857, 888,
- 764, 795, 888, 919, 639, 670, 980, 1011, 733, 764, 919, 950, 702, 733,
- 950, 981, 671, 702, 981, 1012, 827, 858, 858, 889, 796, 827, 889, 920,
- 765, 796, 920, 951, 734, 765, 951, 982, 703, 734, 982, 1013, 859, 890,
- 828, 859, 890, 921, 797, 828, 921, 952, 766, 797, 952, 983, 735, 766,
- 983, 1014, 860, 891, 891, 922, 829, 860, 922, 953, 798, 829, 953, 984,
- 767, 798, 984, 1015, 892, 923, 861, 892, 923, 954, 830, 861, 954, 985,
- 799, 830, 985, 1016, 893, 924, 924, 955, 862, 893, 955, 986, 831, 862,
- 986, 1017, 925, 956, 894, 925, 956, 987, 863, 894, 987, 1018, 926, 957,
- 957, 988, 895, 926, 988, 1019, 958, 989, 927, 958, 989, 1020, 959, 990,
- 990, 1021, 991, 1022, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- qtr_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 32, 1, 1, 32, 32, 2, 33,
- 33, 64, 34, 65, 2, 2, 64, 64, 3, 34, 65, 96, 35, 66,
- 66, 97, 3, 3, 96, 96, 4, 35, 97, 128, 67, 98, 36, 67,
- 98, 129, 4, 4, 68, 99, 99, 130, 128, 128, 5, 36, 129, 160,
- 37, 68, 130, 161, 100, 131, 69, 100, 131, 162, 5, 5, 160, 160,
- 6, 37, 161, 192, 38, 69, 162, 193, 101, 132, 132, 163, 70, 101,
- 163, 194, 6, 6, 192, 192, 7, 38, 133, 164, 193, 224, 102, 133,
- 164, 195, 39, 70, 194, 225, 71, 102, 195, 226, 134, 165, 165, 196,
- 7, 7, 224, 224, 8, 39, 103, 134, 196, 227, 225, 256, 40, 71,
- 226, 257, 166, 197, 72, 103, 227, 258, 135, 166, 197, 228, 104, 135,
- 228, 259, 8, 8, 256, 256, 9, 40, 257, 288, 41, 72, 167, 198,
- 198, 229, 258, 289, 136, 167, 229, 260, 73, 104, 259, 290, 105, 136,
- 260, 291, 199, 230, 9, 9, 168, 199, 230, 261, 288, 288, 10, 41,
- 289, 320, 42, 73, 290, 321, 137, 168, 261, 292, 74, 105, 291, 322,
- 200, 231, 231, 262, 106, 137, 292, 323, 169, 200, 262, 293, 10, 10,
- 320, 320, 11, 42, 321, 352, 43, 74, 138, 169, 293, 324, 322, 353,
- 232, 263, 75, 106, 201, 232, 263, 294, 323, 354, 170, 201, 294, 325,
- 107, 138, 324, 355, 11, 11, 352, 352, 12, 43, 233, 264, 264, 295,
- 353, 384, 139, 170, 325, 356, 44, 75, 354, 385, 202, 233, 295, 326,
- 76, 107, 355, 386, 171, 202, 326, 357, 108, 139, 356, 387, 265, 296,
- 234, 265, 296, 327, 12, 12, 140, 171, 357, 388, 384, 384, 13, 44,
- 203, 234, 327, 358, 385, 416, 45, 76, 386, 417, 77, 108, 387, 418,
- 172, 203, 358, 389, 266, 297, 297, 328, 109, 140, 235, 266, 328, 359,
- 388, 419, 204, 235, 359, 390, 141, 172, 389, 420, 13, 13, 416, 416,
- 14, 45, 417, 448, 46, 77, 298, 329, 418, 449, 267, 298, 329, 360,
- 78, 109, 173, 204, 390, 421, 419, 450, 236, 267, 360, 391, 110, 141,
- 420, 451, 205, 236, 391, 422, 142, 173, 299, 330, 330, 361, 421, 452,
- 14, 14, 268, 299, 361, 392, 448, 448, 15, 46, 449, 480, 47, 78,
- 450, 481, 174, 205, 422, 453, 237, 268, 392, 423, 79, 110, 451, 482,
- 111, 142, 452, 483, 331, 362, 300, 331, 362, 393, 206, 237, 423, 454,
- 143, 174, 269, 300, 393, 424, 453, 484, 238, 269, 424, 455, 175, 206,
- 454, 485, 332, 363, 363, 394, 301, 332, 394, 425, 207, 238, 455, 486,
- 270, 301, 425, 456, 364, 395, 239, 270, 456, 487, 333, 364, 395, 426,
- 302, 333, 426, 457, 271, 302, 457, 488, 365, 396, 396, 427, 334, 365,
- 427, 458, 303, 334, 458, 489, 397, 428, 366, 397, 428, 459, 335, 366,
- 459, 490, 398, 429, 429, 460, 367, 398, 460, 491, 430, 461, 399, 430,
- 461, 492, 431, 462, 462, 493, 463, 494, 15, 15, 480, 480, 16, 47,
- 481, 512, 48, 79, 482, 513, 80, 111, 483, 514, 112, 143, 484, 515,
- 144, 175, 485, 516, 16, 16, 512, 512, 17, 48, 513, 544, 176, 207,
- 486, 517, 49, 80, 514, 545, 81, 112, 515, 546, 113, 144, 208, 239,
- 487, 518, 516, 547, 145, 176, 517, 548, 240, 271, 488, 519, 17, 17,
- 544, 544, 18, 49, 177, 208, 518, 549, 545, 576, 50, 81, 546, 577,
- 82, 113, 547, 578, 272, 303, 489, 520, 209, 240, 519, 550, 114, 145,
- 548, 579, 146, 177, 549, 580, 241, 272, 520, 551, 304, 335, 490, 521,
- 178, 209, 550, 581, 18, 18, 576, 576, 19, 50, 577, 608, 51, 82,
- 578, 609, 83, 114, 273, 304, 521, 552, 579, 610, 210, 241, 551, 582,
- 115, 146, 336, 367, 491, 522, 580, 611, 147, 178, 581, 612, 242, 273,
- 552, 583, 305, 336, 522, 553, 179, 210, 582, 613, 19, 19, 368, 399,
- 492, 523, 608, 608, 20, 51, 609, 640, 52, 83, 610, 641, 274, 305,
- 553, 584, 84, 115, 611, 642, 211, 242, 337, 368, 523, 554, 583, 614,
- 116, 147, 612, 643, 306, 337, 554, 585, 148, 179, 243, 274, 400, 431,
- 493, 524, 584, 615, 613, 644, 369, 400, 524, 555, 180, 211, 614, 645,
- 20, 20, 640, 640, 21, 52, 275, 306, 585, 616, 641, 672, 53, 84,
- 338, 369, 555, 586, 642, 673, 212, 243, 615, 646, 85, 116, 643, 674,
- 432, 463, 494, 525, 117, 148, 644, 675, 401, 432, 525, 556, 307, 338,
- 586, 617, 244, 275, 616, 647, 149, 180, 645, 676, 370, 401, 556, 587,
- 181, 212, 646, 677, 276, 307, 464, 495, 495, 526, 617, 648, 339, 370,
- 587, 618, 21, 21, 672, 672, 22, 53, 433, 464, 526, 557, 673, 704,
- 54, 85, 674, 705, 213, 244, 647, 678, 86, 117, 402, 433, 557, 588,
- 675, 706, 118, 149, 308, 339, 618, 649, 676, 707, 245, 276, 371, 402,
- 588, 619, 648, 679, 150, 181, 677, 708, 496, 527, 465, 496, 527, 558,
- 182, 213, 434, 465, 558, 589, 678, 709, 340, 371, 619, 650, 277, 308,
- 649, 680, 22, 22, 704, 704, 23, 54, 403, 434, 589, 620, 705, 736,
- 55, 86, 214, 245, 679, 710, 706, 737, 87, 118, 707, 738, 309, 340,
- 650, 681, 372, 403, 620, 651, 119, 150, 497, 528, 528, 559, 708, 739,
- 246, 277, 680, 711, 466, 497, 559, 590, 151, 182, 709, 740, 435, 466,
- 590, 621, 341, 372, 651, 682, 183, 214, 278, 309, 681, 712, 710, 741,
- 404, 435, 621, 652, 23, 23, 736, 736, 24, 55, 737, 768, 215, 246,
- 529, 560, 711, 742, 56, 87, 498, 529, 560, 591, 738, 769, 310, 341,
- 682, 713, 88, 119, 373, 404, 652, 683, 739, 770, 467, 498, 591, 622,
- 120, 151, 740, 771, 247, 278, 712, 743, 436, 467, 622, 653, 152, 183,
- 741, 772, 342, 373, 683, 714, 279, 310, 405, 436, 653, 684, 713, 744,
- 184, 215, 742, 773, 530, 561, 561, 592, 499, 530, 592, 623, 24, 24,
- 216, 247, 468, 499, 623, 654, 743, 774, 768, 768, 25, 56, 769, 800,
- 374, 405, 684, 715, 57, 88, 311, 342, 714, 745, 770, 801, 89, 120,
- 771, 802, 437, 468, 654, 685, 248, 279, 744, 775, 121, 152, 772, 803,
- 562, 593, 153, 184, 343, 374, 531, 562, 593, 624, 715, 746, 773, 804,
- 406, 437, 685, 716, 500, 531, 624, 655, 280, 311, 745, 776, 185, 216,
- 774, 805, 469, 500, 655, 686, 375, 406, 716, 747, 217, 248, 775, 806,
- 25, 25, 312, 343, 746, 777, 800, 800, 26, 57, 801, 832, 58, 89,
- 438, 469, 686, 717, 802, 833, 90, 121, 563, 594, 594, 625, 803, 834,
- 249, 280, 532, 563, 625, 656, 776, 807, 122, 153, 804, 835, 344, 375,
- 501, 532, 656, 687, 747, 778, 407, 438, 717, 748, 154, 185, 805, 836,
- 281, 312, 777, 808, 470, 501, 687, 718, 186, 217, 806, 837, 376, 407,
- 748, 779, 595, 626, 564, 595, 626, 657, 218, 249, 313, 344, 439, 470,
- 718, 749, 778, 809, 807, 838, 26, 26, 832, 832, 27, 58, 533, 564,
- 657, 688, 833, 864, 59, 90, 834, 865, 91, 122, 835, 866, 250, 281,
- 502, 533, 688, 719, 808, 839, 123, 154, 408, 439, 749, 780, 836, 867,
- 345, 376, 779, 810, 155, 186, 471, 502, 719, 750, 837, 868, 282, 313,
- 809, 840, 596, 627, 627, 658, 187, 218, 565, 596, 658, 689, 838, 869,
- 377, 408, 780, 811, 440, 471, 750, 781, 534, 565, 689, 720, 314, 345,
- 810, 841, 219, 250, 839, 870, 27, 27, 864, 864, 28, 59, 503, 534,
- 720, 751, 865, 896, 60, 91, 866, 897, 92, 123, 251, 282, 840, 871,
- 867, 898, 409, 440, 781, 812, 346, 377, 811, 842, 124, 155, 628, 659,
- 868, 899, 472, 503, 751, 782, 597, 628, 659, 690, 566, 597, 690, 721,
- 156, 187, 869, 900, 283, 314, 841, 872, 535, 566, 721, 752, 188, 219,
- 378, 409, 812, 843, 870, 901, 441, 472, 782, 813, 315, 346, 842, 873,
- 504, 535, 752, 783, 220, 251, 871, 902, 28, 28, 629, 660, 660, 691,
- 896, 896, 29, 60, 897, 928, 61, 92, 410, 441, 598, 629, 691, 722,
- 813, 844, 898, 929, 252, 283, 872, 903, 93, 124, 347, 378, 473, 504,
- 783, 814, 843, 874, 899, 930, 567, 598, 722, 753, 125, 156, 900, 931,
- 284, 315, 536, 567, 753, 784, 873, 904, 157, 188, 901, 932, 442, 473,
- 814, 845, 379, 410, 844, 875, 189, 220, 902, 933, 505, 536, 784, 815,
- 661, 692, 316, 347, 630, 661, 692, 723, 874, 905, 221, 252, 599, 630,
- 723, 754, 903, 934, 411, 442, 845, 876, 29, 29, 568, 599, 754, 785,
- 928, 928, 30, 61, 474, 505, 815, 846, 929, 960, 62, 93, 930, 961,
- 253, 284, 348, 379, 875, 906, 904, 935, 94, 125, 931, 962, 537, 568,
- 785, 816, 126, 157, 932, 963, 285, 316, 905, 936, 158, 189, 443, 474,
- 662, 693, 693, 724, 846, 877, 933, 964, 380, 411, 876, 907, 631, 662,
- 724, 755, 506, 537, 816, 847, 190, 221, 934, 965, 600, 631, 755, 786,
- 317, 348, 906, 937, 222, 253, 569, 600, 786, 817, 935, 966, 412, 443,
- 877, 908, 475, 506, 847, 878, 30, 30, 960, 960, 31, 62, 349, 380,
- 907, 938, 961, 992, 254, 285, 936, 967, 63, 94, 538, 569, 817, 848,
- 962, 993, 694, 725, 95, 126, 663, 694, 725, 756, 963, 994, 632, 663,
- 756, 787, 127, 158, 964, 995, 444, 475, 878, 909, 286, 317, 937, 968,
- 381, 412, 507, 538, 848, 879, 908, 939, 159, 190, 601, 632, 787, 818,
- 965, 996, 191, 222, 966, 997, 318, 349, 570, 601, 818, 849, 938, 969,
- 476, 507, 879, 910, 223, 254, 413, 444, 909, 940, 967, 998, 695, 726,
- 726, 757, 664, 695, 757, 788, 539, 570, 849, 880, 350, 381, 939, 970,
- 255, 286, 633, 664, 788, 819, 968, 999, 445, 476, 602, 633, 819, 850,
- 910, 941, 508, 539, 880, 911, 287, 318, 969, 1000, 382, 413, 940, 971,
- 571, 602, 850, 881, 727, 758, 696, 727, 758, 789, 319, 350, 970, 1001,
- 477, 508, 665, 696, 789, 820, 911, 942, 414, 445, 941, 972, 540, 571,
- 881, 912, 634, 665, 820, 851, 351, 382, 971, 1002, 603, 634, 851, 882,
- 446, 477, 942, 973, 509, 540, 912, 943, 383, 414, 728, 759, 759, 790,
- 972, 1003, 572, 603, 882, 913, 697, 728, 790, 821, 666, 697, 821, 852,
- 478, 509, 943, 974, 635, 666, 852, 883, 415, 446, 541, 572, 913, 944,
- 973, 1004, 604, 635, 883, 914, 760, 791, 729, 760, 791, 822, 510, 541,
- 944, 975, 447, 478, 974, 1005, 698, 729, 822, 853, 573, 604, 914, 945,
- 667, 698, 853, 884, 636, 667, 884, 915, 479, 510, 975, 1006, 542, 573,
- 945, 976, 761, 792, 792, 823, 605, 636, 915, 946, 730, 761, 823, 854,
- 699, 730, 854, 885, 511, 542, 976, 1007, 574, 605, 946, 977, 668, 699,
- 885, 916, 637, 668, 916, 947, 543, 574, 793, 824, 977, 1008, 762, 793,
- 824, 855, 731, 762, 855, 886, 606, 637, 947, 978, 700, 731, 886, 917,
- 669, 700, 917, 948, 575, 606, 978, 1009, 638, 669, 948, 979, 794, 825,
- 825, 856, 763, 794, 856, 887, 732, 763, 887, 918, 607, 638, 979, 1010,
- 701, 732, 918, 949, 670, 701, 949, 980, 826, 857, 795, 826, 857, 888,
- 764, 795, 888, 919, 639, 670, 980, 1011, 733, 764, 919, 950, 702, 733,
- 950, 981, 671, 702, 981, 1012, 827, 858, 858, 889, 796, 827, 889, 920,
- 765, 796, 920, 951, 734, 765, 951, 982, 703, 734, 982, 1013, 859, 890,
- 828, 859, 890, 921, 797, 828, 921, 952, 766, 797, 952, 983, 735, 766,
- 983, 1014, 860, 891, 891, 922, 829, 860, 922, 953, 798, 829, 953, 984,
- 767, 798, 984, 1015, 892, 923, 861, 892, 923, 954, 830, 861, 954, 985,
- 799, 830, 985, 1016, 893, 924, 924, 955, 862, 893, 955, 986, 831, 862,
- 986, 1017, 925, 956, 894, 925, 956, 987, 863, 894, 987, 1018, 926, 957,
- 957, 988, 895, 926, 988, 1019, 958, 989, 927, 958, 989, 1020, 959, 990,
+ 0, 0, 0, 0, 0, 0, 32, 32, 1, 32, 1, 1, 2, 2,
+ 2, 33, 33, 64, 64, 64, 96, 96, 65, 96, 34, 65, 3, 34,
+ 3, 3, 4, 4, 4, 35, 35, 66, 66, 97, 97, 128, 128, 128,
+ 160, 160, 129, 160, 98, 129, 67, 98, 36, 67, 5, 36, 5, 5,
+ 6, 6, 6, 37, 37, 68, 68, 99, 99, 130, 130, 161, 161, 192,
+ 192, 192, 224, 224, 193, 224, 162, 193, 131, 162, 100, 131, 69, 100,
+ 38, 69, 7, 38, 7, 7, 8, 8, 8, 39, 39, 70, 70, 101,
+ 101, 132, 132, 163, 163, 194, 194, 225, 225, 256, 256, 256, 288, 288,
+ 257, 288, 226, 257, 195, 226, 164, 195, 133, 164, 102, 133, 71, 102,
+ 40, 71, 9, 40, 9, 9, 10, 10, 10, 41, 41, 72, 72, 103,
+ 103, 134, 134, 165, 165, 196, 196, 227, 227, 258, 258, 289, 289, 320,
+ 320, 320, 352, 352, 321, 352, 290, 321, 259, 290, 228, 259, 197, 228,
+ 166, 197, 135, 166, 104, 135, 73, 104, 42, 73, 11, 42, 11, 11,
+ 12, 12, 12, 43, 43, 74, 74, 105, 105, 136, 136, 167, 167, 198,
+ 198, 229, 229, 260, 260, 291, 291, 322, 322, 353, 353, 384, 384, 384,
+ 416, 416, 385, 416, 354, 385, 323, 354, 292, 323, 261, 292, 230, 261,
+ 199, 230, 168, 199, 137, 168, 106, 137, 75, 106, 44, 75, 13, 44,
+ 13, 13, 14, 14, 14, 45, 45, 76, 76, 107, 107, 138, 138, 169,
+ 169, 200, 200, 231, 231, 262, 262, 293, 293, 324, 324, 355, 355, 386,
+ 386, 417, 417, 448, 448, 448, 480, 480, 449, 480, 418, 449, 387, 418,
+ 356, 387, 325, 356, 294, 325, 263, 294, 232, 263, 201, 232, 170, 201,
+ 139, 170, 108, 139, 77, 108, 46, 77, 15, 46, 15, 15, 16, 16,
+ 16, 47, 47, 78, 78, 109, 109, 140, 140, 171, 171, 202, 202, 233,
+ 233, 264, 264, 295, 295, 326, 326, 357, 357, 388, 388, 419, 419, 450,
+ 450, 481, 481, 512, 512, 512, 544, 544, 513, 544, 482, 513, 451, 482,
+ 420, 451, 389, 420, 358, 389, 327, 358, 296, 327, 265, 296, 234, 265,
+ 203, 234, 172, 203, 141, 172, 110, 141, 79, 110, 48, 79, 17, 48,
+ 17, 17, 18, 18, 18, 49, 49, 80, 80, 111, 111, 142, 142, 173,
+ 173, 204, 204, 235, 235, 266, 266, 297, 297, 328, 328, 359, 359, 390,
+ 390, 421, 421, 452, 452, 483, 483, 514, 514, 545, 545, 576, 576, 576,
+ 608, 608, 577, 608, 546, 577, 515, 546, 484, 515, 453, 484, 422, 453,
+ 391, 422, 360, 391, 329, 360, 298, 329, 267, 298, 236, 267, 205, 236,
+ 174, 205, 143, 174, 112, 143, 81, 112, 50, 81, 19, 50, 19, 19,
+ 20, 20, 20, 51, 51, 82, 82, 113, 113, 144, 144, 175, 175, 206,
+ 206, 237, 237, 268, 268, 299, 299, 330, 330, 361, 361, 392, 392, 423,
+ 423, 454, 454, 485, 485, 516, 516, 547, 547, 578, 578, 609, 609, 640,
+ 640, 640, 672, 672, 641, 672, 610, 641, 579, 610, 548, 579, 517, 548,
+ 486, 517, 455, 486, 424, 455, 393, 424, 362, 393, 331, 362, 300, 331,
+ 269, 300, 238, 269, 207, 238, 176, 207, 145, 176, 114, 145, 83, 114,
+ 52, 83, 21, 52, 21, 21, 22, 22, 22, 53, 53, 84, 84, 115,
+ 115, 146, 146, 177, 177, 208, 208, 239, 239, 270, 270, 301, 301, 332,
+ 332, 363, 363, 394, 394, 425, 425, 456, 456, 487, 487, 518, 518, 549,
+ 549, 580, 580, 611, 611, 642, 642, 673, 673, 704, 704, 704, 736, 736,
+ 705, 736, 674, 705, 643, 674, 612, 643, 581, 612, 550, 581, 519, 550,
+ 488, 519, 457, 488, 426, 457, 395, 426, 364, 395, 333, 364, 302, 333,
+ 271, 302, 240, 271, 209, 240, 178, 209, 147, 178, 116, 147, 85, 116,
+ 54, 85, 23, 54, 23, 23, 24, 24, 24, 55, 55, 86, 86, 117,
+ 117, 148, 148, 179, 179, 210, 210, 241, 241, 272, 272, 303, 303, 334,
+ 334, 365, 365, 396, 396, 427, 427, 458, 458, 489, 489, 520, 520, 551,
+ 551, 582, 582, 613, 613, 644, 644, 675, 675, 706, 706, 737, 737, 768,
+ 768, 768, 800, 800, 769, 800, 738, 769, 707, 738, 676, 707, 645, 676,
+ 614, 645, 583, 614, 552, 583, 521, 552, 490, 521, 459, 490, 428, 459,
+ 397, 428, 366, 397, 335, 366, 304, 335, 273, 304, 242, 273, 211, 242,
+ 180, 211, 149, 180, 118, 149, 87, 118, 56, 87, 25, 56, 25, 25,
+ 26, 26, 26, 57, 57, 88, 88, 119, 119, 150, 150, 181, 181, 212,
+ 212, 243, 243, 274, 274, 305, 305, 336, 336, 367, 367, 398, 398, 429,
+ 429, 460, 460, 491, 491, 522, 522, 553, 553, 584, 584, 615, 615, 646,
+ 646, 677, 677, 708, 708, 739, 739, 770, 770, 801, 801, 832, 832, 832,
+ 864, 864, 833, 864, 802, 833, 771, 802, 740, 771, 709, 740, 678, 709,
+ 647, 678, 616, 647, 585, 616, 554, 585, 523, 554, 492, 523, 461, 492,
+ 430, 461, 399, 430, 368, 399, 337, 368, 306, 337, 275, 306, 244, 275,
+ 213, 244, 182, 213, 151, 182, 120, 151, 89, 120, 58, 89, 27, 58,
+ 27, 27, 28, 28, 28, 59, 59, 90, 90, 121, 121, 152, 152, 183,
+ 183, 214, 214, 245, 245, 276, 276, 307, 307, 338, 338, 369, 369, 400,
+ 400, 431, 431, 462, 462, 493, 493, 524, 524, 555, 555, 586, 586, 617,
+ 617, 648, 648, 679, 679, 710, 710, 741, 741, 772, 772, 803, 803, 834,
+ 834, 865, 865, 896, 896, 896, 928, 928, 897, 928, 866, 897, 835, 866,
+ 804, 835, 773, 804, 742, 773, 711, 742, 680, 711, 649, 680, 618, 649,
+ 587, 618, 556, 587, 525, 556, 494, 525, 463, 494, 432, 463, 401, 432,
+ 370, 401, 339, 370, 308, 339, 277, 308, 246, 277, 215, 246, 184, 215,
+ 153, 184, 122, 153, 91, 122, 60, 91, 29, 60, 29, 29, 30, 30,
+ 30, 61, 61, 92, 92, 123, 123, 154, 154, 185, 185, 216, 216, 247,
+ 247, 278, 278, 309, 309, 340, 340, 371, 371, 402, 402, 433, 433, 464,
+ 464, 495, 495, 526, 526, 557, 557, 588, 588, 619, 619, 650, 650, 681,
+ 681, 712, 712, 743, 743, 774, 774, 805, 805, 836, 836, 867, 867, 898,
+ 898, 929, 929, 960, 960, 960, 961, 992, 930, 961, 899, 930, 868, 899,
+ 837, 868, 806, 837, 775, 806, 744, 775, 713, 744, 682, 713, 651, 682,
+ 620, 651, 589, 620, 558, 589, 527, 558, 496, 527, 465, 496, 434, 465,
+ 403, 434, 372, 403, 341, 372, 310, 341, 279, 310, 248, 279, 217, 248,
+ 186, 217, 155, 186, 124, 155, 93, 124, 62, 93, 31, 62, 63, 94,
+ 94, 125, 125, 156, 156, 187, 187, 218, 218, 249, 249, 280, 280, 311,
+ 311, 342, 342, 373, 373, 404, 404, 435, 435, 466, 466, 497, 497, 528,
+ 528, 559, 559, 590, 590, 621, 621, 652, 652, 683, 683, 714, 714, 745,
+ 745, 776, 776, 807, 807, 838, 838, 869, 869, 900, 900, 931, 931, 962,
+ 962, 993, 963, 994, 932, 963, 901, 932, 870, 901, 839, 870, 808, 839,
+ 777, 808, 746, 777, 715, 746, 684, 715, 653, 684, 622, 653, 591, 622,
+ 560, 591, 529, 560, 498, 529, 467, 498, 436, 467, 405, 436, 374, 405,
+ 343, 374, 312, 343, 281, 312, 250, 281, 219, 250, 188, 219, 157, 188,
+ 126, 157, 95, 126, 127, 158, 158, 189, 189, 220, 220, 251, 251, 282,
+ 282, 313, 313, 344, 344, 375, 375, 406, 406, 437, 437, 468, 468, 499,
+ 499, 530, 530, 561, 561, 592, 592, 623, 623, 654, 654, 685, 685, 716,
+ 716, 747, 747, 778, 778, 809, 809, 840, 840, 871, 871, 902, 902, 933,
+ 933, 964, 964, 995, 965, 996, 934, 965, 903, 934, 872, 903, 841, 872,
+ 810, 841, 779, 810, 748, 779, 717, 748, 686, 717, 655, 686, 624, 655,
+ 593, 624, 562, 593, 531, 562, 500, 531, 469, 500, 438, 469, 407, 438,
+ 376, 407, 345, 376, 314, 345, 283, 314, 252, 283, 221, 252, 190, 221,
+ 159, 190, 191, 222, 222, 253, 253, 284, 284, 315, 315, 346, 346, 377,
+ 377, 408, 408, 439, 439, 470, 470, 501, 501, 532, 532, 563, 563, 594,
+ 594, 625, 625, 656, 656, 687, 687, 718, 718, 749, 749, 780, 780, 811,
+ 811, 842, 842, 873, 873, 904, 904, 935, 935, 966, 966, 997, 967, 998,
+ 936, 967, 905, 936, 874, 905, 843, 874, 812, 843, 781, 812, 750, 781,
+ 719, 750, 688, 719, 657, 688, 626, 657, 595, 626, 564, 595, 533, 564,
+ 502, 533, 471, 502, 440, 471, 409, 440, 378, 409, 347, 378, 316, 347,
+ 285, 316, 254, 285, 223, 254, 255, 286, 286, 317, 317, 348, 348, 379,
+ 379, 410, 410, 441, 441, 472, 472, 503, 503, 534, 534, 565, 565, 596,
+ 596, 627, 627, 658, 658, 689, 689, 720, 720, 751, 751, 782, 782, 813,
+ 813, 844, 844, 875, 875, 906, 906, 937, 937, 968, 968, 999, 969, 1000,
+ 938, 969, 907, 938, 876, 907, 845, 876, 814, 845, 783, 814, 752, 783,
+ 721, 752, 690, 721, 659, 690, 628, 659, 597, 628, 566, 597, 535, 566,
+ 504, 535, 473, 504, 442, 473, 411, 442, 380, 411, 349, 380, 318, 349,
+ 287, 318, 319, 350, 350, 381, 381, 412, 412, 443, 443, 474, 474, 505,
+ 505, 536, 536, 567, 567, 598, 598, 629, 629, 660, 660, 691, 691, 722,
+ 722, 753, 753, 784, 784, 815, 815, 846, 846, 877, 877, 908, 908, 939,
+ 939, 970, 970, 1001, 971, 1002, 940, 971, 909, 940, 878, 909, 847, 878,
+ 816, 847, 785, 816, 754, 785, 723, 754, 692, 723, 661, 692, 630, 661,
+ 599, 630, 568, 599, 537, 568, 506, 537, 475, 506, 444, 475, 413, 444,
+ 382, 413, 351, 382, 383, 414, 414, 445, 445, 476, 476, 507, 507, 538,
+ 538, 569, 569, 600, 600, 631, 631, 662, 662, 693, 693, 724, 724, 755,
+ 755, 786, 786, 817, 817, 848, 848, 879, 879, 910, 910, 941, 941, 972,
+ 972, 1003, 973, 1004, 942, 973, 911, 942, 880, 911, 849, 880, 818, 849,
+ 787, 818, 756, 787, 725, 756, 694, 725, 663, 694, 632, 663, 601, 632,
+ 570, 601, 539, 570, 508, 539, 477, 508, 446, 477, 415, 446, 447, 478,
+ 478, 509, 509, 540, 540, 571, 571, 602, 602, 633, 633, 664, 664, 695,
+ 695, 726, 726, 757, 757, 788, 788, 819, 819, 850, 850, 881, 881, 912,
+ 912, 943, 943, 974, 974, 1005, 975, 1006, 944, 975, 913, 944, 882, 913,
+ 851, 882, 820, 851, 789, 820, 758, 789, 727, 758, 696, 727, 665, 696,
+ 634, 665, 603, 634, 572, 603, 541, 572, 510, 541, 479, 510, 511, 542,
+ 542, 573, 573, 604, 604, 635, 635, 666, 666, 697, 697, 728, 728, 759,
+ 759, 790, 790, 821, 821, 852, 852, 883, 883, 914, 914, 945, 945, 976,
+ 976, 1007, 977, 1008, 946, 977, 915, 946, 884, 915, 853, 884, 822, 853,
+ 791, 822, 760, 791, 729, 760, 698, 729, 667, 698, 636, 667, 605, 636,
+ 574, 605, 543, 574, 575, 606, 606, 637, 637, 668, 668, 699, 699, 730,
+ 730, 761, 761, 792, 792, 823, 823, 854, 854, 885, 885, 916, 916, 947,
+ 947, 978, 978, 1009, 979, 1010, 948, 979, 917, 948, 886, 917, 855, 886,
+ 824, 855, 793, 824, 762, 793, 731, 762, 700, 731, 669, 700, 638, 669,
+ 607, 638, 639, 670, 670, 701, 701, 732, 732, 763, 763, 794, 794, 825,
+ 825, 856, 856, 887, 887, 918, 918, 949, 949, 980, 980, 1011, 981, 1012,
+ 950, 981, 919, 950, 888, 919, 857, 888, 826, 857, 795, 826, 764, 795,
+ 733, 764, 702, 733, 671, 702, 703, 734, 734, 765, 765, 796, 796, 827,
+ 827, 858, 858, 889, 889, 920, 920, 951, 951, 982, 982, 1013, 983, 1014,
+ 952, 983, 921, 952, 890, 921, 859, 890, 828, 859, 797, 828, 766, 797,
+ 735, 766, 767, 798, 798, 829, 829, 860, 860, 891, 891, 922, 922, 953,
+ 953, 984, 984, 1015, 985, 1016, 954, 985, 923, 954, 892, 923, 861, 892,
+ 830, 861, 799, 830, 831, 862, 862, 893, 893, 924, 924, 955, 955, 986,
+ 986, 1017, 987, 1018, 956, 987, 925, 956, 894, 925, 863, 894, 895, 926,
+ 926, 957, 957, 988, 988, 1019, 989, 1020, 958, 989, 927, 958, 959, 990,
990, 1021, 991, 1022, 0, 0
};
-#if CONFIG_TX64X64
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_32x64_neighbors[2049 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 32, 32, 32, 2,
- 2, 2, 33, 33, 64, 64, 64, 3, 3, 3, 34, 34, 65,
- 65, 96, 96, 96, 4, 4, 4, 35, 35, 66, 66, 97, 97,
- 128, 128, 128, 5, 5, 5, 36, 36, 67, 67, 98, 98, 129,
- 129, 160, 160, 160, 6, 6, 6, 37, 37, 68, 68, 99, 99,
- 130, 130, 161, 161, 192, 192, 192, 7, 7, 7, 38, 38, 69,
- 69, 100, 100, 131, 131, 162, 162, 193, 193, 224, 224, 224, 8,
- 8, 8, 39, 39, 70, 70, 101, 101, 132, 132, 163, 163, 194,
- 194, 225, 225, 256, 256, 256, 9, 9, 9, 40, 40, 71, 71,
- 102, 102, 133, 133, 164, 164, 195, 195, 226, 226, 257, 257, 288,
- 288, 288, 10, 10, 10, 41, 41, 72, 72, 103, 103, 134, 134,
- 165, 165, 196, 196, 227, 227, 258, 258, 289, 289, 320, 320, 320,
- 11, 11, 11, 42, 42, 73, 73, 104, 104, 135, 135, 166, 166,
- 197, 197, 228, 228, 259, 259, 290, 290, 321, 321, 352, 352, 352,
- 12, 12, 12, 43, 43, 74, 74, 105, 105, 136, 136, 167, 167,
- 198, 198, 229, 229, 260, 260, 291, 291, 322, 322, 353, 353, 384,
- 384, 384, 13, 13, 13, 44, 44, 75, 75, 106, 106, 137, 137,
- 168, 168, 199, 199, 230, 230, 261, 261, 292, 292, 323, 323, 354,
- 354, 385, 385, 416, 416, 416, 14, 14, 14, 45, 45, 76, 76,
- 107, 107, 138, 138, 169, 169, 200, 200, 231, 231, 262, 262, 293,
- 293, 324, 324, 355, 355, 386, 386, 417, 417, 448, 448, 448, 15,
- 15, 15, 46, 46, 77, 77, 108, 108, 139, 139, 170, 170, 201,
- 201, 232, 232, 263, 263, 294, 294, 325, 325, 356, 356, 387, 387,
- 418, 418, 449, 449, 480, 480, 480, 16, 16, 16, 47, 47, 78,
- 78, 109, 109, 140, 140, 171, 171, 202, 202, 233, 233, 264, 264,
- 295, 295, 326, 326, 357, 357, 388, 388, 419, 419, 450, 450, 481,
- 481, 512, 512, 512, 17, 17, 17, 48, 48, 79, 79, 110, 110,
- 141, 141, 172, 172, 203, 203, 234, 234, 265, 265, 296, 296, 327,
- 327, 358, 358, 389, 389, 420, 420, 451, 451, 482, 482, 513, 513,
- 544, 544, 544, 18, 18, 18, 49, 49, 80, 80, 111, 111, 142,
- 142, 173, 173, 204, 204, 235, 235, 266, 266, 297, 297, 328, 328,
- 359, 359, 390, 390, 421, 421, 452, 452, 483, 483, 514, 514, 545,
- 545, 576, 576, 576, 19, 19, 19, 50, 50, 81, 81, 112, 112,
- 143, 143, 174, 174, 205, 205, 236, 236, 267, 267, 298, 298, 329,
- 329, 360, 360, 391, 391, 422, 422, 453, 453, 484, 484, 515, 515,
- 546, 546, 577, 577, 608, 608, 608, 20, 20, 20, 51, 51, 82,
- 82, 113, 113, 144, 144, 175, 175, 206, 206, 237, 237, 268, 268,
- 299, 299, 330, 330, 361, 361, 392, 392, 423, 423, 454, 454, 485,
- 485, 516, 516, 547, 547, 578, 578, 609, 609, 640, 640, 640, 21,
- 21, 21, 52, 52, 83, 83, 114, 114, 145, 145, 176, 176, 207,
- 207, 238, 238, 269, 269, 300, 300, 331, 331, 362, 362, 393, 393,
- 424, 424, 455, 455, 486, 486, 517, 517, 548, 548, 579, 579, 610,
- 610, 641, 641, 672, 672, 672, 22, 22, 22, 53, 53, 84, 84,
- 115, 115, 146, 146, 177, 177, 208, 208, 239, 239, 270, 270, 301,
- 301, 332, 332, 363, 363, 394, 394, 425, 425, 456, 456, 487, 487,
- 518, 518, 549, 549, 580, 580, 611, 611, 642, 642, 673, 673, 704,
- 704, 704, 23, 23, 23, 54, 54, 85, 85, 116, 116, 147, 147,
- 178, 178, 209, 209, 240, 240, 271, 271, 302, 302, 333, 333, 364,
- 364, 395, 395, 426, 426, 457, 457, 488, 488, 519, 519, 550, 550,
- 581, 581, 612, 612, 643, 643, 674, 674, 705, 705, 736, 736, 736,
- 24, 24, 24, 55, 55, 86, 86, 117, 117, 148, 148, 179, 179,
- 210, 210, 241, 241, 272, 272, 303, 303, 334, 334, 365, 365, 396,
- 396, 427, 427, 458, 458, 489, 489, 520, 520, 551, 551, 582, 582,
- 613, 613, 644, 644, 675, 675, 706, 706, 737, 737, 768, 768, 768,
- 25, 25, 25, 56, 56, 87, 87, 118, 118, 149, 149, 180, 180,
- 211, 211, 242, 242, 273, 273, 304, 304, 335, 335, 366, 366, 397,
- 397, 428, 428, 459, 459, 490, 490, 521, 521, 552, 552, 583, 583,
- 614, 614, 645, 645, 676, 676, 707, 707, 738, 738, 769, 769, 800,
- 800, 800, 26, 26, 26, 57, 57, 88, 88, 119, 119, 150, 150,
- 181, 181, 212, 212, 243, 243, 274, 274, 305, 305, 336, 336, 367,
- 367, 398, 398, 429, 429, 460, 460, 491, 491, 522, 522, 553, 553,
- 584, 584, 615, 615, 646, 646, 677, 677, 708, 708, 739, 739, 770,
- 770, 801, 801, 832, 832, 832, 27, 27, 27, 58, 58, 89, 89,
- 120, 120, 151, 151, 182, 182, 213, 213, 244, 244, 275, 275, 306,
- 306, 337, 337, 368, 368, 399, 399, 430, 430, 461, 461, 492, 492,
- 523, 523, 554, 554, 585, 585, 616, 616, 647, 647, 678, 678, 709,
- 709, 740, 740, 771, 771, 802, 802, 833, 833, 864, 864, 864, 28,
- 28, 28, 59, 59, 90, 90, 121, 121, 152, 152, 183, 183, 214,
- 214, 245, 245, 276, 276, 307, 307, 338, 338, 369, 369, 400, 400,
- 431, 431, 462, 462, 493, 493, 524, 524, 555, 555, 586, 586, 617,
- 617, 648, 648, 679, 679, 710, 710, 741, 741, 772, 772, 803, 803,
- 834, 834, 865, 865, 896, 896, 896, 29, 29, 29, 60, 60, 91,
- 91, 122, 122, 153, 153, 184, 184, 215, 215, 246, 246, 277, 277,
- 308, 308, 339, 339, 370, 370, 401, 401, 432, 432, 463, 463, 494,
- 494, 525, 525, 556, 556, 587, 587, 618, 618, 649, 649, 680, 680,
- 711, 711, 742, 742, 773, 773, 804, 804, 835, 835, 866, 866, 897,
- 897, 928, 928, 928, 30, 30, 30, 61, 61, 92, 92, 123, 123,
- 154, 154, 185, 185, 216, 216, 247, 247, 278, 278, 309, 309, 340,
- 340, 371, 371, 402, 402, 433, 433, 464, 464, 495, 495, 526, 526,
- 557, 557, 588, 588, 619, 619, 650, 650, 681, 681, 712, 712, 743,
- 743, 774, 774, 805, 805, 836, 836, 867, 867, 898, 898, 929, 929,
- 960, 960, 960, 31, 62, 62, 93, 93, 124, 124, 155, 155, 186,
- 186, 217, 217, 248, 248, 279, 279, 310, 310, 341, 341, 372, 372,
- 403, 403, 434, 434, 465, 465, 496, 496, 527, 527, 558, 558, 589,
- 589, 620, 620, 651, 651, 682, 682, 713, 713, 744, 744, 775, 775,
- 806, 806, 837, 837, 868, 868, 899, 899, 930, 930, 961, 961, 992,
- 992, 992, 63, 94, 94, 125, 125, 156, 156, 187, 187, 218, 218,
- 249, 249, 280, 280, 311, 311, 342, 342, 373, 373, 404, 404, 435,
- 435, 466, 466, 497, 497, 528, 528, 559, 559, 590, 590, 621, 621,
- 652, 652, 683, 683, 714, 714, 745, 745, 776, 776, 807, 807, 838,
- 838, 869, 869, 900, 900, 931, 931, 962, 962, 993, 993, 1024, 1024,
- 1024, 95, 126, 126, 157, 157, 188, 188, 219, 219, 250, 250, 281,
- 281, 312, 312, 343, 343, 374, 374, 405, 405, 436, 436, 467, 467,
- 498, 498, 529, 529, 560, 560, 591, 591, 622, 622, 653, 653, 684,
- 684, 715, 715, 746, 746, 777, 777, 808, 808, 839, 839, 870, 870,
- 901, 901, 932, 932, 963, 963, 994, 994, 1025, 1025, 1056, 1056, 1056,
- 127, 158, 158, 189, 189, 220, 220, 251, 251, 282, 282, 313, 313,
- 344, 344, 375, 375, 406, 406, 437, 437, 468, 468, 499, 499, 530,
- 530, 561, 561, 592, 592, 623, 623, 654, 654, 685, 685, 716, 716,
- 747, 747, 778, 778, 809, 809, 840, 840, 871, 871, 902, 902, 933,
- 933, 964, 964, 995, 995, 1026, 1026, 1057, 1057, 1088, 1088, 1088, 159,
- 190, 190, 221, 221, 252, 252, 283, 283, 314, 314, 345, 345, 376,
- 376, 407, 407, 438, 438, 469, 469, 500, 500, 531, 531, 562, 562,
- 593, 593, 624, 624, 655, 655, 686, 686, 717, 717, 748, 748, 779,
- 779, 810, 810, 841, 841, 872, 872, 903, 903, 934, 934, 965, 965,
- 996, 996, 1027, 1027, 1058, 1058, 1089, 1089, 1120, 1120, 1120, 191, 222,
- 222, 253, 253, 284, 284, 315, 315, 346, 346, 377, 377, 408, 408,
- 439, 439, 470, 470, 501, 501, 532, 532, 563, 563, 594, 594, 625,
- 625, 656, 656, 687, 687, 718, 718, 749, 749, 780, 780, 811, 811,
- 842, 842, 873, 873, 904, 904, 935, 935, 966, 966, 997, 997, 1028,
- 1028, 1059, 1059, 1090, 1090, 1121, 1121, 1152, 1152, 1152, 223, 254, 254,
- 285, 285, 316, 316, 347, 347, 378, 378, 409, 409, 440, 440, 471,
- 471, 502, 502, 533, 533, 564, 564, 595, 595, 626, 626, 657, 657,
- 688, 688, 719, 719, 750, 750, 781, 781, 812, 812, 843, 843, 874,
- 874, 905, 905, 936, 936, 967, 967, 998, 998, 1029, 1029, 1060, 1060,
- 1091, 1091, 1122, 1122, 1153, 1153, 1184, 1184, 1184, 255, 286, 286, 317,
- 317, 348, 348, 379, 379, 410, 410, 441, 441, 472, 472, 503, 503,
- 534, 534, 565, 565, 596, 596, 627, 627, 658, 658, 689, 689, 720,
- 720, 751, 751, 782, 782, 813, 813, 844, 844, 875, 875, 906, 906,
- 937, 937, 968, 968, 999, 999, 1030, 1030, 1061, 1061, 1092, 1092, 1123,
- 1123, 1154, 1154, 1185, 1185, 1216, 1216, 1216, 287, 318, 318, 349, 349,
- 380, 380, 411, 411, 442, 442, 473, 473, 504, 504, 535, 535, 566,
- 566, 597, 597, 628, 628, 659, 659, 690, 690, 721, 721, 752, 752,
- 783, 783, 814, 814, 845, 845, 876, 876, 907, 907, 938, 938, 969,
- 969, 1000, 1000, 1031, 1031, 1062, 1062, 1093, 1093, 1124, 1124, 1155, 1155,
- 1186, 1186, 1217, 1217, 1248, 1248, 1248, 319, 350, 350, 381, 381, 412,
- 412, 443, 443, 474, 474, 505, 505, 536, 536, 567, 567, 598, 598,
- 629, 629, 660, 660, 691, 691, 722, 722, 753, 753, 784, 784, 815,
- 815, 846, 846, 877, 877, 908, 908, 939, 939, 970, 970, 1001, 1001,
- 1032, 1032, 1063, 1063, 1094, 1094, 1125, 1125, 1156, 1156, 1187, 1187, 1218,
- 1218, 1249, 1249, 1280, 1280, 1280, 351, 382, 382, 413, 413, 444, 444,
- 475, 475, 506, 506, 537, 537, 568, 568, 599, 599, 630, 630, 661,
- 661, 692, 692, 723, 723, 754, 754, 785, 785, 816, 816, 847, 847,
- 878, 878, 909, 909, 940, 940, 971, 971, 1002, 1002, 1033, 1033, 1064,
- 1064, 1095, 1095, 1126, 1126, 1157, 1157, 1188, 1188, 1219, 1219, 1250, 1250,
- 1281, 1281, 1312, 1312, 1312, 383, 414, 414, 445, 445, 476, 476, 507,
- 507, 538, 538, 569, 569, 600, 600, 631, 631, 662, 662, 693, 693,
- 724, 724, 755, 755, 786, 786, 817, 817, 848, 848, 879, 879, 910,
- 910, 941, 941, 972, 972, 1003, 1003, 1034, 1034, 1065, 1065, 1096, 1096,
- 1127, 1127, 1158, 1158, 1189, 1189, 1220, 1220, 1251, 1251, 1282, 1282, 1313,
- 1313, 1344, 1344, 1344, 415, 446, 446, 477, 477, 508, 508, 539, 539,
- 570, 570, 601, 601, 632, 632, 663, 663, 694, 694, 725, 725, 756,
- 756, 787, 787, 818, 818, 849, 849, 880, 880, 911, 911, 942, 942,
- 973, 973, 1004, 1004, 1035, 1035, 1066, 1066, 1097, 1097, 1128, 1128, 1159,
- 1159, 1190, 1190, 1221, 1221, 1252, 1252, 1283, 1283, 1314, 1314, 1345, 1345,
- 1376, 1376, 1376, 447, 478, 478, 509, 509, 540, 540, 571, 571, 602,
- 602, 633, 633, 664, 664, 695, 695, 726, 726, 757, 757, 788, 788,
- 819, 819, 850, 850, 881, 881, 912, 912, 943, 943, 974, 974, 1005,
- 1005, 1036, 1036, 1067, 1067, 1098, 1098, 1129, 1129, 1160, 1160, 1191, 1191,
- 1222, 1222, 1253, 1253, 1284, 1284, 1315, 1315, 1346, 1346, 1377, 1377, 1408,
- 1408, 1408, 479, 510, 510, 541, 541, 572, 572, 603, 603, 634, 634,
- 665, 665, 696, 696, 727, 727, 758, 758, 789, 789, 820, 820, 851,
- 851, 882, 882, 913, 913, 944, 944, 975, 975, 1006, 1006, 1037, 1037,
- 1068, 1068, 1099, 1099, 1130, 1130, 1161, 1161, 1192, 1192, 1223, 1223, 1254,
- 1254, 1285, 1285, 1316, 1316, 1347, 1347, 1378, 1378, 1409, 1409, 1440, 1440,
- 1440, 511, 542, 542, 573, 573, 604, 604, 635, 635, 666, 666, 697,
- 697, 728, 728, 759, 759, 790, 790, 821, 821, 852, 852, 883, 883,
- 914, 914, 945, 945, 976, 976, 1007, 1007, 1038, 1038, 1069, 1069, 1100,
- 1100, 1131, 1131, 1162, 1162, 1193, 1193, 1224, 1224, 1255, 1255, 1286, 1286,
- 1317, 1317, 1348, 1348, 1379, 1379, 1410, 1410, 1441, 1441, 1472, 1472, 1472,
- 543, 574, 574, 605, 605, 636, 636, 667, 667, 698, 698, 729, 729,
- 760, 760, 791, 791, 822, 822, 853, 853, 884, 884, 915, 915, 946,
- 946, 977, 977, 1008, 1008, 1039, 1039, 1070, 1070, 1101, 1101, 1132, 1132,
- 1163, 1163, 1194, 1194, 1225, 1225, 1256, 1256, 1287, 1287, 1318, 1318, 1349,
- 1349, 1380, 1380, 1411, 1411, 1442, 1442, 1473, 1473, 1504, 1504, 1504, 575,
- 606, 606, 637, 637, 668, 668, 699, 699, 730, 730, 761, 761, 792,
- 792, 823, 823, 854, 854, 885, 885, 916, 916, 947, 947, 978, 978,
- 1009, 1009, 1040, 1040, 1071, 1071, 1102, 1102, 1133, 1133, 1164, 1164, 1195,
- 1195, 1226, 1226, 1257, 1257, 1288, 1288, 1319, 1319, 1350, 1350, 1381, 1381,
- 1412, 1412, 1443, 1443, 1474, 1474, 1505, 1505, 1536, 1536, 1536, 607, 638,
- 638, 669, 669, 700, 700, 731, 731, 762, 762, 793, 793, 824, 824,
- 855, 855, 886, 886, 917, 917, 948, 948, 979, 979, 1010, 1010, 1041,
- 1041, 1072, 1072, 1103, 1103, 1134, 1134, 1165, 1165, 1196, 1196, 1227, 1227,
- 1258, 1258, 1289, 1289, 1320, 1320, 1351, 1351, 1382, 1382, 1413, 1413, 1444,
- 1444, 1475, 1475, 1506, 1506, 1537, 1537, 1568, 1568, 1568, 639, 670, 670,
- 701, 701, 732, 732, 763, 763, 794, 794, 825, 825, 856, 856, 887,
- 887, 918, 918, 949, 949, 980, 980, 1011, 1011, 1042, 1042, 1073, 1073,
- 1104, 1104, 1135, 1135, 1166, 1166, 1197, 1197, 1228, 1228, 1259, 1259, 1290,
- 1290, 1321, 1321, 1352, 1352, 1383, 1383, 1414, 1414, 1445, 1445, 1476, 1476,
- 1507, 1507, 1538, 1538, 1569, 1569, 1600, 1600, 1600, 671, 702, 702, 733,
- 733, 764, 764, 795, 795, 826, 826, 857, 857, 888, 888, 919, 919,
- 950, 950, 981, 981, 1012, 1012, 1043, 1043, 1074, 1074, 1105, 1105, 1136,
- 1136, 1167, 1167, 1198, 1198, 1229, 1229, 1260, 1260, 1291, 1291, 1322, 1322,
- 1353, 1353, 1384, 1384, 1415, 1415, 1446, 1446, 1477, 1477, 1508, 1508, 1539,
- 1539, 1570, 1570, 1601, 1601, 1632, 1632, 1632, 703, 734, 734, 765, 765,
- 796, 796, 827, 827, 858, 858, 889, 889, 920, 920, 951, 951, 982,
- 982, 1013, 1013, 1044, 1044, 1075, 1075, 1106, 1106, 1137, 1137, 1168, 1168,
- 1199, 1199, 1230, 1230, 1261, 1261, 1292, 1292, 1323, 1323, 1354, 1354, 1385,
- 1385, 1416, 1416, 1447, 1447, 1478, 1478, 1509, 1509, 1540, 1540, 1571, 1571,
- 1602, 1602, 1633, 1633, 1664, 1664, 1664, 735, 766, 766, 797, 797, 828,
- 828, 859, 859, 890, 890, 921, 921, 952, 952, 983, 983, 1014, 1014,
- 1045, 1045, 1076, 1076, 1107, 1107, 1138, 1138, 1169, 1169, 1200, 1200, 1231,
- 1231, 1262, 1262, 1293, 1293, 1324, 1324, 1355, 1355, 1386, 1386, 1417, 1417,
- 1448, 1448, 1479, 1479, 1510, 1510, 1541, 1541, 1572, 1572, 1603, 1603, 1634,
- 1634, 1665, 1665, 1696, 1696, 1696, 767, 798, 798, 829, 829, 860, 860,
- 891, 891, 922, 922, 953, 953, 984, 984, 1015, 1015, 1046, 1046, 1077,
- 1077, 1108, 1108, 1139, 1139, 1170, 1170, 1201, 1201, 1232, 1232, 1263, 1263,
- 1294, 1294, 1325, 1325, 1356, 1356, 1387, 1387, 1418, 1418, 1449, 1449, 1480,
- 1480, 1511, 1511, 1542, 1542, 1573, 1573, 1604, 1604, 1635, 1635, 1666, 1666,
- 1697, 1697, 1728, 1728, 1728, 799, 830, 830, 861, 861, 892, 892, 923,
- 923, 954, 954, 985, 985, 1016, 1016, 1047, 1047, 1078, 1078, 1109, 1109,
- 1140, 1140, 1171, 1171, 1202, 1202, 1233, 1233, 1264, 1264, 1295, 1295, 1326,
- 1326, 1357, 1357, 1388, 1388, 1419, 1419, 1450, 1450, 1481, 1481, 1512, 1512,
- 1543, 1543, 1574, 1574, 1605, 1605, 1636, 1636, 1667, 1667, 1698, 1698, 1729,
- 1729, 1760, 1760, 1760, 831, 862, 862, 893, 893, 924, 924, 955, 955,
- 986, 986, 1017, 1017, 1048, 1048, 1079, 1079, 1110, 1110, 1141, 1141, 1172,
- 1172, 1203, 1203, 1234, 1234, 1265, 1265, 1296, 1296, 1327, 1327, 1358, 1358,
- 1389, 1389, 1420, 1420, 1451, 1451, 1482, 1482, 1513, 1513, 1544, 1544, 1575,
- 1575, 1606, 1606, 1637, 1637, 1668, 1668, 1699, 1699, 1730, 1730, 1761, 1761,
- 1792, 1792, 1792, 863, 894, 894, 925, 925, 956, 956, 987, 987, 1018,
- 1018, 1049, 1049, 1080, 1080, 1111, 1111, 1142, 1142, 1173, 1173, 1204, 1204,
- 1235, 1235, 1266, 1266, 1297, 1297, 1328, 1328, 1359, 1359, 1390, 1390, 1421,
- 1421, 1452, 1452, 1483, 1483, 1514, 1514, 1545, 1545, 1576, 1576, 1607, 1607,
- 1638, 1638, 1669, 1669, 1700, 1700, 1731, 1731, 1762, 1762, 1793, 1793, 1824,
- 1824, 1824, 895, 926, 926, 957, 957, 988, 988, 1019, 1019, 1050, 1050,
- 1081, 1081, 1112, 1112, 1143, 1143, 1174, 1174, 1205, 1205, 1236, 1236, 1267,
- 1267, 1298, 1298, 1329, 1329, 1360, 1360, 1391, 1391, 1422, 1422, 1453, 1453,
- 1484, 1484, 1515, 1515, 1546, 1546, 1577, 1577, 1608, 1608, 1639, 1639, 1670,
- 1670, 1701, 1701, 1732, 1732, 1763, 1763, 1794, 1794, 1825, 1825, 1856, 1856,
- 1856, 927, 958, 958, 989, 989, 1020, 1020, 1051, 1051, 1082, 1082, 1113,
- 1113, 1144, 1144, 1175, 1175, 1206, 1206, 1237, 1237, 1268, 1268, 1299, 1299,
- 1330, 1330, 1361, 1361, 1392, 1392, 1423, 1423, 1454, 1454, 1485, 1485, 1516,
- 1516, 1547, 1547, 1578, 1578, 1609, 1609, 1640, 1640, 1671, 1671, 1702, 1702,
- 1733, 1733, 1764, 1764, 1795, 1795, 1826, 1826, 1857, 1857, 1888, 1888, 1888,
- 959, 990, 990, 1021, 1021, 1052, 1052, 1083, 1083, 1114, 1114, 1145, 1145,
- 1176, 1176, 1207, 1207, 1238, 1238, 1269, 1269, 1300, 1300, 1331, 1331, 1362,
- 1362, 1393, 1393, 1424, 1424, 1455, 1455, 1486, 1486, 1517, 1517, 1548, 1548,
- 1579, 1579, 1610, 1610, 1641, 1641, 1672, 1672, 1703, 1703, 1734, 1734, 1765,
- 1765, 1796, 1796, 1827, 1827, 1858, 1858, 1889, 1889, 1920, 1920, 1920, 991,
- 1022, 1022, 1053, 1053, 1084, 1084, 1115, 1115, 1146, 1146, 1177, 1177, 1208,
- 1208, 1239, 1239, 1270, 1270, 1301, 1301, 1332, 1332, 1363, 1363, 1394, 1394,
- 1425, 1425, 1456, 1456, 1487, 1487, 1518, 1518, 1549, 1549, 1580, 1580, 1611,
- 1611, 1642, 1642, 1673, 1673, 1704, 1704, 1735, 1735, 1766, 1766, 1797, 1797,
- 1828, 1828, 1859, 1859, 1890, 1890, 1921, 1921, 1952, 1952, 1952, 1023, 1054,
- 1054, 1085, 1085, 1116, 1116, 1147, 1147, 1178, 1178, 1209, 1209, 1240, 1240,
- 1271, 1271, 1302, 1302, 1333, 1333, 1364, 1364, 1395, 1395, 1426, 1426, 1457,
- 1457, 1488, 1488, 1519, 1519, 1550, 1550, 1581, 1581, 1612, 1612, 1643, 1643,
- 1674, 1674, 1705, 1705, 1736, 1736, 1767, 1767, 1798, 1798, 1829, 1829, 1860,
- 1860, 1891, 1891, 1922, 1922, 1953, 1953, 1984, 1984, 1984, 1055, 1086, 1086,
- 1117, 1117, 1148, 1148, 1179, 1179, 1210, 1210, 1241, 1241, 1272, 1272, 1303,
- 1303, 1334, 1334, 1365, 1365, 1396, 1396, 1427, 1427, 1458, 1458, 1489, 1489,
- 1520, 1520, 1551, 1551, 1582, 1582, 1613, 1613, 1644, 1644, 1675, 1675, 1706,
- 1706, 1737, 1737, 1768, 1768, 1799, 1799, 1830, 1830, 1861, 1861, 1892, 1892,
- 1923, 1923, 1954, 1954, 1985, 1985, 2016, 1087, 1118, 1118, 1149, 1149, 1180,
- 1180, 1211, 1211, 1242, 1242, 1273, 1273, 1304, 1304, 1335, 1335, 1366, 1366,
- 1397, 1397, 1428, 1428, 1459, 1459, 1490, 1490, 1521, 1521, 1552, 1552, 1583,
- 1583, 1614, 1614, 1645, 1645, 1676, 1676, 1707, 1707, 1738, 1738, 1769, 1769,
- 1800, 1800, 1831, 1831, 1862, 1862, 1893, 1893, 1924, 1924, 1955, 1955, 1986,
- 1986, 2017, 1119, 1150, 1150, 1181, 1181, 1212, 1212, 1243, 1243, 1274, 1274,
- 1305, 1305, 1336, 1336, 1367, 1367, 1398, 1398, 1429, 1429, 1460, 1460, 1491,
- 1491, 1522, 1522, 1553, 1553, 1584, 1584, 1615, 1615, 1646, 1646, 1677, 1677,
- 1708, 1708, 1739, 1739, 1770, 1770, 1801, 1801, 1832, 1832, 1863, 1863, 1894,
- 1894, 1925, 1925, 1956, 1956, 1987, 1987, 2018, 1151, 1182, 1182, 1213, 1213,
- 1244, 1244, 1275, 1275, 1306, 1306, 1337, 1337, 1368, 1368, 1399, 1399, 1430,
- 1430, 1461, 1461, 1492, 1492, 1523, 1523, 1554, 1554, 1585, 1585, 1616, 1616,
- 1647, 1647, 1678, 1678, 1709, 1709, 1740, 1740, 1771, 1771, 1802, 1802, 1833,
- 1833, 1864, 1864, 1895, 1895, 1926, 1926, 1957, 1957, 1988, 1988, 2019, 1183,
- 1214, 1214, 1245, 1245, 1276, 1276, 1307, 1307, 1338, 1338, 1369, 1369, 1400,
- 1400, 1431, 1431, 1462, 1462, 1493, 1493, 1524, 1524, 1555, 1555, 1586, 1586,
- 1617, 1617, 1648, 1648, 1679, 1679, 1710, 1710, 1741, 1741, 1772, 1772, 1803,
- 1803, 1834, 1834, 1865, 1865, 1896, 1896, 1927, 1927, 1958, 1958, 1989, 1989,
- 2020, 1215, 1246, 1246, 1277, 1277, 1308, 1308, 1339, 1339, 1370, 1370, 1401,
- 1401, 1432, 1432, 1463, 1463, 1494, 1494, 1525, 1525, 1556, 1556, 1587, 1587,
- 1618, 1618, 1649, 1649, 1680, 1680, 1711, 1711, 1742, 1742, 1773, 1773, 1804,
- 1804, 1835, 1835, 1866, 1866, 1897, 1897, 1928, 1928, 1959, 1959, 1990, 1990,
- 2021, 1247, 1278, 1278, 1309, 1309, 1340, 1340, 1371, 1371, 1402, 1402, 1433,
- 1433, 1464, 1464, 1495, 1495, 1526, 1526, 1557, 1557, 1588, 1588, 1619, 1619,
- 1650, 1650, 1681, 1681, 1712, 1712, 1743, 1743, 1774, 1774, 1805, 1805, 1836,
- 1836, 1867, 1867, 1898, 1898, 1929, 1929, 1960, 1960, 1991, 1991, 2022, 1279,
- 1310, 1310, 1341, 1341, 1372, 1372, 1403, 1403, 1434, 1434, 1465, 1465, 1496,
- 1496, 1527, 1527, 1558, 1558, 1589, 1589, 1620, 1620, 1651, 1651, 1682, 1682,
- 1713, 1713, 1744, 1744, 1775, 1775, 1806, 1806, 1837, 1837, 1868, 1868, 1899,
- 1899, 1930, 1930, 1961, 1961, 1992, 1992, 2023, 1311, 1342, 1342, 1373, 1373,
- 1404, 1404, 1435, 1435, 1466, 1466, 1497, 1497, 1528, 1528, 1559, 1559, 1590,
- 1590, 1621, 1621, 1652, 1652, 1683, 1683, 1714, 1714, 1745, 1745, 1776, 1776,
- 1807, 1807, 1838, 1838, 1869, 1869, 1900, 1900, 1931, 1931, 1962, 1962, 1993,
- 1993, 2024, 1343, 1374, 1374, 1405, 1405, 1436, 1436, 1467, 1467, 1498, 1498,
- 1529, 1529, 1560, 1560, 1591, 1591, 1622, 1622, 1653, 1653, 1684, 1684, 1715,
- 1715, 1746, 1746, 1777, 1777, 1808, 1808, 1839, 1839, 1870, 1870, 1901, 1901,
- 1932, 1932, 1963, 1963, 1994, 1994, 2025, 1375, 1406, 1406, 1437, 1437, 1468,
- 1468, 1499, 1499, 1530, 1530, 1561, 1561, 1592, 1592, 1623, 1623, 1654, 1654,
- 1685, 1685, 1716, 1716, 1747, 1747, 1778, 1778, 1809, 1809, 1840, 1840, 1871,
- 1871, 1902, 1902, 1933, 1933, 1964, 1964, 1995, 1995, 2026, 1407, 1438, 1438,
- 1469, 1469, 1500, 1500, 1531, 1531, 1562, 1562, 1593, 1593, 1624, 1624, 1655,
- 1655, 1686, 1686, 1717, 1717, 1748, 1748, 1779, 1779, 1810, 1810, 1841, 1841,
- 1872, 1872, 1903, 1903, 1934, 1934, 1965, 1965, 1996, 1996, 2027, 1439, 1470,
- 1470, 1501, 1501, 1532, 1532, 1563, 1563, 1594, 1594, 1625, 1625, 1656, 1656,
- 1687, 1687, 1718, 1718, 1749, 1749, 1780, 1780, 1811, 1811, 1842, 1842, 1873,
- 1873, 1904, 1904, 1935, 1935, 1966, 1966, 1997, 1997, 2028, 1471, 1502, 1502,
- 1533, 1533, 1564, 1564, 1595, 1595, 1626, 1626, 1657, 1657, 1688, 1688, 1719,
- 1719, 1750, 1750, 1781, 1781, 1812, 1812, 1843, 1843, 1874, 1874, 1905, 1905,
- 1936, 1936, 1967, 1967, 1998, 1998, 2029, 1503, 1534, 1534, 1565, 1565, 1596,
- 1596, 1627, 1627, 1658, 1658, 1689, 1689, 1720, 1720, 1751, 1751, 1782, 1782,
- 1813, 1813, 1844, 1844, 1875, 1875, 1906, 1906, 1937, 1937, 1968, 1968, 1999,
- 1999, 2030, 1535, 1566, 1566, 1597, 1597, 1628, 1628, 1659, 1659, 1690, 1690,
- 1721, 1721, 1752, 1752, 1783, 1783, 1814, 1814, 1845, 1845, 1876, 1876, 1907,
- 1907, 1938, 1938, 1969, 1969, 2000, 2000, 2031, 1567, 1598, 1598, 1629, 1629,
- 1660, 1660, 1691, 1691, 1722, 1722, 1753, 1753, 1784, 1784, 1815, 1815, 1846,
- 1846, 1877, 1877, 1908, 1908, 1939, 1939, 1970, 1970, 2001, 2001, 2032, 1599,
- 1630, 1630, 1661, 1661, 1692, 1692, 1723, 1723, 1754, 1754, 1785, 1785, 1816,
- 1816, 1847, 1847, 1878, 1878, 1909, 1909, 1940, 1940, 1971, 1971, 2002, 2002,
- 2033, 1631, 1662, 1662, 1693, 1693, 1724, 1724, 1755, 1755, 1786, 1786, 1817,
- 1817, 1848, 1848, 1879, 1879, 1910, 1910, 1941, 1941, 1972, 1972, 2003, 2003,
- 2034, 1663, 1694, 1694, 1725, 1725, 1756, 1756, 1787, 1787, 1818, 1818, 1849,
- 1849, 1880, 1880, 1911, 1911, 1942, 1942, 1973, 1973, 2004, 2004, 2035, 1695,
- 1726, 1726, 1757, 1757, 1788, 1788, 1819, 1819, 1850, 1850, 1881, 1881, 1912,
- 1912, 1943, 1943, 1974, 1974, 2005, 2005, 2036, 1727, 1758, 1758, 1789, 1789,
- 1820, 1820, 1851, 1851, 1882, 1882, 1913, 1913, 1944, 1944, 1975, 1975, 2006,
- 2006, 2037, 1759, 1790, 1790, 1821, 1821, 1852, 1852, 1883, 1883, 1914, 1914,
- 1945, 1945, 1976, 1976, 2007, 2007, 2038, 1791, 1822, 1822, 1853, 1853, 1884,
- 1884, 1915, 1915, 1946, 1946, 1977, 1977, 2008, 2008, 2039, 1823, 1854, 1854,
- 1885, 1885, 1916, 1916, 1947, 1947, 1978, 1978, 2009, 2009, 2040, 1855, 1886,
- 1886, 1917, 1917, 1948, 1948, 1979, 1979, 2010, 2010, 2041, 1887, 1918, 1918,
- 1949, 1949, 1980, 1980, 2011, 2011, 2042, 1919, 1950, 1950, 1981, 1981, 2012,
- 2012, 2043, 1951, 1982, 1982, 2013, 2013, 2044, 1983, 2014, 2014, 2045, 2015,
- 2046, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_64x32_neighbors[2049 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 64, 64, 64, 2,
- 2, 2, 65, 65, 128, 128, 128, 3, 3, 3, 66, 66, 129,
- 129, 192, 192, 192, 4, 4, 4, 67, 67, 130, 130, 193, 193,
- 256, 256, 256, 5, 5, 5, 68, 68, 131, 131, 194, 194, 257,
- 257, 320, 320, 320, 6, 6, 6, 69, 69, 132, 132, 195, 195,
- 258, 258, 321, 321, 384, 384, 384, 7, 7, 7, 70, 70, 133,
- 133, 196, 196, 259, 259, 322, 322, 385, 385, 448, 448, 448, 8,
- 8, 8, 71, 71, 134, 134, 197, 197, 260, 260, 323, 323, 386,
- 386, 449, 449, 512, 512, 512, 9, 9, 9, 72, 72, 135, 135,
- 198, 198, 261, 261, 324, 324, 387, 387, 450, 450, 513, 513, 576,
- 576, 576, 10, 10, 10, 73, 73, 136, 136, 199, 199, 262, 262,
- 325, 325, 388, 388, 451, 451, 514, 514, 577, 577, 640, 640, 640,
- 11, 11, 11, 74, 74, 137, 137, 200, 200, 263, 263, 326, 326,
- 389, 389, 452, 452, 515, 515, 578, 578, 641, 641, 704, 704, 704,
- 12, 12, 12, 75, 75, 138, 138, 201, 201, 264, 264, 327, 327,
- 390, 390, 453, 453, 516, 516, 579, 579, 642, 642, 705, 705, 768,
- 768, 768, 13, 13, 13, 76, 76, 139, 139, 202, 202, 265, 265,
- 328, 328, 391, 391, 454, 454, 517, 517, 580, 580, 643, 643, 706,
- 706, 769, 769, 832, 832, 832, 14, 14, 14, 77, 77, 140, 140,
- 203, 203, 266, 266, 329, 329, 392, 392, 455, 455, 518, 518, 581,
- 581, 644, 644, 707, 707, 770, 770, 833, 833, 896, 896, 896, 15,
- 15, 15, 78, 78, 141, 141, 204, 204, 267, 267, 330, 330, 393,
- 393, 456, 456, 519, 519, 582, 582, 645, 645, 708, 708, 771, 771,
- 834, 834, 897, 897, 960, 960, 960, 16, 16, 16, 79, 79, 142,
- 142, 205, 205, 268, 268, 331, 331, 394, 394, 457, 457, 520, 520,
- 583, 583, 646, 646, 709, 709, 772, 772, 835, 835, 898, 898, 961,
- 961, 1024, 1024, 1024, 17, 17, 17, 80, 80, 143, 143, 206, 206,
- 269, 269, 332, 332, 395, 395, 458, 458, 521, 521, 584, 584, 647,
- 647, 710, 710, 773, 773, 836, 836, 899, 899, 962, 962, 1025, 1025,
- 1088, 1088, 1088, 18, 18, 18, 81, 81, 144, 144, 207, 207, 270,
- 270, 333, 333, 396, 396, 459, 459, 522, 522, 585, 585, 648, 648,
- 711, 711, 774, 774, 837, 837, 900, 900, 963, 963, 1026, 1026, 1089,
- 1089, 1152, 1152, 1152, 19, 19, 19, 82, 82, 145, 145, 208, 208,
- 271, 271, 334, 334, 397, 397, 460, 460, 523, 523, 586, 586, 649,
- 649, 712, 712, 775, 775, 838, 838, 901, 901, 964, 964, 1027, 1027,
- 1090, 1090, 1153, 1153, 1216, 1216, 1216, 20, 20, 20, 83, 83, 146,
- 146, 209, 209, 272, 272, 335, 335, 398, 398, 461, 461, 524, 524,
- 587, 587, 650, 650, 713, 713, 776, 776, 839, 839, 902, 902, 965,
- 965, 1028, 1028, 1091, 1091, 1154, 1154, 1217, 1217, 1280, 1280, 1280, 21,
- 21, 21, 84, 84, 147, 147, 210, 210, 273, 273, 336, 336, 399,
- 399, 462, 462, 525, 525, 588, 588, 651, 651, 714, 714, 777, 777,
- 840, 840, 903, 903, 966, 966, 1029, 1029, 1092, 1092, 1155, 1155, 1218,
- 1218, 1281, 1281, 1344, 1344, 1344, 22, 22, 22, 85, 85, 148, 148,
- 211, 211, 274, 274, 337, 337, 400, 400, 463, 463, 526, 526, 589,
- 589, 652, 652, 715, 715, 778, 778, 841, 841, 904, 904, 967, 967,
- 1030, 1030, 1093, 1093, 1156, 1156, 1219, 1219, 1282, 1282, 1345, 1345, 1408,
- 1408, 1408, 23, 23, 23, 86, 86, 149, 149, 212, 212, 275, 275,
- 338, 338, 401, 401, 464, 464, 527, 527, 590, 590, 653, 653, 716,
- 716, 779, 779, 842, 842, 905, 905, 968, 968, 1031, 1031, 1094, 1094,
- 1157, 1157, 1220, 1220, 1283, 1283, 1346, 1346, 1409, 1409, 1472, 1472, 1472,
- 24, 24, 24, 87, 87, 150, 150, 213, 213, 276, 276, 339, 339,
- 402, 402, 465, 465, 528, 528, 591, 591, 654, 654, 717, 717, 780,
- 780, 843, 843, 906, 906, 969, 969, 1032, 1032, 1095, 1095, 1158, 1158,
- 1221, 1221, 1284, 1284, 1347, 1347, 1410, 1410, 1473, 1473, 1536, 1536, 1536,
- 25, 25, 25, 88, 88, 151, 151, 214, 214, 277, 277, 340, 340,
- 403, 403, 466, 466, 529, 529, 592, 592, 655, 655, 718, 718, 781,
- 781, 844, 844, 907, 907, 970, 970, 1033, 1033, 1096, 1096, 1159, 1159,
- 1222, 1222, 1285, 1285, 1348, 1348, 1411, 1411, 1474, 1474, 1537, 1537, 1600,
- 1600, 1600, 26, 26, 26, 89, 89, 152, 152, 215, 215, 278, 278,
- 341, 341, 404, 404, 467, 467, 530, 530, 593, 593, 656, 656, 719,
- 719, 782, 782, 845, 845, 908, 908, 971, 971, 1034, 1034, 1097, 1097,
- 1160, 1160, 1223, 1223, 1286, 1286, 1349, 1349, 1412, 1412, 1475, 1475, 1538,
- 1538, 1601, 1601, 1664, 1664, 1664, 27, 27, 27, 90, 90, 153, 153,
- 216, 216, 279, 279, 342, 342, 405, 405, 468, 468, 531, 531, 594,
- 594, 657, 657, 720, 720, 783, 783, 846, 846, 909, 909, 972, 972,
- 1035, 1035, 1098, 1098, 1161, 1161, 1224, 1224, 1287, 1287, 1350, 1350, 1413,
- 1413, 1476, 1476, 1539, 1539, 1602, 1602, 1665, 1665, 1728, 1728, 1728, 28,
- 28, 28, 91, 91, 154, 154, 217, 217, 280, 280, 343, 343, 406,
- 406, 469, 469, 532, 532, 595, 595, 658, 658, 721, 721, 784, 784,
- 847, 847, 910, 910, 973, 973, 1036, 1036, 1099, 1099, 1162, 1162, 1225,
- 1225, 1288, 1288, 1351, 1351, 1414, 1414, 1477, 1477, 1540, 1540, 1603, 1603,
- 1666, 1666, 1729, 1729, 1792, 1792, 1792, 29, 29, 29, 92, 92, 155,
- 155, 218, 218, 281, 281, 344, 344, 407, 407, 470, 470, 533, 533,
- 596, 596, 659, 659, 722, 722, 785, 785, 848, 848, 911, 911, 974,
- 974, 1037, 1037, 1100, 1100, 1163, 1163, 1226, 1226, 1289, 1289, 1352, 1352,
- 1415, 1415, 1478, 1478, 1541, 1541, 1604, 1604, 1667, 1667, 1730, 1730, 1793,
- 1793, 1856, 1856, 1856, 30, 30, 30, 93, 93, 156, 156, 219, 219,
- 282, 282, 345, 345, 408, 408, 471, 471, 534, 534, 597, 597, 660,
- 660, 723, 723, 786, 786, 849, 849, 912, 912, 975, 975, 1038, 1038,
- 1101, 1101, 1164, 1164, 1227, 1227, 1290, 1290, 1353, 1353, 1416, 1416, 1479,
- 1479, 1542, 1542, 1605, 1605, 1668, 1668, 1731, 1731, 1794, 1794, 1857, 1857,
- 1920, 1920, 1920, 31, 31, 31, 94, 94, 157, 157, 220, 220, 283,
- 283, 346, 346, 409, 409, 472, 472, 535, 535, 598, 598, 661, 661,
- 724, 724, 787, 787, 850, 850, 913, 913, 976, 976, 1039, 1039, 1102,
- 1102, 1165, 1165, 1228, 1228, 1291, 1291, 1354, 1354, 1417, 1417, 1480, 1480,
- 1543, 1543, 1606, 1606, 1669, 1669, 1732, 1732, 1795, 1795, 1858, 1858, 1921,
- 1921, 1984, 32, 32, 32, 95, 95, 158, 158, 221, 221, 284, 284,
- 347, 347, 410, 410, 473, 473, 536, 536, 599, 599, 662, 662, 725,
- 725, 788, 788, 851, 851, 914, 914, 977, 977, 1040, 1040, 1103, 1103,
- 1166, 1166, 1229, 1229, 1292, 1292, 1355, 1355, 1418, 1418, 1481, 1481, 1544,
- 1544, 1607, 1607, 1670, 1670, 1733, 1733, 1796, 1796, 1859, 1859, 1922, 1922,
- 1985, 33, 33, 33, 96, 96, 159, 159, 222, 222, 285, 285, 348,
- 348, 411, 411, 474, 474, 537, 537, 600, 600, 663, 663, 726, 726,
- 789, 789, 852, 852, 915, 915, 978, 978, 1041, 1041, 1104, 1104, 1167,
- 1167, 1230, 1230, 1293, 1293, 1356, 1356, 1419, 1419, 1482, 1482, 1545, 1545,
- 1608, 1608, 1671, 1671, 1734, 1734, 1797, 1797, 1860, 1860, 1923, 1923, 1986,
- 34, 34, 34, 97, 97, 160, 160, 223, 223, 286, 286, 349, 349,
- 412, 412, 475, 475, 538, 538, 601, 601, 664, 664, 727, 727, 790,
- 790, 853, 853, 916, 916, 979, 979, 1042, 1042, 1105, 1105, 1168, 1168,
- 1231, 1231, 1294, 1294, 1357, 1357, 1420, 1420, 1483, 1483, 1546, 1546, 1609,
- 1609, 1672, 1672, 1735, 1735, 1798, 1798, 1861, 1861, 1924, 1924, 1987, 35,
- 35, 35, 98, 98, 161, 161, 224, 224, 287, 287, 350, 350, 413,
- 413, 476, 476, 539, 539, 602, 602, 665, 665, 728, 728, 791, 791,
- 854, 854, 917, 917, 980, 980, 1043, 1043, 1106, 1106, 1169, 1169, 1232,
- 1232, 1295, 1295, 1358, 1358, 1421, 1421, 1484, 1484, 1547, 1547, 1610, 1610,
- 1673, 1673, 1736, 1736, 1799, 1799, 1862, 1862, 1925, 1925, 1988, 36, 36,
- 36, 99, 99, 162, 162, 225, 225, 288, 288, 351, 351, 414, 414,
- 477, 477, 540, 540, 603, 603, 666, 666, 729, 729, 792, 792, 855,
- 855, 918, 918, 981, 981, 1044, 1044, 1107, 1107, 1170, 1170, 1233, 1233,
- 1296, 1296, 1359, 1359, 1422, 1422, 1485, 1485, 1548, 1548, 1611, 1611, 1674,
- 1674, 1737, 1737, 1800, 1800, 1863, 1863, 1926, 1926, 1989, 37, 37, 37,
- 100, 100, 163, 163, 226, 226, 289, 289, 352, 352, 415, 415, 478,
- 478, 541, 541, 604, 604, 667, 667, 730, 730, 793, 793, 856, 856,
- 919, 919, 982, 982, 1045, 1045, 1108, 1108, 1171, 1171, 1234, 1234, 1297,
- 1297, 1360, 1360, 1423, 1423, 1486, 1486, 1549, 1549, 1612, 1612, 1675, 1675,
- 1738, 1738, 1801, 1801, 1864, 1864, 1927, 1927, 1990, 38, 38, 38, 101,
- 101, 164, 164, 227, 227, 290, 290, 353, 353, 416, 416, 479, 479,
- 542, 542, 605, 605, 668, 668, 731, 731, 794, 794, 857, 857, 920,
- 920, 983, 983, 1046, 1046, 1109, 1109, 1172, 1172, 1235, 1235, 1298, 1298,
- 1361, 1361, 1424, 1424, 1487, 1487, 1550, 1550, 1613, 1613, 1676, 1676, 1739,
- 1739, 1802, 1802, 1865, 1865, 1928, 1928, 1991, 39, 39, 39, 102, 102,
- 165, 165, 228, 228, 291, 291, 354, 354, 417, 417, 480, 480, 543,
- 543, 606, 606, 669, 669, 732, 732, 795, 795, 858, 858, 921, 921,
- 984, 984, 1047, 1047, 1110, 1110, 1173, 1173, 1236, 1236, 1299, 1299, 1362,
- 1362, 1425, 1425, 1488, 1488, 1551, 1551, 1614, 1614, 1677, 1677, 1740, 1740,
- 1803, 1803, 1866, 1866, 1929, 1929, 1992, 40, 40, 40, 103, 103, 166,
- 166, 229, 229, 292, 292, 355, 355, 418, 418, 481, 481, 544, 544,
- 607, 607, 670, 670, 733, 733, 796, 796, 859, 859, 922, 922, 985,
- 985, 1048, 1048, 1111, 1111, 1174, 1174, 1237, 1237, 1300, 1300, 1363, 1363,
- 1426, 1426, 1489, 1489, 1552, 1552, 1615, 1615, 1678, 1678, 1741, 1741, 1804,
- 1804, 1867, 1867, 1930, 1930, 1993, 41, 41, 41, 104, 104, 167, 167,
- 230, 230, 293, 293, 356, 356, 419, 419, 482, 482, 545, 545, 608,
- 608, 671, 671, 734, 734, 797, 797, 860, 860, 923, 923, 986, 986,
- 1049, 1049, 1112, 1112, 1175, 1175, 1238, 1238, 1301, 1301, 1364, 1364, 1427,
- 1427, 1490, 1490, 1553, 1553, 1616, 1616, 1679, 1679, 1742, 1742, 1805, 1805,
- 1868, 1868, 1931, 1931, 1994, 42, 42, 42, 105, 105, 168, 168, 231,
- 231, 294, 294, 357, 357, 420, 420, 483, 483, 546, 546, 609, 609,
- 672, 672, 735, 735, 798, 798, 861, 861, 924, 924, 987, 987, 1050,
- 1050, 1113, 1113, 1176, 1176, 1239, 1239, 1302, 1302, 1365, 1365, 1428, 1428,
- 1491, 1491, 1554, 1554, 1617, 1617, 1680, 1680, 1743, 1743, 1806, 1806, 1869,
- 1869, 1932, 1932, 1995, 43, 43, 43, 106, 106, 169, 169, 232, 232,
- 295, 295, 358, 358, 421, 421, 484, 484, 547, 547, 610, 610, 673,
- 673, 736, 736, 799, 799, 862, 862, 925, 925, 988, 988, 1051, 1051,
- 1114, 1114, 1177, 1177, 1240, 1240, 1303, 1303, 1366, 1366, 1429, 1429, 1492,
- 1492, 1555, 1555, 1618, 1618, 1681, 1681, 1744, 1744, 1807, 1807, 1870, 1870,
- 1933, 1933, 1996, 44, 44, 44, 107, 107, 170, 170, 233, 233, 296,
- 296, 359, 359, 422, 422, 485, 485, 548, 548, 611, 611, 674, 674,
- 737, 737, 800, 800, 863, 863, 926, 926, 989, 989, 1052, 1052, 1115,
- 1115, 1178, 1178, 1241, 1241, 1304, 1304, 1367, 1367, 1430, 1430, 1493, 1493,
- 1556, 1556, 1619, 1619, 1682, 1682, 1745, 1745, 1808, 1808, 1871, 1871, 1934,
- 1934, 1997, 45, 45, 45, 108, 108, 171, 171, 234, 234, 297, 297,
- 360, 360, 423, 423, 486, 486, 549, 549, 612, 612, 675, 675, 738,
- 738, 801, 801, 864, 864, 927, 927, 990, 990, 1053, 1053, 1116, 1116,
- 1179, 1179, 1242, 1242, 1305, 1305, 1368, 1368, 1431, 1431, 1494, 1494, 1557,
- 1557, 1620, 1620, 1683, 1683, 1746, 1746, 1809, 1809, 1872, 1872, 1935, 1935,
- 1998, 46, 46, 46, 109, 109, 172, 172, 235, 235, 298, 298, 361,
- 361, 424, 424, 487, 487, 550, 550, 613, 613, 676, 676, 739, 739,
- 802, 802, 865, 865, 928, 928, 991, 991, 1054, 1054, 1117, 1117, 1180,
- 1180, 1243, 1243, 1306, 1306, 1369, 1369, 1432, 1432, 1495, 1495, 1558, 1558,
- 1621, 1621, 1684, 1684, 1747, 1747, 1810, 1810, 1873, 1873, 1936, 1936, 1999,
- 47, 47, 47, 110, 110, 173, 173, 236, 236, 299, 299, 362, 362,
- 425, 425, 488, 488, 551, 551, 614, 614, 677, 677, 740, 740, 803,
- 803, 866, 866, 929, 929, 992, 992, 1055, 1055, 1118, 1118, 1181, 1181,
- 1244, 1244, 1307, 1307, 1370, 1370, 1433, 1433, 1496, 1496, 1559, 1559, 1622,
- 1622, 1685, 1685, 1748, 1748, 1811, 1811, 1874, 1874, 1937, 1937, 2000, 48,
- 48, 48, 111, 111, 174, 174, 237, 237, 300, 300, 363, 363, 426,
- 426, 489, 489, 552, 552, 615, 615, 678, 678, 741, 741, 804, 804,
- 867, 867, 930, 930, 993, 993, 1056, 1056, 1119, 1119, 1182, 1182, 1245,
- 1245, 1308, 1308, 1371, 1371, 1434, 1434, 1497, 1497, 1560, 1560, 1623, 1623,
- 1686, 1686, 1749, 1749, 1812, 1812, 1875, 1875, 1938, 1938, 2001, 49, 49,
- 49, 112, 112, 175, 175, 238, 238, 301, 301, 364, 364, 427, 427,
- 490, 490, 553, 553, 616, 616, 679, 679, 742, 742, 805, 805, 868,
- 868, 931, 931, 994, 994, 1057, 1057, 1120, 1120, 1183, 1183, 1246, 1246,
- 1309, 1309, 1372, 1372, 1435, 1435, 1498, 1498, 1561, 1561, 1624, 1624, 1687,
- 1687, 1750, 1750, 1813, 1813, 1876, 1876, 1939, 1939, 2002, 50, 50, 50,
- 113, 113, 176, 176, 239, 239, 302, 302, 365, 365, 428, 428, 491,
- 491, 554, 554, 617, 617, 680, 680, 743, 743, 806, 806, 869, 869,
- 932, 932, 995, 995, 1058, 1058, 1121, 1121, 1184, 1184, 1247, 1247, 1310,
- 1310, 1373, 1373, 1436, 1436, 1499, 1499, 1562, 1562, 1625, 1625, 1688, 1688,
- 1751, 1751, 1814, 1814, 1877, 1877, 1940, 1940, 2003, 51, 51, 51, 114,
- 114, 177, 177, 240, 240, 303, 303, 366, 366, 429, 429, 492, 492,
- 555, 555, 618, 618, 681, 681, 744, 744, 807, 807, 870, 870, 933,
- 933, 996, 996, 1059, 1059, 1122, 1122, 1185, 1185, 1248, 1248, 1311, 1311,
- 1374, 1374, 1437, 1437, 1500, 1500, 1563, 1563, 1626, 1626, 1689, 1689, 1752,
- 1752, 1815, 1815, 1878, 1878, 1941, 1941, 2004, 52, 52, 52, 115, 115,
- 178, 178, 241, 241, 304, 304, 367, 367, 430, 430, 493, 493, 556,
- 556, 619, 619, 682, 682, 745, 745, 808, 808, 871, 871, 934, 934,
- 997, 997, 1060, 1060, 1123, 1123, 1186, 1186, 1249, 1249, 1312, 1312, 1375,
- 1375, 1438, 1438, 1501, 1501, 1564, 1564, 1627, 1627, 1690, 1690, 1753, 1753,
- 1816, 1816, 1879, 1879, 1942, 1942, 2005, 53, 53, 53, 116, 116, 179,
- 179, 242, 242, 305, 305, 368, 368, 431, 431, 494, 494, 557, 557,
- 620, 620, 683, 683, 746, 746, 809, 809, 872, 872, 935, 935, 998,
- 998, 1061, 1061, 1124, 1124, 1187, 1187, 1250, 1250, 1313, 1313, 1376, 1376,
- 1439, 1439, 1502, 1502, 1565, 1565, 1628, 1628, 1691, 1691, 1754, 1754, 1817,
- 1817, 1880, 1880, 1943, 1943, 2006, 54, 54, 54, 117, 117, 180, 180,
- 243, 243, 306, 306, 369, 369, 432, 432, 495, 495, 558, 558, 621,
- 621, 684, 684, 747, 747, 810, 810, 873, 873, 936, 936, 999, 999,
- 1062, 1062, 1125, 1125, 1188, 1188, 1251, 1251, 1314, 1314, 1377, 1377, 1440,
- 1440, 1503, 1503, 1566, 1566, 1629, 1629, 1692, 1692, 1755, 1755, 1818, 1818,
- 1881, 1881, 1944, 1944, 2007, 55, 55, 55, 118, 118, 181, 181, 244,
- 244, 307, 307, 370, 370, 433, 433, 496, 496, 559, 559, 622, 622,
- 685, 685, 748, 748, 811, 811, 874, 874, 937, 937, 1000, 1000, 1063,
- 1063, 1126, 1126, 1189, 1189, 1252, 1252, 1315, 1315, 1378, 1378, 1441, 1441,
- 1504, 1504, 1567, 1567, 1630, 1630, 1693, 1693, 1756, 1756, 1819, 1819, 1882,
- 1882, 1945, 1945, 2008, 56, 56, 56, 119, 119, 182, 182, 245, 245,
- 308, 308, 371, 371, 434, 434, 497, 497, 560, 560, 623, 623, 686,
- 686, 749, 749, 812, 812, 875, 875, 938, 938, 1001, 1001, 1064, 1064,
- 1127, 1127, 1190, 1190, 1253, 1253, 1316, 1316, 1379, 1379, 1442, 1442, 1505,
- 1505, 1568, 1568, 1631, 1631, 1694, 1694, 1757, 1757, 1820, 1820, 1883, 1883,
- 1946, 1946, 2009, 57, 57, 57, 120, 120, 183, 183, 246, 246, 309,
- 309, 372, 372, 435, 435, 498, 498, 561, 561, 624, 624, 687, 687,
- 750, 750, 813, 813, 876, 876, 939, 939, 1002, 1002, 1065, 1065, 1128,
- 1128, 1191, 1191, 1254, 1254, 1317, 1317, 1380, 1380, 1443, 1443, 1506, 1506,
- 1569, 1569, 1632, 1632, 1695, 1695, 1758, 1758, 1821, 1821, 1884, 1884, 1947,
- 1947, 2010, 58, 58, 58, 121, 121, 184, 184, 247, 247, 310, 310,
- 373, 373, 436, 436, 499, 499, 562, 562, 625, 625, 688, 688, 751,
- 751, 814, 814, 877, 877, 940, 940, 1003, 1003, 1066, 1066, 1129, 1129,
- 1192, 1192, 1255, 1255, 1318, 1318, 1381, 1381, 1444, 1444, 1507, 1507, 1570,
- 1570, 1633, 1633, 1696, 1696, 1759, 1759, 1822, 1822, 1885, 1885, 1948, 1948,
- 2011, 59, 59, 59, 122, 122, 185, 185, 248, 248, 311, 311, 374,
- 374, 437, 437, 500, 500, 563, 563, 626, 626, 689, 689, 752, 752,
- 815, 815, 878, 878, 941, 941, 1004, 1004, 1067, 1067, 1130, 1130, 1193,
- 1193, 1256, 1256, 1319, 1319, 1382, 1382, 1445, 1445, 1508, 1508, 1571, 1571,
- 1634, 1634, 1697, 1697, 1760, 1760, 1823, 1823, 1886, 1886, 1949, 1949, 2012,
- 60, 60, 60, 123, 123, 186, 186, 249, 249, 312, 312, 375, 375,
- 438, 438, 501, 501, 564, 564, 627, 627, 690, 690, 753, 753, 816,
- 816, 879, 879, 942, 942, 1005, 1005, 1068, 1068, 1131, 1131, 1194, 1194,
- 1257, 1257, 1320, 1320, 1383, 1383, 1446, 1446, 1509, 1509, 1572, 1572, 1635,
- 1635, 1698, 1698, 1761, 1761, 1824, 1824, 1887, 1887, 1950, 1950, 2013, 61,
- 61, 61, 124, 124, 187, 187, 250, 250, 313, 313, 376, 376, 439,
- 439, 502, 502, 565, 565, 628, 628, 691, 691, 754, 754, 817, 817,
- 880, 880, 943, 943, 1006, 1006, 1069, 1069, 1132, 1132, 1195, 1195, 1258,
- 1258, 1321, 1321, 1384, 1384, 1447, 1447, 1510, 1510, 1573, 1573, 1636, 1636,
- 1699, 1699, 1762, 1762, 1825, 1825, 1888, 1888, 1951, 1951, 2014, 62, 62,
- 62, 125, 125, 188, 188, 251, 251, 314, 314, 377, 377, 440, 440,
- 503, 503, 566, 566, 629, 629, 692, 692, 755, 755, 818, 818, 881,
- 881, 944, 944, 1007, 1007, 1070, 1070, 1133, 1133, 1196, 1196, 1259, 1259,
- 1322, 1322, 1385, 1385, 1448, 1448, 1511, 1511, 1574, 1574, 1637, 1637, 1700,
- 1700, 1763, 1763, 1826, 1826, 1889, 1889, 1952, 1952, 2015, 63, 126, 126,
- 189, 189, 252, 252, 315, 315, 378, 378, 441, 441, 504, 504, 567,
- 567, 630, 630, 693, 693, 756, 756, 819, 819, 882, 882, 945, 945,
- 1008, 1008, 1071, 1071, 1134, 1134, 1197, 1197, 1260, 1260, 1323, 1323, 1386,
- 1386, 1449, 1449, 1512, 1512, 1575, 1575, 1638, 1638, 1701, 1701, 1764, 1764,
- 1827, 1827, 1890, 1890, 1953, 1953, 2016, 127, 190, 190, 253, 253, 316,
- 316, 379, 379, 442, 442, 505, 505, 568, 568, 631, 631, 694, 694,
- 757, 757, 820, 820, 883, 883, 946, 946, 1009, 1009, 1072, 1072, 1135,
- 1135, 1198, 1198, 1261, 1261, 1324, 1324, 1387, 1387, 1450, 1450, 1513, 1513,
- 1576, 1576, 1639, 1639, 1702, 1702, 1765, 1765, 1828, 1828, 1891, 1891, 1954,
- 1954, 2017, 191, 254, 254, 317, 317, 380, 380, 443, 443, 506, 506,
- 569, 569, 632, 632, 695, 695, 758, 758, 821, 821, 884, 884, 947,
- 947, 1010, 1010, 1073, 1073, 1136, 1136, 1199, 1199, 1262, 1262, 1325, 1325,
- 1388, 1388, 1451, 1451, 1514, 1514, 1577, 1577, 1640, 1640, 1703, 1703, 1766,
- 1766, 1829, 1829, 1892, 1892, 1955, 1955, 2018, 255, 318, 318, 381, 381,
- 444, 444, 507, 507, 570, 570, 633, 633, 696, 696, 759, 759, 822,
- 822, 885, 885, 948, 948, 1011, 1011, 1074, 1074, 1137, 1137, 1200, 1200,
- 1263, 1263, 1326, 1326, 1389, 1389, 1452, 1452, 1515, 1515, 1578, 1578, 1641,
- 1641, 1704, 1704, 1767, 1767, 1830, 1830, 1893, 1893, 1956, 1956, 2019, 319,
- 382, 382, 445, 445, 508, 508, 571, 571, 634, 634, 697, 697, 760,
- 760, 823, 823, 886, 886, 949, 949, 1012, 1012, 1075, 1075, 1138, 1138,
- 1201, 1201, 1264, 1264, 1327, 1327, 1390, 1390, 1453, 1453, 1516, 1516, 1579,
- 1579, 1642, 1642, 1705, 1705, 1768, 1768, 1831, 1831, 1894, 1894, 1957, 1957,
- 2020, 383, 446, 446, 509, 509, 572, 572, 635, 635, 698, 698, 761,
- 761, 824, 824, 887, 887, 950, 950, 1013, 1013, 1076, 1076, 1139, 1139,
- 1202, 1202, 1265, 1265, 1328, 1328, 1391, 1391, 1454, 1454, 1517, 1517, 1580,
- 1580, 1643, 1643, 1706, 1706, 1769, 1769, 1832, 1832, 1895, 1895, 1958, 1958,
- 2021, 447, 510, 510, 573, 573, 636, 636, 699, 699, 762, 762, 825,
- 825, 888, 888, 951, 951, 1014, 1014, 1077, 1077, 1140, 1140, 1203, 1203,
- 1266, 1266, 1329, 1329, 1392, 1392, 1455, 1455, 1518, 1518, 1581, 1581, 1644,
- 1644, 1707, 1707, 1770, 1770, 1833, 1833, 1896, 1896, 1959, 1959, 2022, 511,
- 574, 574, 637, 637, 700, 700, 763, 763, 826, 826, 889, 889, 952,
- 952, 1015, 1015, 1078, 1078, 1141, 1141, 1204, 1204, 1267, 1267, 1330, 1330,
- 1393, 1393, 1456, 1456, 1519, 1519, 1582, 1582, 1645, 1645, 1708, 1708, 1771,
- 1771, 1834, 1834, 1897, 1897, 1960, 1960, 2023, 575, 638, 638, 701, 701,
- 764, 764, 827, 827, 890, 890, 953, 953, 1016, 1016, 1079, 1079, 1142,
- 1142, 1205, 1205, 1268, 1268, 1331, 1331, 1394, 1394, 1457, 1457, 1520, 1520,
- 1583, 1583, 1646, 1646, 1709, 1709, 1772, 1772, 1835, 1835, 1898, 1898, 1961,
- 1961, 2024, 639, 702, 702, 765, 765, 828, 828, 891, 891, 954, 954,
- 1017, 1017, 1080, 1080, 1143, 1143, 1206, 1206, 1269, 1269, 1332, 1332, 1395,
- 1395, 1458, 1458, 1521, 1521, 1584, 1584, 1647, 1647, 1710, 1710, 1773, 1773,
- 1836, 1836, 1899, 1899, 1962, 1962, 2025, 703, 766, 766, 829, 829, 892,
- 892, 955, 955, 1018, 1018, 1081, 1081, 1144, 1144, 1207, 1207, 1270, 1270,
- 1333, 1333, 1396, 1396, 1459, 1459, 1522, 1522, 1585, 1585, 1648, 1648, 1711,
- 1711, 1774, 1774, 1837, 1837, 1900, 1900, 1963, 1963, 2026, 767, 830, 830,
- 893, 893, 956, 956, 1019, 1019, 1082, 1082, 1145, 1145, 1208, 1208, 1271,
- 1271, 1334, 1334, 1397, 1397, 1460, 1460, 1523, 1523, 1586, 1586, 1649, 1649,
- 1712, 1712, 1775, 1775, 1838, 1838, 1901, 1901, 1964, 1964, 2027, 831, 894,
- 894, 957, 957, 1020, 1020, 1083, 1083, 1146, 1146, 1209, 1209, 1272, 1272,
- 1335, 1335, 1398, 1398, 1461, 1461, 1524, 1524, 1587, 1587, 1650, 1650, 1713,
- 1713, 1776, 1776, 1839, 1839, 1902, 1902, 1965, 1965, 2028, 895, 958, 958,
- 1021, 1021, 1084, 1084, 1147, 1147, 1210, 1210, 1273, 1273, 1336, 1336, 1399,
- 1399, 1462, 1462, 1525, 1525, 1588, 1588, 1651, 1651, 1714, 1714, 1777, 1777,
- 1840, 1840, 1903, 1903, 1966, 1966, 2029, 959, 1022, 1022, 1085, 1085, 1148,
- 1148, 1211, 1211, 1274, 1274, 1337, 1337, 1400, 1400, 1463, 1463, 1526, 1526,
- 1589, 1589, 1652, 1652, 1715, 1715, 1778, 1778, 1841, 1841, 1904, 1904, 1967,
- 1967, 2030, 1023, 1086, 1086, 1149, 1149, 1212, 1212, 1275, 1275, 1338, 1338,
- 1401, 1401, 1464, 1464, 1527, 1527, 1590, 1590, 1653, 1653, 1716, 1716, 1779,
- 1779, 1842, 1842, 1905, 1905, 1968, 1968, 2031, 1087, 1150, 1150, 1213, 1213,
- 1276, 1276, 1339, 1339, 1402, 1402, 1465, 1465, 1528, 1528, 1591, 1591, 1654,
- 1654, 1717, 1717, 1780, 1780, 1843, 1843, 1906, 1906, 1969, 1969, 2032, 1151,
- 1214, 1214, 1277, 1277, 1340, 1340, 1403, 1403, 1466, 1466, 1529, 1529, 1592,
- 1592, 1655, 1655, 1718, 1718, 1781, 1781, 1844, 1844, 1907, 1907, 1970, 1970,
- 2033, 1215, 1278, 1278, 1341, 1341, 1404, 1404, 1467, 1467, 1530, 1530, 1593,
- 1593, 1656, 1656, 1719, 1719, 1782, 1782, 1845, 1845, 1908, 1908, 1971, 1971,
- 2034, 1279, 1342, 1342, 1405, 1405, 1468, 1468, 1531, 1531, 1594, 1594, 1657,
- 1657, 1720, 1720, 1783, 1783, 1846, 1846, 1909, 1909, 1972, 1972, 2035, 1343,
- 1406, 1406, 1469, 1469, 1532, 1532, 1595, 1595, 1658, 1658, 1721, 1721, 1784,
- 1784, 1847, 1847, 1910, 1910, 1973, 1973, 2036, 1407, 1470, 1470, 1533, 1533,
- 1596, 1596, 1659, 1659, 1722, 1722, 1785, 1785, 1848, 1848, 1911, 1911, 1974,
- 1974, 2037, 1471, 1534, 1534, 1597, 1597, 1660, 1660, 1723, 1723, 1786, 1786,
- 1849, 1849, 1912, 1912, 1975, 1975, 2038, 1535, 1598, 1598, 1661, 1661, 1724,
- 1724, 1787, 1787, 1850, 1850, 1913, 1913, 1976, 1976, 2039, 1599, 1662, 1662,
- 1725, 1725, 1788, 1788, 1851, 1851, 1914, 1914, 1977, 1977, 2040, 1663, 1726,
- 1726, 1789, 1789, 1852, 1852, 1915, 1915, 1978, 1978, 2041, 1727, 1790, 1790,
- 1853, 1853, 1916, 1916, 1979, 1979, 2042, 1791, 1854, 1854, 1917, 1917, 1980,
- 1980, 2043, 1855, 1918, 1918, 1981, 1981, 2044, 1919, 1982, 1982, 2045, 1983,
- 2046, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_64x64_neighbors[4097 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 64, 1, 1, 64, 64, 2,
- 65, 65, 128, 66, 129, 2, 2, 128, 128, 3, 66, 129, 192,
- 67, 130, 130, 193, 3, 3, 192, 192, 4, 67, 193, 256, 131,
- 194, 68, 131, 194, 257, 4, 4, 132, 195, 195, 258, 256, 256,
- 5, 68, 257, 320, 69, 132, 258, 321, 196, 259, 133, 196, 259,
- 322, 5, 5, 320, 320, 6, 69, 321, 384, 70, 133, 322, 385,
- 197, 260, 260, 323, 134, 197, 323, 386, 6, 6, 384, 384, 7,
- 70, 261, 324, 385, 448, 198, 261, 324, 387, 71, 134, 386, 449,
- 135, 198, 387, 450, 262, 325, 325, 388, 7, 7, 448, 448, 8,
- 71, 199, 262, 388, 451, 449, 512, 72, 135, 450, 513, 326, 389,
- 136, 199, 451, 514, 263, 326, 389, 452, 200, 263, 452, 515, 8,
- 8, 512, 512, 9, 72, 513, 576, 73, 136, 327, 390, 390, 453,
- 514, 577, 264, 327, 453, 516, 137, 200, 515, 578, 201, 264, 516,
- 579, 391, 454, 9, 9, 328, 391, 454, 517, 576, 576, 10, 73,
- 577, 640, 74, 137, 578, 641, 265, 328, 517, 580, 138, 201, 579,
- 642, 392, 455, 455, 518, 202, 265, 580, 643, 329, 392, 518, 581,
- 10, 10, 640, 640, 11, 74, 641, 704, 75, 138, 266, 329, 581,
- 644, 642, 705, 456, 519, 139, 202, 393, 456, 519, 582, 643, 706,
- 330, 393, 582, 645, 203, 266, 644, 707, 11, 11, 704, 704, 12,
- 75, 457, 520, 520, 583, 705, 768, 267, 330, 645, 708, 76, 139,
- 706, 769, 394, 457, 583, 646, 140, 203, 707, 770, 331, 394, 646,
- 709, 204, 267, 708, 771, 521, 584, 458, 521, 584, 647, 12, 12,
- 268, 331, 709, 772, 768, 768, 13, 76, 395, 458, 647, 710, 769,
- 832, 77, 140, 770, 833, 141, 204, 771, 834, 332, 395, 710, 773,
- 522, 585, 585, 648, 205, 268, 459, 522, 648, 711, 772, 835, 396,
- 459, 711, 774, 269, 332, 773, 836, 13, 13, 832, 832, 14, 77,
- 833, 896, 78, 141, 586, 649, 834, 897, 523, 586, 649, 712, 142,
- 205, 333, 396, 774, 837, 835, 898, 460, 523, 712, 775, 206, 269,
- 836, 899, 397, 460, 775, 838, 270, 333, 587, 650, 650, 713, 837,
- 900, 14, 14, 524, 587, 713, 776, 896, 896, 15, 78, 897, 960,
- 79, 142, 898, 961, 334, 397, 838, 901, 461, 524, 776, 839, 143,
- 206, 899, 962, 207, 270, 900, 963, 651, 714, 588, 651, 714, 777,
- 398, 461, 839, 902, 271, 334, 525, 588, 777, 840, 901, 964, 15,
- 15, 960, 960, 16, 79, 961, 1024, 80, 143, 462, 525, 840, 903,
- 962, 1025, 335, 398, 902, 965, 144, 207, 652, 715, 715, 778, 963,
- 1026, 589, 652, 778, 841, 208, 271, 964, 1027, 399, 462, 903, 966,
- 526, 589, 841, 904, 272, 335, 965, 1028, 716, 779, 16, 16, 463,
- 526, 904, 967, 1024, 1024, 17, 80, 653, 716, 779, 842, 1025, 1088,
- 336, 399, 966, 1029, 81, 144, 1026, 1089, 590, 653, 842, 905, 145,
- 208, 1027, 1090, 209, 272, 400, 463, 967, 1030, 1028, 1091, 527, 590,
- 905, 968, 717, 780, 780, 843, 273, 336, 1029, 1092, 654, 717, 843,
- 906, 464, 527, 968, 1031, 17, 17, 1088, 1088, 18, 81, 337, 400,
- 591, 654, 906, 969, 1030, 1093, 1089, 1152, 82, 145, 1090, 1153, 146,
- 209, 1091, 1154, 528, 591, 969, 1032, 401, 464, 781, 844, 1031, 1094,
- 210, 273, 718, 781, 844, 907, 1092, 1155, 655, 718, 907, 970, 274,
- 337, 1093, 1156, 465, 528, 1032, 1095, 592, 655, 970, 1033, 338, 401,
- 1094, 1157, 18, 18, 1152, 1152, 19, 82, 1153, 1216, 83, 146, 782,
- 845, 845, 908, 1154, 1217, 719, 782, 908, 971, 147, 210, 529, 592,
- 1033, 1096, 1155, 1218, 402, 465, 1095, 1158, 211, 274, 656, 719, 971,
- 1034, 1156, 1219, 275, 338, 1157, 1220, 466, 529, 1096, 1159, 593, 656,
- 1034, 1097, 846, 909, 783, 846, 909, 972, 339, 402, 1158, 1221, 19,
- 19, 720, 783, 972, 1035, 1216, 1216, 20, 83, 1217, 1280, 84, 147,
- 1218, 1281, 530, 593, 1097, 1160, 148, 211, 1219, 1282, 403, 466, 657,
- 720, 1035, 1098, 1159, 1222, 212, 275, 1220, 1283, 847, 910, 910, 973,
- 594, 657, 1098, 1161, 276, 339, 467, 530, 784, 847, 973, 1036, 1160,
- 1223, 1221, 1284, 721, 784, 1036, 1099, 340, 403, 1222, 1285, 20, 20,
- 1280, 1280, 21, 84, 531, 594, 1161, 1224, 1281, 1344, 85, 148, 658,
- 721, 1099, 1162, 1282, 1345, 404, 467, 1223, 1286, 149, 212, 911, 974,
- 1283, 1346, 848, 911, 974, 1037, 213, 276, 1284, 1347, 785, 848, 1037,
- 1100, 595, 658, 1162, 1225, 468, 531, 1224, 1287, 277, 340, 1285, 1348,
- 722, 785, 1100, 1163, 341, 404, 1286, 1349, 532, 595, 912, 975, 975,
- 1038, 1225, 1288, 659, 722, 1163, 1226, 21, 21, 1344, 1344, 22, 85,
- 849, 912, 1038, 1101, 1345, 1408, 86, 149, 1346, 1409, 405, 468, 1287,
- 1350, 150, 213, 786, 849, 1101, 1164, 1347, 1410, 214, 277, 596, 659,
- 1226, 1289, 1348, 1411, 469, 532, 723, 786, 1164, 1227, 1288, 1351, 278,
- 341, 1349, 1412, 976, 1039, 913, 976, 1039, 1102, 342, 405, 850, 913,
- 1102, 1165, 1350, 1413, 660, 723, 1227, 1290, 533, 596, 1289, 1352, 22,
- 22, 1408, 1408, 23, 86, 787, 850, 1165, 1228, 1409, 1472, 87, 150,
- 406, 469, 1351, 1414, 1410, 1473, 151, 214, 1411, 1474, 597, 660, 1290,
- 1353, 724, 787, 1228, 1291, 215, 278, 977, 1040, 1040, 1103, 1412, 1475,
- 470, 533, 1352, 1415, 914, 977, 1103, 1166, 279, 342, 1413, 1476, 851,
- 914, 1166, 1229, 661, 724, 1291, 1354, 343, 406, 534, 597, 1353, 1416,
- 1414, 1477, 788, 851, 1229, 1292, 23, 23, 1472, 1472, 24, 87, 1473,
- 1536, 407, 470, 1041, 1104, 1415, 1478, 88, 151, 978, 1041, 1104, 1167,
- 1474, 1537, 598, 661, 1354, 1417, 152, 215, 725, 788, 1292, 1355, 1475,
- 1538, 915, 978, 1167, 1230, 216, 279, 1476, 1539, 471, 534, 1416, 1479,
- 852, 915, 1230, 1293, 280, 343, 1477, 1540, 662, 725, 1355, 1418, 535,
- 598, 789, 852, 1293, 1356, 1417, 1480, 344, 407, 1478, 1541, 1042, 1105,
- 1105, 1168, 979, 1042, 1168, 1231, 24, 24, 408, 471, 916, 979, 1231,
- 1294, 1479, 1542, 1536, 1536, 25, 88, 1537, 1600, 726, 789, 1356, 1419,
- 89, 152, 599, 662, 1418, 1481, 1538, 1601, 153, 216, 1539, 1602, 853,
- 916, 1294, 1357, 472, 535, 1480, 1543, 217, 280, 1540, 1603, 1106, 1169,
- 281, 344, 663, 726, 1043, 1106, 1169, 1232, 1419, 1482, 1541, 1604, 790,
- 853, 1357, 1420, 980, 1043, 1232, 1295, 536, 599, 1481, 1544, 345, 408,
- 1542, 1605, 917, 980, 1295, 1358, 727, 790, 1420, 1483, 409, 472, 1543,
- 1606, 25, 25, 600, 663, 1482, 1545, 1600, 1600, 26, 89, 1601, 1664,
- 90, 153, 854, 917, 1358, 1421, 1602, 1665, 154, 217, 1107, 1170, 1170,
- 1233, 1603, 1666, 473, 536, 1044, 1107, 1233, 1296, 1544, 1607, 218, 281,
- 1604, 1667, 664, 727, 981, 1044, 1296, 1359, 1483, 1546, 791, 854, 1421,
- 1484, 282, 345, 1605, 1668, 537, 600, 1545, 1608, 918, 981, 1359, 1422,
- 346, 409, 1606, 1669, 728, 791, 1484, 1547, 1171, 1234, 1108, 1171, 1234,
- 1297, 410, 473, 601, 664, 855, 918, 1422, 1485, 1546, 1609, 1607, 1670,
- 26, 26, 1664, 1664, 27, 90, 1045, 1108, 1297, 1360, 1665, 1728, 91,
- 154, 1666, 1729, 155, 218, 1667, 1730, 474, 537, 982, 1045, 1360, 1423,
- 1608, 1671, 219, 282, 792, 855, 1485, 1548, 1668, 1731, 665, 728, 1547,
- 1610, 283, 346, 919, 982, 1423, 1486, 1669, 1732, 538, 601, 1609, 1672,
- 1172, 1235, 1235, 1298, 347, 410, 1109, 1172, 1298, 1361, 1670, 1733, 729,
- 792, 1548, 1611, 856, 919, 1486, 1549, 1046, 1109, 1361, 1424, 602, 665,
- 1610, 1673, 411, 474, 1671, 1734, 27, 27, 1728, 1728, 28, 91, 983,
- 1046, 1424, 1487, 1729, 1792, 92, 155, 1730, 1793, 156, 219, 475, 538,
- 1672, 1735, 1731, 1794, 793, 856, 1549, 1612, 666, 729, 1611, 1674, 220,
- 283, 1236, 1299, 1732, 1795, 920, 983, 1487, 1550, 1173, 1236, 1299, 1362,
- 1110, 1173, 1362, 1425, 284, 347, 1733, 1796, 539, 602, 1673, 1736, 1047,
- 1110, 1425, 1488, 348, 411, 730, 793, 1612, 1675, 1734, 1797, 857, 920,
- 1550, 1613, 603, 666, 1674, 1737, 984, 1047, 1488, 1551, 412, 475, 1735,
- 1798, 28, 28, 1237, 1300, 1300, 1363, 1792, 1792, 29, 92, 1793, 1856,
- 93, 156, 794, 857, 1174, 1237, 1363, 1426, 1613, 1676, 1794, 1857, 476,
- 539, 1736, 1799, 157, 220, 667, 730, 921, 984, 1551, 1614, 1675, 1738,
- 1795, 1858, 1111, 1174, 1426, 1489, 221, 284, 1796, 1859, 540, 603, 1048,
- 1111, 1489, 1552, 1737, 1800, 285, 348, 1797, 1860, 858, 921, 1614, 1677,
- 731, 794, 1676, 1739, 349, 412, 1798, 1861, 985, 1048, 1552, 1615, 1301,
- 1364, 604, 667, 1238, 1301, 1364, 1427, 1738, 1801, 413, 476, 1175, 1238,
- 1427, 1490, 1799, 1862, 795, 858, 1677, 1740, 29, 29, 1112, 1175, 1490,
- 1553, 1856, 1856, 30, 93, 922, 985, 1615, 1678, 1857, 1920, 94, 157,
- 1858, 1921, 477, 540, 668, 731, 1739, 1802, 1800, 1863, 158, 221, 1859,
- 1922, 1049, 1112, 1553, 1616, 222, 285, 1860, 1923, 541, 604, 1801, 1864,
- 286, 349, 859, 922, 1302, 1365, 1365, 1428, 1678, 1741, 1861, 1924, 732,
- 795, 1740, 1803, 1239, 1302, 1428, 1491, 986, 1049, 1616, 1679, 350, 413,
- 1862, 1925, 1176, 1239, 1491, 1554, 605, 668, 1802, 1865, 414, 477, 1113,
- 1176, 1554, 1617, 1863, 1926, 796, 859, 1741, 1804, 923, 986, 1679, 1742,
- 30, 30, 1920, 1920, 31, 94, 669, 732, 1803, 1866, 1921, 1984, 478,
- 541, 1864, 1927, 95, 158, 1050, 1113, 1617, 1680, 1922, 1985, 1366, 1429,
- 159, 222, 1303, 1366, 1429, 1492, 1923, 1986, 1240, 1303, 1492, 1555, 223,
- 286, 1924, 1987, 860, 923, 1742, 1805, 542, 605, 1865, 1928, 733, 796,
- 987, 1050, 1680, 1743, 1804, 1867, 287, 350, 1177, 1240, 1555, 1618, 1925,
- 1988, 351, 414, 1926, 1989, 606, 669, 1114, 1177, 1618, 1681, 1866, 1929,
- 924, 987, 1743, 1806, 415, 478, 797, 860, 1805, 1868, 1927, 1990, 1367,
- 1430, 1430, 1493, 1304, 1367, 1493, 1556, 1051, 1114, 1681, 1744, 670, 733,
- 1867, 1930, 31, 31, 1984, 1984, 32, 95, 479, 542, 1241, 1304, 1556,
- 1619, 1928, 1991, 1985, 2048, 96, 159, 1986, 2049, 160, 223, 1987, 2050,
- 861, 924, 1178, 1241, 1619, 1682, 1806, 1869, 224, 287, 988, 1051, 1744,
- 1807, 1988, 2051, 543, 606, 1929, 1992, 734, 797, 1868, 1931, 288, 351,
- 1989, 2052, 1115, 1178, 1682, 1745, 1431, 1494, 352, 415, 1368, 1431, 1494,
- 1557, 1990, 2053, 607, 670, 1930, 1993, 925, 988, 1305, 1368, 1557, 1620,
- 1807, 1870, 798, 861, 1869, 1932, 416, 479, 1052, 1115, 1745, 1808, 1991,
- 2054, 1242, 1305, 1620, 1683, 671, 734, 1931, 1994, 480, 543, 1992, 2055,
- 32, 32, 2048, 2048, 33, 96, 1179, 1242, 1683, 1746, 2049, 2112, 97,
- 160, 2050, 2113, 862, 925, 1870, 1933, 989, 1052, 1808, 1871, 161, 224,
- 2051, 2114, 225, 288, 544, 607, 735, 798, 1432, 1495, 1495, 1558, 1932,
- 1995, 1993, 2056, 2052, 2115, 1116, 1179, 1746, 1809, 1369, 1432, 1558, 1621,
- 289, 352, 2053, 2116, 1306, 1369, 1621, 1684, 608, 671, 1994, 2057, 353,
- 416, 926, 989, 1871, 1934, 2054, 2117, 1243, 1306, 1684, 1747, 799, 862,
- 1053, 1116, 1809, 1872, 1933, 1996, 417, 480, 2055, 2118, 672, 735, 1180,
- 1243, 1747, 1810, 1995, 2058, 1496, 1559, 481, 544, 2056, 2119, 1433, 1496,
- 1559, 1622, 33, 33, 990, 1053, 1872, 1935, 2112, 2112, 34, 97, 863,
- 926, 1934, 1997, 2113, 2176, 98, 161, 1370, 1433, 1622, 1685, 2114, 2177,
- 162, 225, 1117, 1180, 1810, 1873, 2115, 2178, 736, 799, 1996, 2059, 545,
- 608, 1307, 1370, 1685, 1748, 2057, 2120, 226, 289, 2116, 2179, 290, 353,
- 2117, 2180, 1244, 1307, 1748, 1811, 927, 990, 1935, 1998, 609, 672, 1054,
- 1117, 1873, 1936, 2058, 2121, 354, 417, 2118, 2181, 800, 863, 1997, 2060,
- 1497, 1560, 1560, 1623, 1181, 1244, 1811, 1874, 418, 481, 1434, 1497, 1623,
- 1686, 2119, 2182, 673, 736, 2059, 2122, 1371, 1434, 1686, 1749, 991, 1054,
- 1936, 1999, 482, 545, 864, 927, 1998, 2061, 2120, 2183, 1118, 1181, 1874,
- 1937, 34, 34, 1308, 1371, 1749, 1812, 2176, 2176, 35, 98, 2177, 2240,
- 99, 162, 2178, 2241, 737, 800, 2060, 2123, 163, 226, 2179, 2242, 546,
- 609, 2121, 2184, 227, 290, 1245, 1308, 1812, 1875, 2180, 2243, 928, 991,
- 1999, 2062, 291, 354, 1055, 1118, 1561, 1624, 1937, 2000, 2181, 2244, 1498,
- 1561, 1624, 1687, 610, 673, 2122, 2185, 801, 864, 1435, 1498, 1687, 1750,
- 2061, 2124, 355, 418, 1182, 1245, 1875, 1938, 2182, 2245, 1372, 1435, 1750,
- 1813, 419, 482, 2183, 2246, 674, 737, 2123, 2186, 992, 1055, 2000, 2063,
- 1309, 1372, 1813, 1876, 865, 928, 1119, 1182, 1938, 2001, 2062, 2125, 483,
- 546, 2184, 2247, 35, 35, 2240, 2240, 36, 99, 2241, 2304, 100, 163,
- 738, 801, 1246, 1309, 1876, 1939, 2124, 2187, 2242, 2305, 1562, 1625, 1625,
- 1688, 164, 227, 1499, 1562, 1688, 1751, 2243, 2306, 547, 610, 2185, 2248,
- 228, 291, 2244, 2307, 1056, 1119, 1436, 1499, 1751, 1814, 2001, 2064, 929,
- 992, 2063, 2126, 292, 355, 2245, 2308, 1183, 1246, 1939, 2002, 611, 674,
- 802, 865, 1373, 1436, 1814, 1877, 2125, 2188, 2186, 2249, 356, 419, 2246,
- 2309, 1310, 1373, 1877, 1940, 420, 483, 993, 1056, 2064, 2127, 2247, 2310,
- 675, 738, 2187, 2250, 1120, 1183, 2002, 2065, 866, 929, 1626, 1689, 2126,
- 2189, 1563, 1626, 1689, 1752, 484, 547, 1500, 1563, 1752, 1815, 2248, 2311,
- 1247, 1310, 1940, 2003, 36, 36, 739, 802, 2188, 2251, 2304, 2304, 37,
- 100, 1437, 1500, 1815, 1878, 2305, 2368, 101, 164, 2306, 2369, 548, 611,
- 2249, 2312, 165, 228, 1057, 1120, 2065, 2128, 2307, 2370, 930, 993, 2127,
- 2190, 1374, 1437, 1878, 1941, 229, 292, 1184, 1247, 2003, 2066, 2308, 2371,
- 293, 356, 803, 866, 2189, 2252, 2309, 2372, 612, 675, 2250, 2313, 1311,
- 1374, 1941, 2004, 357, 420, 1627, 1690, 1690, 1753, 2310, 2373, 1564, 1627,
- 1753, 1816, 994, 1057, 2128, 2191, 1121, 1184, 2066, 2129, 676, 739, 1501,
- 1564, 1816, 1879, 2251, 2314, 421, 484, 2311, 2374, 867, 930, 2190, 2253,
- 1248, 1311, 2004, 2067, 1438, 1501, 1879, 1942, 485, 548, 2312, 2375, 740,
- 803, 2252, 2315, 37, 37, 2368, 2368, 38, 101, 1058, 1121, 1375, 1438,
- 1942, 2005, 2129, 2192, 2369, 2432, 102, 165, 2370, 2433, 549, 612, 931,
- 994, 1185, 1248, 2067, 2130, 2191, 2254, 2313, 2376, 166, 229, 2371, 2434,
- 1691, 1754, 230, 293, 1628, 1691, 1754, 1817, 2372, 2435, 804, 867, 1312,
- 1375, 2005, 2068, 2253, 2316, 1565, 1628, 1817, 1880, 294, 357, 613, 676,
- 2314, 2377, 2373, 2436, 1502, 1565, 1880, 1943, 358, 421, 1122, 1185, 2130,
- 2193, 2374, 2437, 995, 1058, 2192, 2255, 1249, 1312, 2068, 2131, 677, 740,
- 1439, 1502, 1943, 2006, 2315, 2378, 868, 931, 2254, 2317, 422, 485, 2375,
- 2438, 486, 549, 1376, 1439, 2006, 2069, 2376, 2439, 741, 804, 1692, 1755,
- 1755, 1818, 2316, 2379, 1059, 1122, 2193, 2256, 1186, 1249, 1629, 1692, 1818,
- 1881, 2131, 2194, 38, 38, 932, 995, 2255, 2318, 2432, 2432, 39, 102,
- 2433, 2496, 103, 166, 550, 613, 1566, 1629, 1881, 1944, 2377, 2440, 2434,
- 2497, 167, 230, 1313, 1376, 2069, 2132, 2435, 2498, 231, 294, 1503, 1566,
- 1944, 2007, 2436, 2499, 805, 868, 2317, 2380, 614, 677, 2378, 2441, 295,
- 358, 2437, 2500, 1123, 1186, 2194, 2257, 996, 1059, 2256, 2319, 1440, 1503,
- 2007, 2070, 1250, 1313, 2132, 2195, 359, 422, 2438, 2501, 678, 741, 869,
- 932, 2318, 2381, 2379, 2442, 1756, 1819, 423, 486, 1693, 1756, 1819, 1882,
- 2439, 2502, 1377, 1440, 2070, 2133, 1630, 1693, 1882, 1945, 487, 550, 1060,
- 1123, 2257, 2320, 2440, 2503, 1187, 1250, 1567, 1630, 1945, 2008, 2195, 2258,
- 742, 805, 2380, 2443, 933, 996, 2319, 2382, 1314, 1377, 2133, 2196, 39,
- 39, 1504, 1567, 2008, 2071, 2496, 2496, 40, 103, 2497, 2560, 551, 614,
- 2441, 2504, 104, 167, 2498, 2561, 168, 231, 2499, 2562, 806, 869, 2381,
- 2444, 232, 295, 2500, 2563, 1441, 1504, 2071, 2134, 1124, 1187, 2258, 2321,
- 615, 678, 2442, 2505, 296, 359, 997, 1060, 1251, 1314, 1757, 1820, 1820,
- 1883, 2196, 2259, 2320, 2383, 2501, 2564, 1694, 1757, 1883, 1946, 360, 423,
- 2502, 2565, 1631, 1694, 1946, 2009, 870, 933, 1378, 1441, 2134, 2197, 2382,
- 2445, 679, 742, 2443, 2506, 424, 487, 1568, 1631, 2009, 2072, 2503, 2566,
- 1188, 1251, 2259, 2322, 1061, 1124, 2321, 2384, 488, 551, 2504, 2567, 743,
- 806, 1505, 1568, 2072, 2135, 2444, 2507, 1315, 1378, 2197, 2260, 934, 997,
- 2383, 2446, 40, 40, 552, 615, 2505, 2568, 2560, 2560, 41, 104, 1821,
- 1884, 2561, 2624, 1758, 1821, 1884, 1947, 105, 168, 1442, 1505, 2135, 2198,
- 2562, 2625, 169, 232, 807, 870, 1695, 1758, 1947, 2010, 2445, 2508, 2563,
- 2626, 1125, 1188, 2322, 2385, 1252, 1315, 2260, 2323, 233, 296, 2564, 2627,
- 616, 679, 998, 1061, 1632, 1695, 2010, 2073, 2384, 2447, 2506, 2569, 297,
- 360, 2565, 2628, 1379, 1442, 2198, 2261, 1569, 1632, 2073, 2136, 361, 424,
- 871, 934, 2446, 2509, 2566, 2629, 680, 743, 2507, 2570, 425, 488, 1189,
- 1252, 2323, 2386, 2567, 2630, 1506, 1569, 2136, 2199, 1062, 1125, 2385, 2448,
- 1316, 1379, 2261, 2324, 1822, 1885, 1885, 1948, 744, 807, 2508, 2571, 489,
- 552, 1759, 1822, 1948, 2011, 2568, 2631, 935, 998, 2447, 2510, 1696, 1759,
- 2011, 2074, 1443, 1506, 2199, 2262, 553, 616, 2569, 2632, 41, 41, 2624,
- 2624, 42, 105, 1633, 1696, 2074, 2137, 2625, 2688, 106, 169, 1126, 1189,
- 2386, 2449, 2626, 2689, 808, 871, 1253, 1316, 2324, 2387, 2509, 2572, 170,
- 233, 2627, 2690, 999, 1062, 2448, 2511, 234, 297, 1380, 1443, 2262, 2325,
- 2628, 2691, 617, 680, 1570, 1633, 2137, 2200, 2570, 2633, 298, 361, 2629,
- 2692, 872, 935, 2510, 2573, 362, 425, 1886, 1949, 2630, 2693, 1507, 1570,
- 2200, 2263, 681, 744, 1823, 1886, 1949, 2012, 2571, 2634, 1190, 1253, 2387,
- 2450, 1760, 1823, 2012, 2075, 1063, 1126, 1317, 1380, 2325, 2388, 2449, 2512,
- 426, 489, 2631, 2694, 1697, 1760, 2075, 2138, 745, 808, 936, 999, 1444,
- 1507, 2263, 2326, 2511, 2574, 2572, 2635, 490, 553, 2632, 2695, 1634, 1697,
- 2138, 2201, 1254, 1317, 2388, 2451, 554, 617, 1127, 1190, 2450, 2513, 2633,
- 2696, 42, 42, 2688, 2688, 43, 106, 809, 872, 1571, 1634, 2201, 2264,
- 2573, 2636, 2689, 2752, 107, 170, 1381, 1444, 2326, 2389, 2690, 2753, 1000,
- 1063, 2512, 2575, 171, 234, 2691, 2754, 1887, 1950, 1950, 2013, 618, 681,
- 2634, 2697, 235, 298, 1824, 1887, 2013, 2076, 2692, 2755, 1508, 1571, 2264,
- 2327, 1761, 1824, 2076, 2139, 299, 362, 2693, 2756, 873, 936, 2574, 2637,
- 1191, 1254, 2451, 2514, 363, 426, 682, 745, 1318, 1381, 1698, 1761, 2139,
- 2202, 2389, 2452, 2635, 2698, 2694, 2757, 1064, 1127, 2513, 2576, 427, 490,
- 1445, 1508, 2327, 2390, 2695, 2758, 1635, 1698, 2202, 2265, 937, 1000, 2575,
- 2638, 746, 809, 2636, 2699, 491, 554, 2696, 2759, 1255, 1318, 1572, 1635,
- 2265, 2328, 2452, 2515, 1951, 2014, 1128, 1191, 1888, 1951, 2014, 2077, 2514,
- 2577, 1382, 1445, 2390, 2453, 555, 618, 1825, 1888, 2077, 2140, 2697, 2760,
- 810, 873, 2637, 2700, 43, 43, 2752, 2752, 44, 107, 1001, 1064, 2576,
- 2639, 2753, 2816, 108, 171, 1762, 1825, 2140, 2203, 2754, 2817, 172, 235,
- 1509, 1572, 2328, 2391, 2755, 2818, 619, 682, 2698, 2761, 236, 299, 2756,
- 2819, 1699, 1762, 2203, 2266, 874, 937, 2638, 2701, 300, 363, 1192, 1255,
- 2515, 2578, 2757, 2820, 1319, 1382, 2453, 2516, 683, 746, 1065, 1128, 2577,
- 2640, 2699, 2762, 364, 427, 1636, 1699, 2266, 2329, 2758, 2821, 1446, 1509,
- 2391, 2454, 428, 491, 1952, 2015, 2015, 2078, 2759, 2822, 938, 1001, 1889,
- 1952, 2078, 2141, 2639, 2702, 747, 810, 2700, 2763, 1573, 1636, 2329, 2392,
- 1826, 1889, 2141, 2204, 492, 555, 1256, 1319, 2516, 2579, 2760, 2823, 1129,
- 1192, 1383, 1446, 2454, 2517, 2578, 2641, 1763, 1826, 2204, 2267, 556, 619,
- 2761, 2824, 811, 874, 2701, 2764, 1002, 1065, 1510, 1573, 2392, 2455, 2640,
- 2703, 44, 44, 1700, 1763, 2267, 2330, 2816, 2816, 45, 108, 2817, 2880,
- 109, 172, 2818, 2881, 173, 236, 2819, 2882, 620, 683, 2762, 2825, 237,
- 300, 1320, 1383, 2517, 2580, 2820, 2883, 1193, 1256, 2579, 2642, 875, 938,
- 1637, 1700, 2330, 2393, 2702, 2765, 2016, 2079, 301, 364, 1447, 1510, 1953,
- 2016, 2079, 2142, 2455, 2518, 2821, 2884, 1066, 1129, 2641, 2704, 1890, 1953,
- 2142, 2205, 684, 747, 2763, 2826, 365, 428, 2822, 2885, 1827, 1890, 2205,
- 2268, 1574, 1637, 2393, 2456, 429, 492, 939, 1002, 2703, 2766, 2823, 2886,
- 748, 811, 1764, 1827, 2268, 2331, 2764, 2827, 1257, 1320, 2580, 2643, 1384,
- 1447, 2518, 2581, 1130, 1193, 2642, 2705, 493, 556, 2824, 2887, 1511, 1574,
- 2456, 2519, 1701, 1764, 2331, 2394, 812, 875, 1003, 1066, 2704, 2767, 2765,
- 2828, 557, 620, 2825, 2888, 2017, 2080, 2080, 2143, 45, 45, 2880, 2880,
- 46, 109, 1954, 2017, 2143, 2206, 2881, 2944, 110, 173, 1638, 1701, 2394,
- 2457, 2882, 2945, 1321, 1384, 2581, 2644, 174, 237, 621, 684, 1194, 1257,
- 1891, 1954, 2206, 2269, 2643, 2706, 2826, 2889, 2883, 2946, 1448, 1511, 2519,
- 2582, 238, 301, 876, 939, 2766, 2829, 2884, 2947, 1828, 1891, 2269, 2332,
- 1067, 1130, 2705, 2768, 302, 365, 2885, 2948, 685, 748, 1575, 1638, 2457,
- 2520, 2827, 2890, 366, 429, 2886, 2949, 1765, 1828, 2332, 2395, 940, 1003,
- 2767, 2830, 1258, 1321, 2644, 2707, 430, 493, 1385, 1448, 2582, 2645, 2887,
- 2950, 749, 812, 2828, 2891, 1131, 1194, 1702, 1765, 2395, 2458, 2706, 2769,
- 1512, 1575, 2520, 2583, 2081, 2144, 494, 557, 2018, 2081, 2144, 2207, 2888,
- 2951, 1955, 2018, 2207, 2270, 1004, 1067, 2768, 2831, 813, 876, 2829, 2892,
- 1892, 1955, 2270, 2333, 558, 621, 1639, 1702, 2458, 2521, 2889, 2952, 1322,
- 1385, 2645, 2708, 46, 46, 2944, 2944, 47, 110, 1195, 1258, 1449, 1512,
- 1829, 1892, 2333, 2396, 2583, 2646, 2707, 2770, 2945, 3008, 111, 174, 2946,
- 3009, 622, 685, 2890, 2953, 175, 238, 2947, 3010, 877, 940, 2830, 2893,
- 239, 302, 1068, 1131, 1576, 1639, 2521, 2584, 2769, 2832, 2948, 3011, 1766,
- 1829, 2396, 2459, 303, 366, 2949, 3012, 686, 749, 2891, 2954, 367, 430,
- 2082, 2145, 2145, 2208, 2950, 3013, 1386, 1449, 2646, 2709, 1259, 1322, 2019,
- 2082, 2208, 2271, 2708, 2771, 941, 1004, 1703, 1766, 2459, 2522, 2831, 2894,
- 1513, 1576, 1956, 2019, 2271, 2334, 2584, 2647, 431, 494, 2951, 3014, 750,
- 813, 1132, 1195, 2770, 2833, 2892, 2955, 1893, 1956, 2334, 2397, 495, 558,
- 2952, 3015, 1640, 1703, 2522, 2585, 1005, 1068, 2832, 2895, 814, 877, 1830,
- 1893, 2397, 2460, 2893, 2956, 559, 622, 1323, 1386, 2709, 2772, 2953, 3016,
- 1450, 1513, 2647, 2710, 1196, 1259, 2771, 2834, 47, 47, 3008, 3008, 48,
- 111, 1767, 1830, 2460, 2523, 3009, 3072, 1577, 1640, 2585, 2648, 112, 175,
- 3010, 3073, 623, 686, 2954, 3017, 878, 941, 2146, 2209, 2894, 2957, 176,
- 239, 3011, 3074, 1069, 1132, 2083, 2146, 2209, 2272, 2833, 2896, 240, 303,
- 2020, 2083, 2272, 2335, 3012, 3075, 304, 367, 1704, 1767, 2523, 2586, 3013,
- 3076, 687, 750, 1957, 2020, 2335, 2398, 2955, 3018, 1387, 1450, 2710, 2773,
- 1260, 1323, 2772, 2835, 368, 431, 1514, 1577, 2648, 2711, 3014, 3077, 942,
- 1005, 2895, 2958, 1894, 1957, 2398, 2461, 1133, 1196, 2834, 2897, 432, 495,
- 751, 814, 2956, 3019, 3015, 3078, 1641, 1704, 2586, 2649, 1831, 1894, 2461,
- 2524, 496, 559, 3016, 3079, 1006, 1069, 2896, 2959, 1324, 1387, 2773, 2836,
- 815, 878, 1451, 1514, 2711, 2774, 2957, 3020, 2147, 2210, 2210, 2273, 1768,
- 1831, 2524, 2587, 560, 623, 2084, 2147, 2273, 2336, 3017, 3080, 1197, 1260,
- 2835, 2898, 1578, 1641, 2649, 2712, 2021, 2084, 2336, 2399, 48, 48, 3072,
- 3072, 49, 112, 3073, 3136, 624, 687, 3018, 3081, 113, 176, 879, 942,
- 1070, 1133, 1958, 2021, 2399, 2462, 2897, 2960, 2958, 3021, 3074, 3137, 177,
- 240, 1705, 1768, 2587, 2650, 3075, 3138, 241, 304, 3076, 3139, 1388, 1451,
- 2774, 2837, 1895, 1958, 2462, 2525, 688, 751, 1261, 1324, 1515, 1578, 2712,
- 2775, 2836, 2899, 3019, 3082, 305, 368, 3077, 3140, 943, 1006, 2959, 3022,
- 369, 432, 3078, 3141, 1134, 1197, 1642, 1705, 2650, 2713, 2898, 2961, 1832,
- 1895, 2525, 2588, 752, 815, 3020, 3083, 433, 496, 2211, 2274, 3079, 3142,
- 2148, 2211, 2274, 2337, 2085, 2148, 2337, 2400, 497, 560, 1007, 1070, 1452,
- 1515, 1769, 1832, 2588, 2651, 2775, 2838, 2960, 3023, 3080, 3143, 1325, 1388,
- 2837, 2900, 2022, 2085, 2400, 2463, 816, 879, 3021, 3084, 1579, 1642, 2713,
- 2776, 1198, 1261, 2899, 2962, 561, 624, 1959, 2022, 2463, 2526, 3081, 3144,
- 1706, 1769, 2651, 2714, 1071, 1134, 2961, 3024, 49, 49, 880, 943, 1896,
- 1959, 2526, 2589, 3022, 3085, 3136, 3136, 50, 113, 625, 688, 3082, 3145,
- 3137, 3200, 114, 177, 3138, 3201, 178, 241, 1389, 1452, 2838, 2901, 3139,
- 3202, 1516, 1579, 2776, 2839, 242, 305, 1262, 1325, 2900, 2963, 3140, 3203,
- 2212, 2275, 2275, 2338, 689, 752, 1833, 1896, 2589, 2652, 3083, 3146, 306,
- 369, 1643, 1706, 2149, 2212, 2338, 2401, 2714, 2777, 3141, 3204, 944, 1007,
- 3023, 3086, 1135, 1198, 2086, 2149, 2401, 2464, 2962, 3025, 370, 433, 3142,
- 3205, 753, 816, 2023, 2086, 2464, 2527, 3084, 3147, 1770, 1833, 2652, 2715,
- 434, 497, 3143, 3206, 1453, 1516, 2839, 2902, 1326, 1389, 2901, 2964, 1008,
- 1071, 3024, 3087, 1580, 1643, 1960, 2023, 2527, 2590, 2777, 2840, 498, 561,
- 3144, 3207, 817, 880, 1199, 1262, 2963, 3026, 3085, 3148, 1707, 1770, 2715,
- 2778, 562, 625, 1897, 1960, 2590, 2653, 3145, 3208, 2276, 2339, 1072, 1135,
- 3025, 3088, 2213, 2276, 2339, 2402, 881, 944, 3086, 3149, 626, 689, 1390,
- 1453, 2150, 2213, 2402, 2465, 2902, 2965, 3146, 3209, 50, 50, 1517, 1580,
- 2840, 2903, 3200, 3200, 51, 114, 3201, 3264, 115, 178, 1834, 1897, 2653,
- 2716, 3202, 3265, 1263, 1326, 2964, 3027, 179, 242, 2087, 2150, 2465, 2528,
- 3203, 3266, 1644, 1707, 2778, 2841, 243, 306, 3204, 3267, 690, 753, 3147,
- 3210, 2024, 2087, 2528, 2591, 307, 370, 945, 1008, 3087, 3150, 3205, 3268,
- 1136, 1199, 3026, 3089, 1771, 1834, 2716, 2779, 371, 434, 3206, 3269, 1961,
- 2024, 2591, 2654, 754, 817, 3148, 3211, 1454, 1517, 2903, 2966, 435, 498,
- 1327, 1390, 1581, 1644, 2841, 2904, 2965, 3028, 3207, 3270, 1009, 1072, 3088,
- 3151, 1898, 1961, 2654, 2717, 499, 562, 1200, 1263, 1708, 1771, 2277, 2340,
- 2340, 2403, 2779, 2842, 3027, 3090, 3208, 3271, 818, 881, 2214, 2277, 2403,
- 2466, 3149, 3212, 2151, 2214, 2466, 2529, 563, 626, 3209, 3272, 2088, 2151,
- 2529, 2592, 1073, 1136, 1835, 1898, 2717, 2780, 3089, 3152, 1518, 1581, 2904,
- 2967, 1391, 1454, 2966, 3029, 882, 945, 3150, 3213, 627, 690, 1645, 1708,
- 2842, 2905, 3210, 3273, 51, 51, 1264, 1327, 3028, 3091, 3264, 3264, 52,
- 115, 2025, 2088, 2592, 2655, 3265, 3328, 116, 179, 3266, 3329, 180, 243,
- 3267, 3330, 244, 307, 1772, 1835, 2780, 2843, 3268, 3331, 691, 754, 3211,
- 3274, 946, 1009, 1137, 1200, 1962, 2025, 2655, 2718, 3090, 3153, 3151, 3214,
- 308, 371, 3269, 3332, 1455, 1518, 2341, 2404, 2967, 3030, 372, 435, 2278,
- 2341, 2404, 2467, 3270, 3333, 1582, 1645, 2905, 2968, 755, 818, 1328, 1391,
- 3029, 3092, 3212, 3275, 2215, 2278, 2467, 2530, 1899, 1962, 2718, 2781, 436,
- 499, 3271, 3334, 1709, 1772, 2843, 2906, 1010, 1073, 2152, 2215, 2530, 2593,
- 3152, 3215, 1201, 1264, 3091, 3154, 500, 563, 3272, 3335, 819, 882, 2089,
- 2152, 2593, 2656, 3213, 3276, 1836, 1899, 2781, 2844, 564, 627, 1519, 1582,
- 2968, 3031, 3273, 3336, 1392, 1455, 2026, 2089, 2656, 2719, 3030, 3093, 1074,
- 1137, 3153, 3216, 1646, 1709, 2906, 2969, 883, 946, 3214, 3277, 1265, 1328,
- 3092, 3155, 628, 691, 3274, 3337, 52, 52, 1773, 1836, 2844, 2907, 3328,
- 3328, 53, 116, 1963, 2026, 2719, 2782, 3329, 3392, 117, 180, 2342, 2405,
- 2405, 2468, 3330, 3393, 2279, 2342, 2468, 2531, 181, 244, 3331, 3394, 1138,
- 1201, 3154, 3217, 245, 308, 692, 755, 2216, 2279, 2531, 2594, 3275, 3338,
- 3332, 3395, 947, 1010, 3215, 3278, 1456, 1519, 3031, 3094, 309, 372, 1583,
- 1646, 2969, 3032, 3333, 3396, 1900, 1963, 2782, 2845, 2153, 2216, 2594, 2657,
- 1329, 1392, 3093, 3156, 373, 436, 1710, 1773, 2907, 2970, 3334, 3397, 756,
- 819, 3276, 3339, 2090, 2153, 2657, 2720, 1011, 1074, 3216, 3279, 437, 500,
- 3335, 3398, 1202, 1265, 3155, 3218, 1837, 1900, 2845, 2908, 501, 564, 820,
- 883, 2027, 2090, 2720, 2783, 3277, 3340, 3336, 3399, 1520, 1583, 3032, 3095,
- 1393, 1456, 1647, 1710, 2970, 3033, 3094, 3157, 2406, 2469, 565, 628, 1075,
- 1138, 2343, 2406, 2469, 2532, 3217, 3280, 3337, 3400, 2280, 2343, 2532, 2595,
- 1964, 2027, 2783, 2846, 884, 947, 1266, 1329, 1774, 1837, 2908, 2971, 3156,
- 3219, 3278, 3341, 2217, 2280, 2595, 2658, 629, 692, 3338, 3401, 53, 53,
- 3392, 3392, 54, 117, 3393, 3456, 118, 181, 2154, 2217, 2658, 2721, 3394,
- 3457, 182, 245, 1139, 1202, 1901, 1964, 2846, 2909, 3218, 3281, 3395, 3458,
- 948, 1011, 1584, 1647, 3033, 3096, 3279, 3342, 693, 756, 1457, 1520, 3095,
- 3158, 3339, 3402, 246, 309, 3396, 3459, 1711, 1774, 2091, 2154, 2721, 2784,
- 2971, 3034, 310, 373, 1330, 1393, 3157, 3220, 3397, 3460, 374, 437, 3398,
- 3461, 757, 820, 3340, 3403, 1838, 1901, 2909, 2972, 1012, 1075, 2028, 2091,
- 2784, 2847, 3280, 3343, 1203, 1266, 3219, 3282, 438, 501, 2407, 2470, 2470,
- 2533, 3399, 3462, 2344, 2407, 2533, 2596, 1521, 1584, 2281, 2344, 2596, 2659,
- 3096, 3159, 821, 884, 3341, 3404, 502, 565, 1648, 1711, 3034, 3097, 3400,
- 3463, 1394, 1457, 3158, 3221, 1965, 2028, 2847, 2910, 2218, 2281, 2659, 2722,
- 1076, 1139, 1775, 1838, 2972, 3035, 3281, 3344, 566, 629, 3401, 3464, 1267,
- 1330, 3220, 3283, 885, 948, 2155, 2218, 2722, 2785, 3342, 3405, 630, 693,
- 1902, 1965, 2910, 2973, 3402, 3465, 54, 54, 2092, 2155, 2785, 2848, 3456,
- 3456, 55, 118, 1585, 1648, 3097, 3160, 3457, 3520, 1140, 1203, 3282, 3345,
- 119, 182, 1458, 1521, 3159, 3222, 3458, 3521, 1712, 1775, 3035, 3098, 183,
- 246, 949, 1012, 3343, 3406, 3459, 3522, 694, 757, 3403, 3466, 247, 310,
- 3460, 3523, 1331, 1394, 2471, 2534, 3221, 3284, 2408, 2471, 2534, 2597, 2029,
- 2092, 2848, 2911, 311, 374, 1839, 1902, 2345, 2408, 2597, 2660, 2973, 3036,
- 3461, 3524, 758, 821, 2282, 2345, 2660, 2723, 3404, 3467, 375, 438, 3462,
- 3525, 1013, 1076, 1204, 1267, 3283, 3346, 3344, 3407, 439, 502, 2219, 2282,
- 2723, 2786, 3463, 3526, 1522, 1585, 3160, 3223, 1649, 1712, 1966, 2029, 2911,
- 2974, 3098, 3161, 822, 885, 1395, 1458, 3222, 3285, 3405, 3468, 1776, 1839,
- 3036, 3099, 503, 566, 3464, 3527, 2156, 2219, 2786, 2849, 1077, 1140, 3345,
- 3408, 1268, 1331, 3284, 3347, 567, 630, 3465, 3528, 1903, 1966, 2974, 3037,
- 886, 949, 3406, 3469, 2093, 2156, 2849, 2912, 2472, 2535, 2535, 2598, 631,
- 694, 1586, 1649, 2409, 2472, 2598, 2661, 3161, 3224, 3466, 3529, 1459, 1522,
- 1713, 1776, 3099, 3162, 3223, 3286, 1141, 1204, 2346, 2409, 2661, 2724, 3346,
- 3409, 55, 55, 3520, 3520, 56, 119, 3521, 3584, 120, 183, 2030, 2093,
- 2912, 2975, 3522, 3585, 950, 1013, 3407, 3470, 184, 247, 1332, 1395, 1840,
- 1903, 2283, 2346, 2724, 2787, 3037, 3100, 3285, 3348, 3523, 3586, 695, 758,
- 3467, 3530, 248, 311, 3524, 3587, 312, 375, 2220, 2283, 2787, 2850, 3525,
- 3588, 759, 822, 3468, 3531, 1205, 1268, 1967, 2030, 2975, 3038, 3347, 3410,
- 376, 439, 1014, 1077, 3408, 3471, 3526, 3589, 1650, 1713, 3162, 3225, 1523,
- 1586, 3224, 3287, 2157, 2220, 2850, 2913, 440, 503, 1777, 1840, 3100, 3163,
- 3527, 3590, 1396, 1459, 3286, 3349, 823, 886, 3469, 3532, 504, 567, 2536,
- 2599, 3528, 3591, 2473, 2536, 2599, 2662, 1904, 1967, 3038, 3101, 1078, 1141,
- 2094, 2157, 2913, 2976, 3409, 3472, 2410, 2473, 2662, 2725, 1269, 1332, 3348,
- 3411, 568, 631, 3529, 3592, 2347, 2410, 2725, 2788, 887, 950, 3470, 3533,
- 1587, 1650, 3225, 3288, 1714, 1777, 3163, 3226, 2284, 2347, 2788, 2851, 1460,
- 1523, 2031, 2094, 2976, 3039, 3287, 3350, 632, 695, 3530, 3593, 1142, 1205,
- 3410, 3473, 1841, 1904, 3101, 3164, 56, 56, 3584, 3584, 57, 120, 951,
- 1014, 1333, 1396, 2221, 2284, 2851, 2914, 3349, 3412, 3471, 3534, 3585, 3648,
- 121, 184, 3586, 3649, 696, 759, 3531, 3594, 185, 248, 3587, 3650, 249,
- 312, 1968, 2031, 3039, 3102, 3588, 3651, 2158, 2221, 2914, 2977, 313, 376,
- 3589, 3652, 1206, 1269, 1651, 1714, 3226, 3289, 3411, 3474, 760, 823, 1524,
- 1587, 3288, 3351, 3532, 3595, 1015, 1078, 2537, 2600, 2600, 2663, 3472, 3535,
- 1778, 1841, 3164, 3227, 377, 440, 2474, 2537, 2663, 2726, 3590, 3653, 1397,
- 1460, 2411, 2474, 2726, 2789, 3350, 3413, 441, 504, 2095, 2158, 2977, 3040,
- 3591, 3654, 1905, 1968, 3102, 3165, 824, 887, 2348, 2411, 2789, 2852, 3533,
- 3596, 505, 568, 3592, 3655, 1079, 1142, 3473, 3536, 1270, 1333, 3412, 3475,
- 2285, 2348, 2852, 2915, 2032, 2095, 3040, 3103, 1588, 1651, 3289, 3352, 569,
- 632, 1715, 1778, 3227, 3290, 3593, 3656, 888, 951, 3534, 3597, 1461, 1524,
- 3351, 3414, 1842, 1905, 2222, 2285, 2915, 2978, 3165, 3228, 633, 696, 1143,
- 1206, 3474, 3537, 3594, 3657, 1334, 1397, 3413, 3476, 952, 1015, 3535, 3598,
- 1969, 2032, 2601, 2664, 3103, 3166, 57, 57, 2538, 2601, 2664, 2727, 3648,
- 3648, 58, 121, 2159, 2222, 2978, 3041, 3649, 3712, 122, 185, 3650, 3713,
- 697, 760, 2475, 2538, 2727, 2790, 3595, 3658, 186, 249, 3651, 3714, 250,
- 313, 1652, 1715, 2412, 2475, 2790, 2853, 3290, 3353, 3652, 3715, 1525, 1588,
- 1779, 1842, 3228, 3291, 3352, 3415, 1207, 1270, 3475, 3538, 314, 377, 3653,
- 3716, 1016, 1079, 3536, 3599, 761, 824, 2096, 2159, 3041, 3104, 3596, 3659,
- 2349, 2412, 2853, 2916, 378, 441, 1398, 1461, 1906, 1969, 3166, 3229, 3414,
- 3477, 3654, 3717, 2286, 2349, 2916, 2979, 442, 505, 3655, 3718, 825, 888,
- 3597, 3660, 1080, 1143, 1271, 1334, 2033, 2096, 3104, 3167, 3476, 3539, 3537,
- 3600, 506, 569, 3656, 3719, 1716, 1779, 3291, 3354, 1589, 1652, 2223, 2286,
- 2979, 3042, 3353, 3416, 1843, 1906, 3229, 3292, 570, 633, 889, 952, 1462,
- 1525, 2602, 2665, 2665, 2728, 3415, 3478, 3598, 3661, 3657, 3720, 2539, 2602,
- 2728, 2791, 2476, 2539, 2791, 2854, 1144, 1207, 2160, 2223, 3042, 3105, 3538,
- 3601, 1970, 2033, 3167, 3230, 634, 697, 3658, 3721, 1335, 1398, 3477, 3540,
- 2413, 2476, 2854, 2917, 953, 1016, 3599, 3662, 58, 58, 3712, 3712, 59,
- 122, 3713, 3776, 123, 186, 698, 761, 1653, 1716, 2350, 2413, 2917, 2980,
- 3354, 3417, 3659, 3722, 3714, 3777, 1780, 1843, 3292, 3355, 187, 250, 2097,
- 2160, 3105, 3168, 3715, 3778, 1526, 1589, 3416, 3479, 251, 314, 1208, 1271,
- 3539, 3602, 3716, 3779, 1907, 1970, 3230, 3293, 1017, 1080, 2287, 2350, 2980,
- 3043, 3600, 3663, 315, 378, 3717, 3780, 762, 825, 3660, 3723, 1399, 1462,
- 3478, 3541, 379, 442, 3718, 3781, 2034, 2097, 3168, 3231, 2666, 2729, 2224,
- 2287, 3043, 3106, 443, 506, 2603, 2666, 2729, 2792, 3719, 3782, 826, 889,
- 3661, 3724, 1272, 1335, 2540, 2603, 2792, 2855, 3540, 3603, 1081, 1144, 1717,
- 1780, 3355, 3418, 3601, 3664, 1590, 1653, 3417, 3480, 507, 570, 1844, 1907,
- 3293, 3356, 3720, 3783, 2477, 2540, 2855, 2918, 1463, 1526, 3479, 3542, 2161,
- 2224, 3106, 3169, 890, 953, 2414, 2477, 2918, 2981, 3662, 3725, 571, 634,
- 1971, 2034, 3231, 3294, 3721, 3784, 1145, 1208, 3602, 3665, 1336, 1399, 3541,
- 3604, 2351, 2414, 2981, 3044, 635, 698, 3722, 3785, 954, 1017, 2098, 2161,
- 3169, 3232, 3663, 3726, 1654, 1717, 3418, 3481, 1781, 1844, 3356, 3419, 59,
- 59, 2288, 2351, 3044, 3107, 3776, 3776, 60, 123, 1527, 1590, 3480, 3543,
- 3777, 3840, 699, 762, 3723, 3786, 124, 187, 1908, 1971, 3294, 3357, 3778,
- 3841, 188, 251, 3779, 3842, 1209, 1272, 3603, 3666, 2667, 2730, 2730, 2793,
- 252, 315, 3780, 3843, 2604, 2667, 2793, 2856, 1018, 1081, 1400, 1463, 3542,
- 3605, 3664, 3727, 316, 379, 763, 826, 2035, 2098, 2541, 2604, 2856, 2919,
- 3232, 3295, 3724, 3787, 3781, 3844, 2225, 2288, 3107, 3170, 380, 443, 3782,
- 3845, 2478, 2541, 2919, 2982, 1718, 1781, 3419, 3482, 444, 507, 1273, 1336,
- 3604, 3667, 3783, 3846, 827, 890, 1591, 1654, 1845, 1908, 3357, 3420, 3481,
- 3544, 3725, 3788, 1082, 1145, 2415, 2478, 2982, 3045, 3665, 3728, 2162, 2225,
- 3170, 3233, 508, 571, 3784, 3847, 1464, 1527, 1972, 2035, 3295, 3358, 3543,
- 3606, 2352, 2415, 3045, 3108, 891, 954, 3726, 3789, 572, 635, 3785, 3848,
- 1146, 1209, 3666, 3729, 1337, 1400, 2099, 2162, 3233, 3296, 3605, 3668, 2289,
- 2352, 3108, 3171, 2731, 2794, 636, 699, 1782, 1845, 2668, 2731, 2794, 2857,
- 3420, 3483, 3786, 3849, 1655, 1718, 3482, 3545, 955, 1018, 2605, 2668, 2857,
- 2920, 3727, 3790, 1909, 1972, 3358, 3421, 1528, 1591, 3544, 3607, 2542, 2605,
- 2920, 2983, 60, 60, 700, 763, 3787, 3850, 3840, 3840, 61, 124, 3841,
- 3904, 125, 188, 1210, 1273, 2226, 2289, 3171, 3234, 3667, 3730, 3842, 3905,
- 2036, 2099, 3296, 3359, 189, 252, 2479, 2542, 2983, 3046, 3843, 3906, 1401,
- 1464, 3606, 3669, 253, 316, 1019, 1082, 3728, 3791, 3844, 3907, 764, 827,
- 3788, 3851, 317, 380, 3845, 3908, 2416, 2479, 3046, 3109, 1719, 1782, 3483,
- 3546, 381, 444, 1846, 1909, 2163, 2226, 3234, 3297, 3421, 3484, 3846, 3909,
- 1592, 1655, 3545, 3608, 1274, 1337, 3668, 3731, 828, 891, 3789, 3852, 445,
- 508, 1083, 1146, 1973, 2036, 2353, 2416, 3109, 3172, 3359, 3422, 3729, 3792,
- 3847, 3910, 1465, 1528, 3607, 3670, 509, 572, 2732, 2795, 2795, 2858, 3848,
- 3911, 2669, 2732, 2858, 2921, 2100, 2163, 3297, 3360, 892, 955, 2290, 2353,
- 3172, 3235, 3790, 3853, 2606, 2669, 2921, 2984, 573, 636, 3849, 3912, 1147,
- 1210, 1338, 1401, 3669, 3732, 3730, 3793, 1783, 1846, 2543, 2606, 2984, 3047,
- 3484, 3547, 1656, 1719, 3546, 3609, 1910, 1973, 3422, 3485, 637, 700, 3850,
- 3913, 956, 1019, 1529, 1592, 2480, 2543, 3047, 3110, 3608, 3671, 3791, 3854,
- 2227, 2290, 3235, 3298, 2037, 2100, 3360, 3423, 701, 764, 1211, 1274, 3731,
- 3794, 3851, 3914, 61, 61, 3904, 3904, 62, 125, 2417, 2480, 3110, 3173,
- 3905, 3968, 126, 189, 1402, 1465, 3670, 3733, 3906, 3969, 190, 253, 3907,
- 3970, 1020, 1083, 3792, 3855, 254, 317, 2164, 2227, 3298, 3361, 3908, 3971,
- 765, 828, 1720, 1783, 3547, 3610, 3852, 3915, 1847, 1910, 3485, 3548, 318,
- 381, 2354, 2417, 3173, 3236, 3909, 3972, 2796, 2859, 1593, 1656, 2733, 2796,
- 2859, 2922, 3609, 3672, 1974, 2037, 3423, 3486, 382, 445, 2670, 2733, 2922,
- 2985, 3910, 3973, 1275, 1338, 3732, 3795, 1084, 1147, 3793, 3856, 829, 892,
- 2607, 2670, 2985, 3048, 3853, 3916, 446, 509, 1466, 1529, 3671, 3734, 3911,
- 3974, 2291, 2354, 3236, 3299, 2101, 2164, 3361, 3424, 2544, 2607, 3048, 3111,
- 510, 573, 3912, 3975, 893, 956, 3854, 3917, 1784, 1847, 3548, 3611, 1339,
- 1402, 2481, 2544, 3111, 3174, 3733, 3796, 1148, 1211, 3794, 3857, 574, 637,
- 1657, 1720, 1911, 1974, 3486, 3549, 3610, 3673, 3913, 3976, 2228, 2291, 3299,
- 3362, 1530, 1593, 2038, 2101, 3424, 3487, 3672, 3735, 638, 701, 2418, 2481,
- 3174, 3237, 3914, 3977, 957, 1020, 3855, 3918, 1212, 1275, 2797, 2860, 2860,
- 2923, 3795, 3858, 702, 765, 1403, 1466, 2165, 2228, 2734, 2797, 2923, 2986,
- 3362, 3425, 3734, 3797, 3915, 3978, 62, 62, 3968, 3968, 63, 126, 2355,
- 2418, 3237, 3300, 3969, 4032, 127, 190, 2671, 2734, 2986, 3049, 3970, 4033,
- 1021, 1084, 1848, 1911, 3549, 3612, 3856, 3919, 191, 254, 1721, 1784, 3611,
- 3674, 3971, 4034, 255, 318, 2608, 2671, 3049, 3112, 3972, 4035, 1975, 2038,
- 3487, 3550, 766, 829, 3916, 3979, 1594, 1657, 3673, 3736, 319, 382, 3973,
- 4036, 1276, 1339, 2292, 2355, 3300, 3363, 3796, 3859, 2545, 2608, 3112, 3175,
- 383, 446, 2102, 2165, 3425, 3488, 3974, 4037, 1085, 1148, 1467, 1530, 3735,
- 3798, 3857, 3920, 830, 893, 3917, 3980, 447, 510, 3975, 4038, 2482, 2545,
- 3175, 3238, 511, 574, 1785, 1848, 3612, 3675, 3976, 4039, 2229, 2292, 3363,
- 3426, 1912, 1975, 3550, 3613, 894, 957, 1658, 1721, 3674, 3737, 3918, 3981,
- 1340, 1403, 3797, 3860, 1149, 1212, 2419, 2482, 3238, 3301, 3858, 3921, 2039,
- 2102, 3488, 3551, 575, 638, 2861, 2924, 3977, 4040, 2798, 2861, 2924, 2987,
- 1531, 1594, 3736, 3799, 2735, 2798, 2987, 3050, 2672, 2735, 3050, 3113, 639,
- 702, 958, 1021, 3919, 3982, 3978, 4041, 2166, 2229, 3426, 3489, 2356, 2419,
- 3301, 3364, 1213, 1276, 2609, 2672, 3113, 3176, 3859, 3922, 1404, 1467, 3798,
- 3861, 703, 766, 1849, 1912, 3613, 3676, 3979, 4042, 1722, 1785, 3675, 3738,
- 1976, 2039, 3551, 3614, 1022, 1085, 2546, 2609, 3176, 3239, 3920, 3983, 2293,
- 2356, 3364, 3427, 1595, 1658, 3737, 3800, 767, 830, 3980, 4043, 2103, 2166,
- 3489, 3552, 1277, 1340, 3860, 3923, 2483, 2546, 3239, 3302, 1468, 1531, 3799,
- 3862, 1086, 1149, 3921, 3984, 831, 894, 3981, 4044, 2230, 2293, 2862, 2925,
- 2925, 2988, 3427, 3490, 2799, 2862, 2988, 3051, 1786, 1849, 2420, 2483, 3302,
- 3365, 3676, 3739, 1913, 1976, 3614, 3677, 2736, 2799, 3051, 3114, 1659, 1722,
- 3738, 3801, 2040, 2103, 3552, 3615, 1341, 1404, 3861, 3924, 895, 958, 2673,
- 2736, 3114, 3177, 3982, 4045, 1150, 1213, 3922, 3985, 1532, 1595, 3800, 3863,
- 2357, 2420, 3365, 3428, 2167, 2230, 2610, 2673, 3177, 3240, 3490, 3553, 959,
- 1022, 3983, 4046, 2547, 2610, 3240, 3303, 1214, 1277, 1405, 1468, 1850, 1913,
- 3677, 3740, 3862, 3925, 3923, 3986, 1723, 1786, 1977, 2040, 3615, 3678, 3739,
- 3802, 2294, 2357, 3428, 3491, 1023, 1086, 1596, 1659, 2104, 2167, 2484, 2547,
- 3303, 3366, 3553, 3616, 3801, 3864, 3984, 4047, 2926, 2989, 2863, 2926, 2989,
- 3052, 2800, 2863, 3052, 3115, 1278, 1341, 3924, 3987, 1469, 1532, 2231, 2294,
- 2737, 2800, 3115, 3178, 3491, 3554, 3863, 3926, 2421, 2484, 3366, 3429, 1087,
- 1150, 3985, 4048, 1914, 1977, 2674, 2737, 3178, 3241, 3678, 3741, 1787, 1850,
- 3740, 3803, 2041, 2104, 3616, 3679, 1660, 1723, 3802, 3865, 2611, 2674, 3241,
- 3304, 1342, 1405, 2358, 2421, 3429, 3492, 3925, 3988, 2168, 2231, 3554, 3617,
- 1151, 1214, 3986, 4049, 1533, 1596, 3864, 3927, 2548, 2611, 3304, 3367, 2295,
- 2358, 3492, 3555, 1851, 1914, 3741, 3804, 1978, 2041, 2927, 2990, 2990, 3053,
- 3679, 3742, 1406, 1469, 3926, 3989, 1724, 1787, 2864, 2927, 3053, 3116, 3803,
- 3866, 1215, 1278, 2485, 2548, 3367, 3430, 3987, 4050, 2801, 2864, 3116, 3179,
- 2105, 2168, 3617, 3680, 1597, 1660, 3865, 3928, 2738, 2801, 3179, 3242, 2422,
- 2485, 3430, 3493, 2232, 2295, 3555, 3618, 2675, 2738, 3242, 3305, 1279, 1342,
- 3988, 4051, 1470, 1533, 3927, 3990, 1915, 1978, 3742, 3805, 1788, 1851, 3804,
- 3867, 2612, 2675, 3305, 3368, 2042, 2105, 3680, 3743, 2359, 2422, 3493, 3556,
- 1661, 1724, 3866, 3929, 2169, 2232, 3618, 3681, 2549, 2612, 3368, 3431, 1343,
- 1406, 3989, 4052, 2991, 3054, 1534, 1597, 2928, 2991, 3054, 3117, 3928, 3991,
- 2865, 2928, 3117, 3180, 2296, 2359, 3556, 3619, 2802, 2865, 3180, 3243, 2486,
- 2549, 3431, 3494, 1852, 1915, 3805, 3868, 1979, 2042, 3743, 3806, 1725, 1788,
- 2739, 2802, 3243, 3306, 3867, 3930, 1407, 1470, 2106, 2169, 3681, 3744, 3990,
- 4053, 2676, 2739, 3306, 3369, 1598, 1661, 2423, 2486, 3494, 3557, 3929, 3992,
- 2233, 2296, 3619, 3682, 2613, 2676, 3369, 3432, 1471, 1534, 3991, 4054, 1916,
- 1979, 3806, 3869, 1789, 1852, 2043, 2106, 2360, 2423, 3557, 3620, 3744, 3807,
- 3868, 3931, 2992, 3055, 3055, 3118, 2550, 2613, 3432, 3495, 2929, 2992, 3118,
- 3181, 1662, 1725, 2170, 2233, 3682, 3745, 3930, 3993, 2866, 2929, 3181, 3244,
- 2803, 2866, 3244, 3307, 1535, 1598, 2297, 2360, 3620, 3683, 3992, 4055, 2487,
- 2550, 3495, 3558, 2740, 2803, 3307, 3370, 1980, 2043, 3807, 3870, 1853, 1916,
- 3869, 3932, 2107, 2170, 3745, 3808, 1726, 1789, 2677, 2740, 3370, 3433, 3931,
- 3994, 2424, 2487, 3558, 3621, 2234, 2297, 3683, 3746, 1599, 1662, 3993, 4056,
- 2614, 2677, 3433, 3496, 3056, 3119, 2993, 3056, 3119, 3182, 2930, 2993, 3182,
- 3245, 2361, 2424, 3621, 3684, 1917, 1980, 3870, 3933, 2044, 2107, 3808, 3871,
- 2551, 2614, 3496, 3559, 2867, 2930, 3245, 3308, 1790, 1853, 3932, 3995, 2171,
- 2234, 3746, 3809, 2804, 2867, 3308, 3371, 1663, 1726, 3994, 4057, 2488, 2551,
- 3559, 3622, 2741, 2804, 3371, 3434, 2298, 2361, 3684, 3747, 2678, 2741, 3434,
- 3497, 1981, 2044, 3871, 3934, 1854, 1917, 3933, 3996, 2108, 2171, 3809, 3872,
- 2425, 2488, 3622, 3685, 1727, 1790, 3995, 4058, 3057, 3120, 3120, 3183, 2235,
- 2298, 2615, 2678, 3497, 3560, 3747, 3810, 2994, 3057, 3183, 3246, 2931, 2994,
- 3246, 3309, 2868, 2931, 3309, 3372, 2362, 2425, 3685, 3748, 2552, 2615, 3560,
- 3623, 1918, 1981, 3934, 3997, 2045, 2108, 2805, 2868, 3372, 3435, 3872, 3935,
- 1791, 1854, 3996, 4059, 2172, 2235, 3810, 3873, 2742, 2805, 3435, 3498, 2489,
- 2552, 3623, 3686, 2299, 2362, 3748, 3811, 2679, 2742, 3498, 3561, 3121, 3184,
- 3058, 3121, 3184, 3247, 1982, 2045, 3935, 3998, 2426, 2489, 3686, 3749, 1855,
- 1918, 2109, 2172, 2995, 3058, 3247, 3310, 3873, 3936, 3997, 4060, 2616, 2679,
- 3561, 3624, 2932, 2995, 3310, 3373, 2236, 2299, 3811, 3874, 2869, 2932, 3373,
- 3436, 2553, 2616, 3624, 3687, 2363, 2426, 3749, 3812, 2806, 2869, 3436, 3499,
- 2046, 2109, 3936, 3999, 1919, 1982, 3998, 4061, 2743, 2806, 3499, 3562, 2173,
- 2236, 3874, 3937, 2490, 2553, 3687, 3750, 2300, 2363, 3812, 3875, 2680, 2743,
- 3562, 3625, 3122, 3185, 3185, 3248, 3059, 3122, 3248, 3311, 2996, 3059, 3311,
- 3374, 2427, 2490, 2933, 2996, 3374, 3437, 3750, 3813, 1983, 2046, 2617, 2680,
- 3625, 3688, 3999, 4062, 2110, 2173, 3937, 4000, 2870, 2933, 3437, 3500, 2237,
- 2300, 3875, 3938, 2807, 2870, 3500, 3563, 2554, 2617, 3688, 3751, 2364, 2427,
- 3813, 3876, 2744, 2807, 3563, 3626, 2047, 2110, 4000, 4063, 2174, 2237, 3186,
- 3249, 3938, 4001, 2491, 2554, 3123, 3186, 3249, 3312, 3751, 3814, 3060, 3123,
- 3312, 3375, 2681, 2744, 3626, 3689, 2301, 2364, 3876, 3939, 2997, 3060, 3375,
- 3438, 2934, 2997, 3438, 3501, 2428, 2491, 3814, 3877, 2618, 2681, 3689, 3752,
- 2871, 2934, 3501, 3564, 2111, 2174, 4001, 4064, 2238, 2301, 3939, 4002, 2808,
- 2871, 3564, 3627, 2555, 2618, 3752, 3815, 2365, 2428, 3877, 3940, 2745, 2808,
- 3627, 3690, 3187, 3250, 3250, 3313, 3124, 3187, 3313, 3376, 3061, 3124, 3376,
- 3439, 2492, 2555, 3815, 3878, 2175, 2238, 2998, 3061, 3439, 3502, 4002, 4065,
- 2682, 2745, 3690, 3753, 2302, 2365, 3940, 4003, 2935, 2998, 3502, 3565, 2872,
- 2935, 3565, 3628, 2619, 2682, 3753, 3816, 2429, 2492, 3878, 3941, 2809, 2872,
- 3628, 3691, 2239, 2302, 4003, 4066, 2556, 2619, 3816, 3879, 3251, 3314, 3188,
- 3251, 3314, 3377, 3125, 3188, 3377, 3440, 2366, 2429, 2746, 2809, 3691, 3754,
- 3941, 4004, 3062, 3125, 3440, 3503, 2999, 3062, 3503, 3566, 2493, 2556, 3879,
- 3942, 2683, 2746, 3754, 3817, 2936, 2999, 3566, 3629, 2303, 2366, 4004, 4067,
- 2873, 2936, 3629, 3692, 2620, 2683, 3817, 3880, 2430, 2493, 3942, 4005, 2810,
- 2873, 3692, 3755, 3252, 3315, 3315, 3378, 3189, 3252, 3378, 3441, 3126, 3189,
- 3441, 3504, 2557, 2620, 3880, 3943, 3063, 3126, 3504, 3567, 2747, 2810, 3755,
- 3818, 2367, 2430, 4005, 4068, 3000, 3063, 3567, 3630, 2684, 2747, 3818, 3881,
- 2494, 2557, 2937, 3000, 3630, 3693, 3943, 4006, 2874, 2937, 3693, 3756, 2621,
- 2684, 3881, 3944, 3316, 3379, 3253, 3316, 3379, 3442, 2431, 2494, 4006, 4069,
- 3190, 3253, 3442, 3505, 2811, 2874, 3756, 3819, 3127, 3190, 3505, 3568, 3064,
- 3127, 3568, 3631, 2558, 2621, 3944, 4007, 2748, 2811, 3819, 3882, 3001, 3064,
- 3631, 3694, 2938, 3001, 3694, 3757, 2685, 2748, 3882, 3945, 2495, 2558, 4007,
- 4070, 2875, 2938, 3757, 3820, 3317, 3380, 3380, 3443, 3254, 3317, 3443, 3506,
- 2622, 2685, 3191, 3254, 3506, 3569, 3945, 4008, 2812, 2875, 3820, 3883, 3128,
- 3191, 3569, 3632, 3065, 3128, 3632, 3695, 2559, 2622, 4008, 4071, 2749, 2812,
- 3883, 3946, 3002, 3065, 3695, 3758, 2939, 3002, 3758, 3821, 2686, 2749, 3946,
- 4009, 3381, 3444, 3318, 3381, 3444, 3507, 2876, 2939, 3821, 3884, 3255, 3318,
- 3507, 3570, 3192, 3255, 3570, 3633, 2623, 2686, 3129, 3192, 3633, 3696, 4009,
- 4072, 2813, 2876, 3884, 3947, 3066, 3129, 3696, 3759, 3003, 3066, 3759, 3822,
- 2750, 2813, 3947, 4010, 2940, 3003, 3822, 3885, 3382, 3445, 3445, 3508, 3319,
- 3382, 3508, 3571, 2687, 2750, 4010, 4073, 3256, 3319, 3571, 3634, 2877, 2940,
- 3885, 3948, 3193, 3256, 3634, 3697, 3130, 3193, 3697, 3760, 2814, 2877, 3948,
- 4011, 3067, 3130, 3760, 3823, 3004, 3067, 3823, 3886, 2751, 2814, 4011, 4074,
- 3446, 3509, 3383, 3446, 3509, 3572, 2941, 3004, 3886, 3949, 3320, 3383, 3572,
- 3635, 3257, 3320, 3635, 3698, 3194, 3257, 3698, 3761, 2878, 2941, 3949, 4012,
- 3131, 3194, 3761, 3824, 3068, 3131, 3824, 3887, 2815, 2878, 4012, 4075, 3005,
- 3068, 3887, 3950, 3447, 3510, 3510, 3573, 3384, 3447, 3573, 3636, 3321, 3384,
- 3636, 3699, 2942, 3005, 3950, 4013, 3258, 3321, 3699, 3762, 3195, 3258, 3762,
- 3825, 2879, 2942, 4013, 4076, 3132, 3195, 3825, 3888, 3069, 3132, 3888, 3951,
- 3511, 3574, 3448, 3511, 3574, 3637, 3006, 3069, 3951, 4014, 3385, 3448, 3637,
- 3700, 3322, 3385, 3700, 3763, 3259, 3322, 3763, 3826, 2943, 3006, 4014, 4077,
- 3196, 3259, 3826, 3889, 3133, 3196, 3889, 3952, 3070, 3133, 3952, 4015, 3512,
- 3575, 3575, 3638, 3449, 3512, 3638, 3701, 3386, 3449, 3701, 3764, 3007, 3070,
- 4015, 4078, 3323, 3386, 3764, 3827, 3260, 3323, 3827, 3890, 3197, 3260, 3890,
- 3953, 3134, 3197, 3953, 4016, 3576, 3639, 3071, 3134, 4016, 4079, 3513, 3576,
- 3639, 3702, 3450, 3513, 3702, 3765, 3387, 3450, 3765, 3828, 3324, 3387, 3828,
- 3891, 3261, 3324, 3891, 3954, 3198, 3261, 3954, 4017, 3135, 3198, 4017, 4080,
- 3577, 3640, 3640, 3703, 3514, 3577, 3703, 3766, 3451, 3514, 3766, 3829, 3388,
- 3451, 3829, 3892, 3325, 3388, 3892, 3955, 3262, 3325, 3955, 4018, 3199, 3262,
- 4018, 4081, 3641, 3704, 3578, 3641, 3704, 3767, 3515, 3578, 3767, 3830, 3452,
- 3515, 3830, 3893, 3389, 3452, 3893, 3956, 3326, 3389, 3956, 4019, 3263, 3326,
- 4019, 4082, 3642, 3705, 3705, 3768, 3579, 3642, 3768, 3831, 3516, 3579, 3831,
- 3894, 3453, 3516, 3894, 3957, 3390, 3453, 3957, 4020, 3327, 3390, 4020, 4083,
- 3706, 3769, 3643, 3706, 3769, 3832, 3580, 3643, 3832, 3895, 3517, 3580, 3895,
- 3958, 3454, 3517, 3958, 4021, 3391, 3454, 4021, 4084, 3707, 3770, 3770, 3833,
- 3644, 3707, 3833, 3896, 3581, 3644, 3896, 3959, 3518, 3581, 3959, 4022, 3455,
- 3518, 4022, 4085, 3771, 3834, 3708, 3771, 3834, 3897, 3645, 3708, 3897, 3960,
- 3582, 3645, 3960, 4023, 3519, 3582, 4023, 4086, 3772, 3835, 3835, 3898, 3709,
- 3772, 3898, 3961, 3646, 3709, 3961, 4024, 3583, 3646, 4024, 4087, 3836, 3899,
- 3773, 3836, 3899, 3962, 3710, 3773, 3962, 4025, 3647, 3710, 4025, 4088, 3837,
- 3900, 3900, 3963, 3774, 3837, 3963, 4026, 3711, 3774, 4026, 4089, 3901, 3964,
- 3838, 3901, 3964, 4027, 3775, 3838, 4027, 4090, 3902, 3965, 3965, 4028, 3839,
- 3902, 4028, 4091, 3966, 4029, 3903, 3966, 4029, 4092, 3967, 4030, 4030, 4093,
- 4031, 4094, 0, 0,
-};
-#endif // CONFIG_TX64X64
-
-#if CONFIG_CHROMA_2X2
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_2x2[4]) = { 0, 1, 2,
- 3 };
-#endif
-
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x4[16]) = {
- 0, 2, 5, 8, 1, 3, 9, 12, 4, 7, 11, 14, 6, 10, 13, 15,
+ 0, 1, 5, 6, 2, 4, 7, 12, 3, 8, 11, 13, 9, 10, 14, 15
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_4x4[16]) = {
0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
};
@@ -5228,19 +2387,10 @@ DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_4x4[16]) = {
DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_4x4[16]) = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
};
-#endif // CONFIG_EXT_TX
-
-DECLARE_ALIGNED(16, static const int16_t, av1_col_iscan_4x4[16]) = {
- 0, 3, 7, 11, 1, 5, 9, 12, 2, 6, 10, 14, 4, 8, 13, 15,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_row_iscan_4x4[16]) = {
- 0, 1, 3, 5, 2, 4, 6, 9, 7, 8, 11, 13, 10, 12, 14, 15,
-};
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x8[32]) = {
- 0, 1, 4, 9, 2, 3, 6, 11, 5, 7, 8, 13, 10, 12, 14, 17,
- 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 0, 1, 3, 6, 2, 4, 7, 10, 5, 8, 11, 14, 9, 12, 15, 18,
+ 13, 16, 19, 22, 17, 20, 23, 26, 21, 24, 27, 29, 25, 28, 30, 31,
};
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_4x8[32]) = {
@@ -5254,8 +2404,8 @@ DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_4x8[32]) = {
};
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x4[32]) = {
- 0, 1, 4, 9, 15, 19, 24, 28, 2, 3, 6, 11, 16, 21, 25, 29,
- 5, 7, 8, 13, 18, 22, 26, 30, 10, 12, 14, 17, 20, 23, 27, 31,
+ 0, 2, 5, 9, 13, 17, 21, 25, 1, 4, 8, 12, 16, 20, 24, 28,
+ 3, 7, 11, 15, 19, 23, 27, 30, 6, 10, 14, 18, 22, 26, 29, 31,
};
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_8x4[32]) = {
@@ -5269,20 +2419,19 @@ DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_8x4[32]) = {
};
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x16[64]) = {
- 0, 1, 4, 9, 2, 3, 6, 11, 5, 7, 8, 13, 10, 12, 14, 17,
- 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 0, 1, 3, 6, 2, 4, 7, 10, 5, 8, 11, 14, 9, 12, 15, 18,
+ 13, 16, 19, 22, 17, 20, 23, 26, 21, 24, 27, 30, 25, 28, 31, 34,
+ 29, 32, 35, 38, 33, 36, 39, 42, 37, 40, 43, 46, 41, 44, 47, 50,
+ 45, 48, 51, 54, 49, 52, 55, 58, 53, 56, 59, 61, 57, 60, 62, 63,
};
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x4[64]) = {
- 0, 1, 4, 9, 15, 19, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
- 2, 3, 6, 11, 16, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57, 61,
- 5, 7, 8, 13, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62,
- 10, 12, 14, 17, 20, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63,
+ 0, 2, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57,
+ 1, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
+ 3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 62,
+ 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 61, 63,
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_4x16[64]) = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
@@ -5310,7 +2459,6 @@ DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_16x4[64]) = {
2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62,
3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63,
};
-#endif // CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x32[256]) = {
0, 1, 3, 6, 10, 15, 21, 28, 2, 4, 7, 11, 16, 22, 29,
@@ -5330,30 +2478,30 @@ DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x32[256]) = {
201, 208, 215, 222, 229, 235, 195, 202, 209, 216, 223, 230, 236, 241, 203,
210, 217, 224, 231, 237, 242, 246, 211, 218, 225, 232, 238, 243, 247, 250,
219, 226, 233, 239, 244, 248, 251, 253, 227, 234, 240, 245, 249, 252, 254,
+ 255,
};
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x8[256]) = {
- 0, 1, 3, 6, 10, 15, 21, 28, 36, 44, 52, 60, 68, 76, 84,
- 92, 100, 108, 116, 124, 132, 140, 148, 156, 164, 172, 180, 188, 196, 204,
- 212, 220, 2, 4, 7, 11, 16, 22, 29, 37, 45, 53, 61, 69, 77,
- 85, 93, 101, 109, 117, 125, 133, 141, 149, 157, 165, 173, 181, 189, 197,
- 205, 213, 221, 228, 5, 8, 12, 17, 23, 30, 38, 46, 54, 62, 70,
- 78, 86, 94, 102, 110, 118, 126, 134, 142, 150, 158, 166, 174, 182, 190,
- 198, 206, 214, 222, 229, 235, 9, 13, 18, 24, 31, 39, 47, 55, 63,
- 71, 79, 87, 95, 103, 111, 119, 127, 135, 143, 151, 159, 167, 175, 183,
- 191, 199, 207, 215, 223, 230, 236, 241, 14, 19, 25, 32, 40, 48, 56,
- 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176,
- 184, 192, 200, 208, 216, 224, 231, 237, 242, 246, 20, 26, 33, 41, 49,
- 57, 65, 73, 81, 89, 97, 105, 113, 121, 129, 137, 145, 153, 161, 169,
- 177, 185, 193, 201, 209, 217, 225, 232, 238, 243, 247, 250, 27, 34, 42,
- 50, 58, 66, 74, 82, 90, 98, 106, 114, 122, 130, 138, 146, 154, 162,
- 170, 178, 186, 194, 202, 210, 218, 226, 233, 239, 244, 248, 251, 253, 35,
- 43, 51, 59, 67, 75, 83, 91, 99, 107, 115, 123, 131, 139, 147, 155,
- 163, 171, 179, 187, 195, 203, 211, 219, 227, 234, 240, 245, 249, 252, 254,
+ 0, 2, 5, 9, 14, 20, 27, 35, 43, 51, 59, 67, 75, 83, 91,
+ 99, 107, 115, 123, 131, 139, 147, 155, 163, 171, 179, 187, 195, 203, 211,
+ 219, 227, 1, 4, 8, 13, 19, 26, 34, 42, 50, 58, 66, 74, 82,
+ 90, 98, 106, 114, 122, 130, 138, 146, 154, 162, 170, 178, 186, 194, 202,
+ 210, 218, 226, 234, 3, 7, 12, 18, 25, 33, 41, 49, 57, 65, 73,
+ 81, 89, 97, 105, 113, 121, 129, 137, 145, 153, 161, 169, 177, 185, 193,
+ 201, 209, 217, 225, 233, 240, 6, 11, 17, 24, 32, 40, 48, 56, 64,
+ 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184,
+ 192, 200, 208, 216, 224, 232, 239, 245, 10, 16, 23, 31, 39, 47, 55,
+ 63, 71, 79, 87, 95, 103, 111, 119, 127, 135, 143, 151, 159, 167, 175,
+ 183, 191, 199, 207, 215, 223, 231, 238, 244, 249, 15, 22, 30, 38, 46,
+ 54, 62, 70, 78, 86, 94, 102, 110, 118, 126, 134, 142, 150, 158, 166,
+ 174, 182, 190, 198, 206, 214, 222, 230, 237, 243, 248, 252, 21, 29, 37,
+ 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125, 133, 141, 149, 157,
+ 165, 173, 181, 189, 197, 205, 213, 221, 229, 236, 242, 247, 251, 254, 28,
+ 36, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124, 132, 140, 148,
+ 156, 164, 172, 180, 188, 196, 204, 212, 220, 228, 235, 241, 246, 250, 253,
255,
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_8x32[256]) = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
@@ -5435,9 +2583,7 @@ DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_32x8[256]) = {
135, 143, 151, 159, 167, 175, 183, 191, 199, 207, 215, 223, 231, 239, 247,
255,
};
-#endif // CONFIG_EXT_TX
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_8x8[64]) = {
0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57,
2, 10, 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59,
@@ -5451,27 +2597,12 @@ DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_8x8[64]) = {
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
};
-#endif // CONFIG_EXT_TX
-
-DECLARE_ALIGNED(16, static const int16_t, av1_col_iscan_8x8[64]) = {
- 0, 3, 8, 15, 22, 32, 40, 47, 1, 5, 11, 18, 26, 34, 44, 51,
- 2, 7, 13, 20, 28, 38, 46, 54, 4, 10, 16, 24, 31, 41, 50, 56,
- 6, 12, 21, 27, 35, 43, 52, 58, 9, 17, 25, 33, 39, 48, 55, 60,
- 14, 23, 30, 37, 45, 53, 59, 62, 19, 29, 36, 42, 49, 57, 61, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_row_iscan_8x8[64]) = {
- 0, 1, 2, 5, 8, 12, 19, 24, 3, 4, 7, 10, 15, 20, 30, 39,
- 6, 9, 13, 16, 21, 27, 37, 46, 11, 14, 17, 23, 28, 34, 44, 52,
- 18, 22, 25, 31, 35, 41, 50, 57, 26, 29, 33, 38, 43, 49, 55, 59,
- 32, 36, 42, 47, 51, 54, 60, 61, 40, 45, 48, 53, 56, 58, 62, 63,
-};
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x8[64]) = {
- 0, 2, 5, 9, 14, 22, 31, 37, 1, 4, 8, 13, 19, 26, 38, 44,
- 3, 6, 10, 17, 24, 30, 42, 49, 7, 11, 15, 21, 29, 36, 47, 53,
- 12, 16, 20, 27, 34, 43, 52, 57, 18, 23, 28, 35, 41, 48, 56, 60,
- 25, 32, 39, 45, 50, 55, 59, 62, 33, 40, 46, 51, 54, 58, 61, 63,
+ 0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42,
+ 3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53,
+ 10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
+ 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63
};
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x16[128]) = {
@@ -5486,14 +2617,14 @@ DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x16[128]) = {
};
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x8[128]) = {
- 0, 1, 3, 6, 10, 15, 21, 28, 36, 44, 52, 60, 68, 76, 84, 92,
- 2, 4, 7, 11, 16, 22, 29, 37, 45, 53, 61, 69, 77, 85, 93, 100,
- 5, 8, 12, 17, 23, 30, 38, 46, 54, 62, 70, 78, 86, 94, 101, 107,
- 9, 13, 18, 24, 31, 39, 47, 55, 63, 71, 79, 87, 95, 102, 108, 113,
- 14, 19, 25, 32, 40, 48, 56, 64, 72, 80, 88, 96, 103, 109, 114, 118,
- 20, 26, 33, 41, 49, 57, 65, 73, 81, 89, 97, 104, 110, 115, 119, 122,
- 27, 34, 42, 50, 58, 66, 74, 82, 90, 98, 105, 111, 116, 120, 123, 125,
- 35, 43, 51, 59, 67, 75, 83, 91, 99, 106, 112, 117, 121, 124, 126, 127,
+ 0, 2, 5, 9, 14, 20, 27, 35, 43, 51, 59, 67, 75, 83, 91, 99,
+ 1, 4, 8, 13, 19, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106,
+ 3, 7, 12, 18, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, 105, 112,
+ 6, 11, 17, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 111, 117,
+ 10, 16, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 110, 116, 121,
+ 15, 22, 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 109, 115, 120, 124,
+ 21, 29, 37, 45, 53, 61, 69, 77, 85, 93, 101, 108, 114, 119, 123, 126,
+ 28, 36, 44, 52, 60, 68, 76, 84, 92, 100, 107, 113, 118, 122, 125, 127,
};
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_8x16[128]) = {
@@ -5581,41 +2712,41 @@ DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x32[512]) = {
};
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x16[512]) = {
- 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105,
- 120, 136, 152, 168, 184, 200, 216, 232, 248, 264, 280, 296, 312, 328, 344,
- 360, 376, 2, 4, 7, 11, 16, 22, 29, 37, 46, 56, 67, 79, 92,
- 106, 121, 137, 153, 169, 185, 201, 217, 233, 249, 265, 281, 297, 313, 329,
- 345, 361, 377, 392, 5, 8, 12, 17, 23, 30, 38, 47, 57, 68, 80,
- 93, 107, 122, 138, 154, 170, 186, 202, 218, 234, 250, 266, 282, 298, 314,
- 330, 346, 362, 378, 393, 407, 9, 13, 18, 24, 31, 39, 48, 58, 69,
- 81, 94, 108, 123, 139, 155, 171, 187, 203, 219, 235, 251, 267, 283, 299,
- 315, 331, 347, 363, 379, 394, 408, 421, 14, 19, 25, 32, 40, 49, 59,
- 70, 82, 95, 109, 124, 140, 156, 172, 188, 204, 220, 236, 252, 268, 284,
- 300, 316, 332, 348, 364, 380, 395, 409, 422, 434, 20, 26, 33, 41, 50,
- 60, 71, 83, 96, 110, 125, 141, 157, 173, 189, 205, 221, 237, 253, 269,
- 285, 301, 317, 333, 349, 365, 381, 396, 410, 423, 435, 446, 27, 34, 42,
- 51, 61, 72, 84, 97, 111, 126, 142, 158, 174, 190, 206, 222, 238, 254,
- 270, 286, 302, 318, 334, 350, 366, 382, 397, 411, 424, 436, 447, 457, 35,
- 43, 52, 62, 73, 85, 98, 112, 127, 143, 159, 175, 191, 207, 223, 239,
- 255, 271, 287, 303, 319, 335, 351, 367, 383, 398, 412, 425, 437, 448, 458,
- 467, 44, 53, 63, 74, 86, 99, 113, 128, 144, 160, 176, 192, 208, 224,
- 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 399, 413, 426, 438, 449,
- 459, 468, 476, 54, 64, 75, 87, 100, 114, 129, 145, 161, 177, 193, 209,
- 225, 241, 257, 273, 289, 305, 321, 337, 353, 369, 385, 400, 414, 427, 439,
- 450, 460, 469, 477, 484, 65, 76, 88, 101, 115, 130, 146, 162, 178, 194,
- 210, 226, 242, 258, 274, 290, 306, 322, 338, 354, 370, 386, 401, 415, 428,
- 440, 451, 461, 470, 478, 485, 491, 77, 89, 102, 116, 131, 147, 163, 179,
- 195, 211, 227, 243, 259, 275, 291, 307, 323, 339, 355, 371, 387, 402, 416,
- 429, 441, 452, 462, 471, 479, 486, 492, 497, 90, 103, 117, 132, 148, 164,
- 180, 196, 212, 228, 244, 260, 276, 292, 308, 324, 340, 356, 372, 388, 403,
- 417, 430, 442, 453, 463, 472, 480, 487, 493, 498, 502, 104, 118, 133, 149,
- 165, 181, 197, 213, 229, 245, 261, 277, 293, 309, 325, 341, 357, 373, 389,
- 404, 418, 431, 443, 454, 464, 473, 481, 488, 494, 499, 503, 506, 119, 134,
- 150, 166, 182, 198, 214, 230, 246, 262, 278, 294, 310, 326, 342, 358, 374,
- 390, 405, 419, 432, 444, 455, 465, 474, 482, 489, 495, 500, 504, 507, 509,
+ 0, 2, 5, 9, 14, 20, 27, 35, 44, 54, 65, 77, 90, 104, 119,
135, 151, 167, 183, 199, 215, 231, 247, 263, 279, 295, 311, 327, 343, 359,
- 375, 391, 406, 420, 433, 445, 456, 466, 475, 483, 490, 496, 501, 505, 508,
- 510, 511,
+ 375, 391, 1, 4, 8, 13, 19, 26, 34, 43, 53, 64, 76, 89, 103,
+ 118, 134, 150, 166, 182, 198, 214, 230, 246, 262, 278, 294, 310, 326, 342,
+ 358, 374, 390, 406, 3, 7, 12, 18, 25, 33, 42, 52, 63, 75, 88,
+ 102, 117, 133, 149, 165, 181, 197, 213, 229, 245, 261, 277, 293, 309, 325,
+ 341, 357, 373, 389, 405, 420, 6, 11, 17, 24, 32, 41, 51, 62, 74,
+ 87, 101, 116, 132, 148, 164, 180, 196, 212, 228, 244, 260, 276, 292, 308,
+ 324, 340, 356, 372, 388, 404, 419, 433, 10, 16, 23, 31, 40, 50, 61,
+ 73, 86, 100, 115, 131, 147, 163, 179, 195, 211, 227, 243, 259, 275, 291,
+ 307, 323, 339, 355, 371, 387, 403, 418, 432, 445, 15, 22, 30, 39, 49,
+ 60, 72, 85, 99, 114, 130, 146, 162, 178, 194, 210, 226, 242, 258, 274,
+ 290, 306, 322, 338, 354, 370, 386, 402, 417, 431, 444, 456, 21, 29, 38,
+ 48, 59, 71, 84, 98, 113, 129, 145, 161, 177, 193, 209, 225, 241, 257,
+ 273, 289, 305, 321, 337, 353, 369, 385, 401, 416, 430, 443, 455, 466, 28,
+ 37, 47, 58, 70, 83, 97, 112, 128, 144, 160, 176, 192, 208, 224, 240,
+ 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 415, 429, 442, 454, 465,
+ 475, 36, 46, 57, 69, 82, 96, 111, 127, 143, 159, 175, 191, 207, 223,
+ 239, 255, 271, 287, 303, 319, 335, 351, 367, 383, 399, 414, 428, 441, 453,
+ 464, 474, 483, 45, 56, 68, 81, 95, 110, 126, 142, 158, 174, 190, 206,
+ 222, 238, 254, 270, 286, 302, 318, 334, 350, 366, 382, 398, 413, 427, 440,
+ 452, 463, 473, 482, 490, 55, 67, 80, 94, 109, 125, 141, 157, 173, 189,
+ 205, 221, 237, 253, 269, 285, 301, 317, 333, 349, 365, 381, 397, 412, 426,
+ 439, 451, 462, 472, 481, 489, 496, 66, 79, 93, 108, 124, 140, 156, 172,
+ 188, 204, 220, 236, 252, 268, 284, 300, 316, 332, 348, 364, 380, 396, 411,
+ 425, 438, 450, 461, 471, 480, 488, 495, 501, 78, 92, 107, 123, 139, 155,
+ 171, 187, 203, 219, 235, 251, 267, 283, 299, 315, 331, 347, 363, 379, 395,
+ 410, 424, 437, 449, 460, 470, 479, 487, 494, 500, 505, 91, 106, 122, 138,
+ 154, 170, 186, 202, 218, 234, 250, 266, 282, 298, 314, 330, 346, 362, 378,
+ 394, 409, 423, 436, 448, 459, 469, 478, 486, 493, 499, 504, 508, 105, 121,
+ 137, 153, 169, 185, 201, 217, 233, 249, 265, 281, 297, 313, 329, 345, 361,
+ 377, 393, 408, 422, 435, 447, 458, 468, 477, 485, 492, 498, 503, 507, 510,
+ 120, 136, 152, 168, 184, 200, 216, 232, 248, 264, 280, 296, 312, 328, 344,
+ 360, 376, 392, 407, 421, 434, 446, 457, 467, 476, 484, 491, 497, 502, 506,
+ 509, 511,
};
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_16x32[512]) = {
@@ -5767,7 +2898,6 @@ DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_32x16[512]) = {
510, 511,
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_16x16[256]) = {
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, 209, 225, 241,
@@ -5807,70 +2937,28 @@ DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_16x16[256]) = {
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
255,
};
-#endif // CONFIG_EXT_TX
-
-DECLARE_ALIGNED(16, static const int16_t, av1_col_iscan_16x16[256]) = {
- 0, 4, 11, 20, 31, 43, 59, 75, 85, 109, 130, 150, 165, 181, 195, 198,
- 1, 6, 14, 23, 34, 47, 64, 81, 95, 114, 135, 153, 171, 188, 201, 212,
- 2, 8, 16, 25, 38, 52, 67, 83, 101, 116, 136, 157, 172, 190, 205, 216,
- 3, 10, 18, 29, 41, 55, 71, 89, 103, 119, 141, 159, 176, 194, 208, 218,
- 5, 12, 21, 32, 45, 58, 74, 93, 104, 123, 144, 164, 179, 196, 210, 223,
- 7, 15, 26, 37, 49, 63, 78, 96, 112, 129, 146, 166, 182, 200, 215, 228,
- 9, 19, 28, 39, 54, 69, 86, 102, 117, 132, 151, 170, 187, 206, 220, 230,
- 13, 24, 35, 46, 60, 73, 91, 108, 122, 137, 154, 174, 189, 207, 224, 235,
- 17, 30, 40, 53, 66, 82, 98, 115, 126, 142, 161, 180, 197, 213, 227, 237,
- 22, 36, 48, 62, 76, 92, 105, 120, 133, 147, 167, 186, 203, 219, 232, 240,
- 27, 44, 56, 70, 84, 99, 113, 127, 140, 156, 175, 193, 209, 226, 236, 244,
- 33, 51, 68, 79, 94, 110, 125, 138, 149, 162, 184, 202, 217, 229, 241, 247,
- 42, 61, 77, 90, 106, 121, 134, 148, 160, 173, 191, 211, 225, 238, 245, 251,
- 50, 72, 87, 100, 118, 128, 145, 158, 168, 183, 204, 222, 233, 242, 249, 253,
- 57, 80, 97, 111, 131, 143, 155, 169, 178, 192, 214, 231, 239, 246, 250, 254,
- 65, 88, 107, 124, 139, 152, 163, 177, 185, 199, 221, 234, 243, 248, 252, 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_row_iscan_16x16[256]) = {
- 0, 1, 2, 4, 6, 9, 12, 17, 22, 29, 36, 43, 54, 64, 76,
- 86, 3, 5, 7, 11, 15, 19, 25, 32, 38, 48, 59, 68, 84, 99,
- 115, 130, 8, 10, 13, 18, 23, 27, 33, 42, 51, 60, 72, 88, 103,
- 119, 142, 167, 14, 16, 20, 26, 31, 37, 44, 53, 61, 73, 85, 100,
- 116, 135, 161, 185, 21, 24, 30, 35, 40, 47, 55, 65, 74, 81, 94,
- 112, 133, 154, 179, 205, 28, 34, 39, 45, 50, 58, 67, 77, 87, 96,
- 106, 121, 146, 169, 196, 212, 41, 46, 49, 56, 63, 70, 79, 90, 98,
- 107, 122, 138, 159, 182, 207, 222, 52, 57, 62, 69, 75, 83, 93, 102,
- 110, 120, 134, 150, 176, 195, 215, 226, 66, 71, 78, 82, 91, 97, 108,
- 113, 127, 136, 148, 168, 188, 202, 221, 232, 80, 89, 92, 101, 105, 114,
- 125, 131, 139, 151, 162, 177, 192, 208, 223, 234, 95, 104, 109, 117, 123,
- 128, 143, 144, 155, 165, 175, 190, 206, 219, 233, 239, 111, 118, 124, 129,
- 140, 147, 157, 164, 170, 181, 191, 203, 224, 230, 240, 243, 126, 132, 137,
- 145, 153, 160, 174, 178, 184, 197, 204, 216, 231, 237, 244, 246, 141, 149,
- 156, 166, 172, 180, 189, 199, 200, 210, 220, 228, 238, 242, 249, 251, 152,
- 163, 171, 183, 186, 193, 201, 211, 214, 218, 227, 236, 245, 247, 252, 253,
- 158, 173, 187, 194, 198, 209, 213, 217, 225, 229, 235, 241, 248, 250, 254,
- 255,
-};
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x16[256]) = {
- 0, 2, 5, 9, 17, 24, 36, 44, 55, 72, 88, 104, 128, 143, 166,
- 179, 1, 4, 8, 13, 20, 30, 40, 54, 66, 79, 96, 113, 141, 154,
- 178, 196, 3, 7, 11, 18, 25, 33, 46, 57, 71, 86, 101, 119, 148,
- 164, 186, 201, 6, 12, 16, 23, 31, 39, 53, 64, 78, 92, 110, 127,
- 153, 169, 193, 208, 10, 14, 19, 28, 37, 47, 58, 67, 84, 98, 114,
- 133, 161, 176, 198, 214, 15, 21, 26, 34, 43, 52, 65, 77, 91, 106,
- 120, 140, 165, 185, 205, 221, 22, 27, 32, 41, 48, 60, 73, 85, 99,
- 116, 130, 151, 175, 190, 211, 225, 29, 35, 42, 49, 59, 69, 81, 95,
- 108, 125, 139, 155, 182, 197, 217, 229, 38, 45, 51, 61, 68, 80, 93,
- 105, 118, 134, 150, 168, 191, 207, 223, 234, 50, 56, 63, 74, 83, 94,
- 109, 117, 129, 147, 163, 177, 199, 213, 228, 238, 62, 70, 76, 87, 97,
- 107, 122, 131, 145, 159, 172, 188, 210, 222, 235, 242, 75, 82, 90, 102,
- 112, 124, 138, 146, 157, 173, 187, 202, 219, 230, 240, 245, 89, 100, 111,
- 123, 132, 142, 156, 167, 180, 189, 203, 216, 231, 237, 246, 250, 103, 115,
- 126, 136, 149, 162, 171, 183, 194, 204, 215, 224, 236, 241, 248, 252, 121,
- 135, 144, 158, 170, 181, 192, 200, 209, 218, 227, 233, 243, 244, 251, 254,
- 137, 152, 160, 174, 184, 195, 206, 212, 220, 226, 232, 239, 247, 249, 253,
- 255,
+ 0, 1, 5, 6, 14, 15, 27, 28, 44, 45, 65, 66, 90, 91, 119,
+ 120, 2, 4, 7, 13, 16, 26, 29, 43, 46, 64, 67, 89, 92, 118,
+ 121, 150, 3, 8, 12, 17, 25, 30, 42, 47, 63, 68, 88, 93, 117,
+ 122, 149, 151, 9, 11, 18, 24, 31, 41, 48, 62, 69, 87, 94, 116,
+ 123, 148, 152, 177, 10, 19, 23, 32, 40, 49, 61, 70, 86, 95, 115,
+ 124, 147, 153, 176, 178, 20, 22, 33, 39, 50, 60, 71, 85, 96, 114,
+ 125, 146, 154, 175, 179, 200, 21, 34, 38, 51, 59, 72, 84, 97, 113,
+ 126, 145, 155, 174, 180, 199, 201, 35, 37, 52, 58, 73, 83, 98, 112,
+ 127, 144, 156, 173, 181, 198, 202, 219, 36, 53, 57, 74, 82, 99, 111,
+ 128, 143, 157, 172, 182, 197, 203, 218, 220, 54, 56, 75, 81, 100, 110,
+ 129, 142, 158, 171, 183, 196, 204, 217, 221, 234, 55, 76, 80, 101, 109,
+ 130, 141, 159, 170, 184, 195, 205, 216, 222, 233, 235, 77, 79, 102, 108,
+ 131, 140, 160, 169, 185, 194, 206, 215, 223, 232, 236, 245, 78, 103, 107,
+ 132, 139, 161, 168, 186, 193, 207, 214, 224, 231, 237, 244, 246, 104, 106,
+ 133, 138, 162, 167, 187, 192, 208, 213, 225, 230, 238, 243, 247, 252, 105,
+ 134, 137, 163, 166, 188, 191, 209, 212, 226, 229, 239, 242, 248, 251, 253,
+ 135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254,
+ 255
};
-#if CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_32x32[1024]) = {
0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416,
448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864,
@@ -6029,1423 +3117,118 @@ DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_32x32[1024]) = {
1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013,
1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
};
-#endif // CONFIG_EXT_TX
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x32[1024]) = {
- 0, 2, 5, 10, 17, 25, 38, 47, 62, 83, 101, 121, 145,
- 170, 193, 204, 210, 219, 229, 233, 245, 257, 275, 299, 342, 356,
- 377, 405, 455, 471, 495, 527, 1, 4, 8, 15, 22, 30, 45,
- 58, 74, 92, 112, 133, 158, 184, 203, 215, 222, 228, 234, 237,
- 256, 274, 298, 317, 355, 376, 404, 426, 470, 494, 526, 551, 3,
- 7, 12, 18, 28, 36, 52, 64, 82, 102, 118, 142, 164, 189,
- 208, 217, 224, 231, 235, 238, 273, 297, 316, 329, 375, 403, 425,
- 440, 493, 525, 550, 567, 6, 11, 16, 23, 31, 43, 60, 73,
- 90, 109, 126, 150, 173, 196, 211, 220, 226, 232, 236, 239, 296,
- 315, 328, 335, 402, 424, 439, 447, 524, 549, 566, 575, 9, 14,
- 19, 29, 37, 50, 65, 78, 95, 116, 134, 157, 179, 201, 214,
- 223, 244, 255, 272, 295, 341, 354, 374, 401, 454, 469, 492, 523,
- 582, 596, 617, 645, 13, 20, 26, 35, 44, 54, 72, 85, 105,
- 123, 140, 163, 182, 205, 216, 225, 254, 271, 294, 314, 353, 373,
- 400, 423, 468, 491, 522, 548, 595, 616, 644, 666, 21, 27, 33,
- 42, 53, 63, 80, 94, 113, 132, 151, 172, 190, 209, 218, 227,
- 270, 293, 313, 327, 372, 399, 422, 438, 490, 521, 547, 565, 615,
- 643, 665, 680, 24, 32, 39, 48, 57, 71, 88, 104, 120, 139,
- 159, 178, 197, 212, 221, 230, 292, 312, 326, 334, 398, 421, 437,
- 446, 520, 546, 564, 574, 642, 664, 679, 687, 34, 40, 46, 56,
- 68, 81, 96, 111, 130, 147, 167, 186, 243, 253, 269, 291, 340,
- 352, 371, 397, 453, 467, 489, 519, 581, 594, 614, 641, 693, 705,
- 723, 747, 41, 49, 55, 67, 77, 91, 107, 124, 138, 161, 177,
- 194, 252, 268, 290, 311, 351, 370, 396, 420, 466, 488, 518, 545,
- 593, 613, 640, 663, 704, 722, 746, 765, 51, 59, 66, 76, 89,
- 99, 119, 131, 149, 168, 181, 200, 267, 289, 310, 325, 369, 395,
- 419, 436, 487, 517, 544, 563, 612, 639, 662, 678, 721, 745, 764,
- 777, 61, 69, 75, 87, 100, 114, 129, 144, 162, 180, 191, 207,
- 288, 309, 324, 333, 394, 418, 435, 445, 516, 543, 562, 573, 638,
- 661, 677, 686, 744, 763, 776, 783, 70, 79, 86, 97, 108, 122,
- 137, 155, 242, 251, 266, 287, 339, 350, 368, 393, 452, 465, 486,
- 515, 580, 592, 611, 637, 692, 703, 720, 743, 788, 798, 813, 833,
- 84, 93, 103, 110, 125, 141, 154, 171, 250, 265, 286, 308, 349,
- 367, 392, 417, 464, 485, 514, 542, 591, 610, 636, 660, 702, 719,
- 742, 762, 797, 812, 832, 848, 98, 106, 115, 127, 143, 156, 169,
- 185, 264, 285, 307, 323, 366, 391, 416, 434, 484, 513, 541, 561,
- 609, 635, 659, 676, 718, 741, 761, 775, 811, 831, 847, 858, 117,
- 128, 136, 148, 160, 175, 188, 198, 284, 306, 322, 332, 390, 415,
- 433, 444, 512, 540, 560, 572, 634, 658, 675, 685, 740, 760, 774,
- 782, 830, 846, 857, 863, 135, 146, 152, 165, 241, 249, 263, 283,
- 338, 348, 365, 389, 451, 463, 483, 511, 579, 590, 608, 633, 691,
- 701, 717, 739, 787, 796, 810, 829, 867, 875, 887, 903, 153, 166,
- 174, 183, 248, 262, 282, 305, 347, 364, 388, 414, 462, 482, 510,
- 539, 589, 607, 632, 657, 700, 716, 738, 759, 795, 809, 828, 845,
- 874, 886, 902, 915, 176, 187, 195, 202, 261, 281, 304, 321, 363,
- 387, 413, 432, 481, 509, 538, 559, 606, 631, 656, 674, 715, 737,
- 758, 773, 808, 827, 844, 856, 885, 901, 914, 923, 192, 199, 206,
- 213, 280, 303, 320, 331, 386, 412, 431, 443, 508, 537, 558, 571,
- 630, 655, 673, 684, 736, 757, 772, 781, 826, 843, 855, 862, 900,
- 913, 922, 927, 240, 247, 260, 279, 337, 346, 362, 385, 450, 461,
- 480, 507, 578, 588, 605, 629, 690, 699, 714, 735, 786, 794, 807,
- 825, 866, 873, 884, 899, 930, 936, 945, 957, 246, 259, 278, 302,
- 345, 361, 384, 411, 460, 479, 506, 536, 587, 604, 628, 654, 698,
- 713, 734, 756, 793, 806, 824, 842, 872, 883, 898, 912, 935, 944,
- 956, 966, 258, 277, 301, 319, 360, 383, 410, 430, 478, 505, 535,
- 557, 603, 627, 653, 672, 712, 733, 755, 771, 805, 823, 841, 854,
- 882, 897, 911, 921, 943, 955, 965, 972, 276, 300, 318, 330, 382,
- 409, 429, 442, 504, 534, 556, 570, 626, 652, 671, 683, 732, 754,
- 770, 780, 822, 840, 853, 861, 896, 910, 920, 926, 954, 964, 971,
- 975, 336, 344, 359, 381, 449, 459, 477, 503, 577, 586, 602, 625,
- 689, 697, 711, 731, 785, 792, 804, 821, 865, 871, 881, 895, 929,
- 934, 942, 953, 977, 981, 987, 995, 343, 358, 380, 408, 458, 476,
- 502, 533, 585, 601, 624, 651, 696, 710, 730, 753, 791, 803, 820,
- 839, 870, 880, 894, 909, 933, 941, 952, 963, 980, 986, 994, 1001,
- 357, 379, 407, 428, 475, 501, 532, 555, 600, 623, 650, 670, 709,
- 729, 752, 769, 802, 819, 838, 852, 879, 893, 908, 919, 940, 951,
- 962, 970, 985, 993, 1000, 1005, 378, 406, 427, 441, 500, 531, 554,
- 569, 622, 649, 669, 682, 728, 751, 768, 779, 818, 837, 851, 860,
- 892, 907, 918, 925, 950, 961, 969, 974, 992, 999, 1004, 1007, 448,
- 457, 474, 499, 576, 584, 599, 621, 688, 695, 708, 727, 784, 790,
- 801, 817, 864, 869, 878, 891, 928, 932, 939, 949, 976, 979, 984,
- 991, 1008, 1010, 1013, 1017, 456, 473, 498, 530, 583, 598, 620, 648,
- 694, 707, 726, 750, 789, 800, 816, 836, 868, 877, 890, 906, 931,
- 938, 948, 960, 978, 983, 990, 998, 1009, 1012, 1016, 1020, 472, 497,
- 529, 553, 597, 619, 647, 668, 706, 725, 749, 767, 799, 815, 835,
- 850, 876, 889, 905, 917, 937, 947, 959, 968, 982, 989, 997, 1003,
- 1011, 1015, 1019, 1022, 496, 528, 552, 568, 618, 646, 667, 681, 724,
- 748, 766, 778, 814, 834, 849, 859, 888, 904, 916, 924, 946, 958,
- 967, 973, 988, 996, 1002, 1006, 1014, 1018, 1021, 1023,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_v2_iscan_32x32[1024]) = {
- 0, 1, 4, 9, 15, 22, 33, 43, 56, 71, 86, 104, 121,
- 142, 166, 189, 512, 518, 527, 539, 551, 566, 584, 602, 621, 644,
- 668, 695, 721, 748, 780, 811, 2, 3, 6, 11, 17, 26, 35,
- 45, 58, 73, 90, 106, 123, 146, 168, 193, 513, 519, 528, 540,
- 553, 567, 585, 603, 622, 647, 670, 696, 722, 751, 783, 812, 5,
- 7, 8, 13, 20, 28, 37, 50, 62, 75, 92, 108, 129, 150,
- 170, 195, 514, 521, 530, 541, 554, 569, 587, 605, 625, 649, 671,
- 699, 725, 752, 785, 815, 10, 12, 14, 19, 23, 31, 41, 52,
- 65, 81, 96, 113, 133, 152, 175, 201, 515, 522, 531, 542, 556,
- 572, 589, 607, 629, 651, 673, 700, 726, 757, 788, 819, 16, 18,
- 21, 24, 30, 39, 48, 59, 69, 83, 100, 119, 137, 158, 181,
- 203, 516, 523, 534, 545, 559, 574, 591, 610, 632, 654, 679, 704,
- 730, 762, 791, 824, 25, 27, 29, 32, 40, 46, 54, 67, 79,
- 94, 109, 127, 143, 164, 185, 210, 517, 525, 535, 547, 561, 578,
- 595, 615, 635, 656, 684, 707, 737, 766, 793, 830, 34, 36, 38,
- 42, 49, 55, 64, 76, 87, 102, 117, 135, 154, 176, 197, 219,
- 520, 529, 538, 550, 565, 580, 598, 618, 639, 664, 687, 712, 741,
- 769, 802, 833, 44, 47, 51, 53, 60, 68, 77, 85, 98, 114,
- 131, 147, 162, 183, 208, 227, 524, 533, 544, 557, 571, 588, 606,
- 623, 645, 667, 692, 720, 747, 776, 806, 838, 57, 61, 63, 66,
- 70, 80, 88, 99, 112, 124, 140, 159, 179, 199, 216, 233, 526,
- 536, 548, 562, 577, 593, 613, 633, 653, 676, 701, 727, 756, 786,
- 814, 847, 72, 74, 78, 82, 84, 95, 103, 115, 125, 139, 156,
- 173, 190, 211, 229, 246, 532, 543, 555, 568, 581, 601, 619, 637,
- 663, 685, 709, 738, 763, 792, 826, 855, 89, 91, 93, 97, 101,
- 110, 118, 132, 141, 157, 171, 186, 206, 224, 241, 255, 537, 549,
- 560, 576, 592, 608, 628, 650, 669, 693, 719, 744, 773, 805, 834,
- 862, 105, 107, 111, 116, 120, 128, 136, 148, 160, 174, 187, 205,
- 221, 236, 251, 267, 546, 558, 570, 583, 600, 617, 636, 657, 680,
- 706, 729, 758, 787, 813, 846, 871, 122, 126, 130, 134, 138, 144,
- 155, 163, 180, 191, 207, 222, 232, 248, 264, 278, 552, 564, 579,
- 594, 609, 630, 648, 666, 688, 715, 742, 768, 797, 827, 856, 877,
- 145, 149, 151, 153, 161, 165, 177, 184, 200, 212, 225, 237, 249,
- 262, 275, 289, 563, 575, 590, 604, 620, 638, 660, 683, 705, 728,
- 753, 779, 809, 839, 866, 889, 167, 169, 172, 178, 182, 188, 198,
- 209, 217, 230, 242, 252, 265, 276, 288, 301, 573, 586, 599, 616,
- 634, 652, 672, 694, 716, 743, 767, 794, 825, 850, 874, 899, 192,
- 194, 196, 202, 204, 213, 220, 228, 234, 247, 256, 268, 279, 290,
- 302, 315, 582, 597, 614, 631, 646, 665, 686, 708, 732, 759, 784,
- 810, 837, 863, 886, 908, 214, 215, 218, 223, 226, 231, 239, 244,
- 253, 261, 271, 283, 292, 304, 317, 325, 596, 611, 626, 642, 661,
- 681, 702, 723, 745, 770, 800, 828, 853, 875, 897, 919, 235, 238,
- 240, 243, 245, 250, 257, 263, 270, 280, 287, 298, 307, 319, 329,
- 340, 612, 624, 640, 658, 677, 697, 717, 739, 764, 789, 816, 844,
- 867, 890, 909, 927, 254, 258, 259, 260, 266, 269, 272, 282, 286,
- 296, 303, 312, 323, 333, 341, 355, 627, 641, 655, 674, 690, 713,
- 735, 760, 781, 807, 835, 857, 880, 902, 921, 940, 273, 274, 277,
- 281, 284, 285, 291, 299, 305, 310, 320, 327, 337, 346, 357, 369,
- 643, 659, 675, 689, 710, 733, 754, 777, 803, 831, 851, 872, 892,
- 913, 934, 950, 293, 294, 295, 297, 300, 306, 308, 314, 321, 326,
- 335, 343, 352, 361, 372, 378, 662, 678, 691, 711, 731, 749, 774,
- 798, 822, 848, 869, 887, 906, 925, 942, 961, 309, 311, 313, 316,
- 318, 322, 324, 332, 338, 344, 351, 358, 367, 375, 386, 394, 682,
- 698, 714, 734, 750, 772, 795, 820, 842, 864, 884, 904, 923, 938,
- 954, 967, 328, 330, 331, 334, 336, 339, 342, 348, 354, 359, 366,
- 374, 382, 391, 400, 409, 703, 718, 736, 755, 775, 796, 818, 840,
- 860, 882, 900, 917, 936, 952, 965, 977, 345, 347, 349, 350, 353,
- 356, 360, 364, 371, 376, 383, 389, 395, 406, 412, 423, 724, 740,
- 761, 778, 799, 821, 841, 859, 878, 895, 915, 932, 948, 963, 975,
- 986, 362, 363, 365, 368, 370, 373, 377, 379, 387, 392, 397, 405,
- 411, 420, 428, 439, 746, 765, 782, 804, 823, 843, 861, 879, 894,
- 911, 930, 946, 959, 973, 984, 994, 380, 381, 384, 385, 388, 390,
- 393, 396, 403, 408, 413, 422, 427, 436, 444, 452, 771, 790, 808,
- 832, 849, 865, 883, 896, 912, 928, 944, 957, 971, 982, 992, 1001,
- 398, 399, 401, 402, 404, 407, 410, 414, 419, 425, 429, 437, 442,
- 449, 458, 465, 801, 817, 836, 852, 870, 885, 901, 916, 931, 945,
- 956, 969, 980, 990, 999, 1007, 415, 416, 417, 418, 421, 424, 426,
- 430, 434, 441, 445, 453, 459, 463, 473, 480, 829, 845, 858, 873,
- 888, 905, 918, 933, 947, 958, 970, 979, 988, 997, 1005, 1012, 431,
- 432, 433, 435, 438, 440, 443, 446, 451, 456, 461, 468, 475, 479,
- 488, 494, 854, 868, 881, 893, 907, 924, 937, 949, 960, 972, 981,
- 989, 996, 1003, 1010, 1016, 447, 448, 450, 454, 455, 457, 460, 462,
- 469, 472, 477, 482, 490, 495, 499, 503, 876, 891, 903, 914, 926,
- 939, 953, 964, 974, 983, 991, 998, 1004, 1009, 1014, 1019, 464, 466,
- 467, 470, 471, 474, 476, 478, 484, 489, 493, 497, 501, 504, 506,
- 508, 898, 910, 922, 935, 943, 955, 966, 976, 985, 993, 1000, 1006,
- 1011, 1015, 1018, 1021, 481, 483, 485, 486, 487, 491, 492, 496, 498,
- 500, 502, 505, 507, 509, 510, 511, 920, 929, 941, 951, 962, 968,
- 978, 987, 995, 1002, 1008, 1013, 1017, 1020, 1022, 1023,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_h2_iscan_32x32[1024]) = {
- 0, 1, 4, 9, 15, 22, 33, 43, 56, 71, 86, 104, 121,
- 142, 166, 189, 214, 233, 254, 273, 292, 309, 328, 345, 362, 378,
- 397, 415, 431, 447, 464, 481, 2, 3, 6, 11, 17, 26, 35,
- 45, 58, 73, 90, 106, 123, 146, 168, 193, 215, 236, 255, 274,
- 294, 310, 329, 346, 363, 381, 399, 416, 432, 448, 465, 482, 5,
- 7, 8, 13, 20, 28, 37, 50, 62, 75, 92, 108, 129, 150,
- 170, 195, 216, 240, 259, 275, 295, 312, 331, 348, 365, 383, 400,
- 417, 433, 449, 467, 485, 10, 12, 14, 19, 23, 31, 41, 52,
- 65, 81, 96, 113, 133, 152, 175, 201, 221, 243, 260, 280, 297,
- 315, 333, 350, 367, 385, 402, 418, 434, 452, 470, 486, 16, 18,
- 21, 24, 30, 39, 48, 59, 69, 83, 100, 119, 137, 158, 181,
- 203, 226, 244, 264, 283, 300, 318, 335, 353, 370, 388, 404, 420,
- 438, 455, 471, 487, 25, 27, 29, 32, 40, 46, 54, 67, 79,
- 94, 109, 127, 143, 164, 185, 210, 231, 250, 269, 285, 304, 322,
- 339, 356, 373, 389, 407, 423, 440, 457, 473, 491, 34, 36, 38,
- 42, 49, 55, 64, 76, 87, 102, 117, 135, 154, 176, 197, 219,
- 239, 256, 272, 291, 308, 324, 341, 359, 377, 393, 410, 426, 442,
- 460, 476, 492, 44, 47, 51, 53, 60, 68, 77, 85, 98, 114,
- 131, 147, 162, 183, 208, 227, 245, 262, 282, 298, 314, 332, 349,
- 364, 379, 396, 412, 430, 446, 462, 478, 495, 57, 61, 63, 66,
- 70, 80, 88, 99, 112, 124, 140, 159, 179, 199, 217, 234, 253,
- 270, 286, 305, 321, 337, 354, 371, 387, 403, 419, 435, 451, 468,
- 484, 498, 72, 74, 78, 82, 84, 95, 103, 115, 125, 139, 156,
- 173, 190, 211, 229, 246, 261, 281, 296, 311, 325, 344, 360, 375,
- 392, 408, 425, 441, 456, 472, 489, 500, 89, 91, 93, 97, 101,
- 110, 118, 132, 141, 157, 171, 186, 206, 224, 241, 257, 271, 287,
- 303, 320, 336, 351, 366, 384, 398, 413, 429, 445, 461, 477, 493,
- 502, 105, 107, 111, 116, 120, 128, 136, 148, 160, 174, 187, 205,
- 222, 237, 251, 267, 284, 299, 313, 327, 343, 358, 374, 390, 405,
- 422, 437, 453, 469, 483, 497, 505, 122, 126, 130, 134, 138, 144,
- 155, 163, 180, 191, 207, 223, 232, 248, 265, 278, 293, 307, 323,
- 338, 352, 368, 382, 395, 411, 427, 443, 459, 475, 490, 501, 507,
- 145, 149, 151, 153, 161, 165, 177, 184, 200, 212, 225, 238, 249,
- 263, 276, 289, 306, 319, 334, 347, 361, 376, 391, 406, 421, 436,
- 450, 463, 479, 496, 504, 509, 167, 169, 172, 178, 182, 188, 198,
- 209, 218, 230, 242, 252, 266, 277, 288, 301, 317, 330, 342, 357,
- 372, 386, 401, 414, 428, 444, 458, 474, 488, 499, 506, 510, 192,
- 194, 196, 202, 204, 213, 220, 228, 235, 247, 258, 268, 279, 290,
- 302, 316, 326, 340, 355, 369, 380, 394, 409, 424, 439, 454, 466,
- 480, 494, 503, 508, 511, 512, 513, 514, 515, 516, 517, 520, 523,
- 526, 532, 537, 545, 551, 561, 573, 581, 596, 610, 625, 642, 661,
- 680, 701, 722, 745, 770, 800, 827, 853, 875, 897, 919, 518, 519,
- 521, 522, 524, 525, 528, 533, 536, 542, 549, 557, 564, 575, 585,
- 597, 611, 623, 640, 656, 676, 696, 717, 739, 763, 789, 815, 844,
- 867, 889, 909, 927, 527, 529, 530, 531, 534, 535, 538, 544, 548,
- 555, 560, 569, 579, 589, 598, 614, 626, 641, 655, 673, 690, 712,
- 735, 760, 780, 806, 834, 857, 880, 902, 921, 940, 539, 540, 541,
- 543, 546, 547, 550, 558, 562, 567, 576, 583, 593, 603, 616, 631,
- 643, 657, 674, 689, 710, 733, 752, 776, 803, 830, 850, 872, 892,
- 913, 934, 950, 552, 553, 554, 556, 559, 563, 565, 571, 577, 582,
- 591, 600, 609, 620, 634, 644, 662, 677, 691, 711, 730, 748, 773,
- 798, 822, 847, 869, 887, 906, 925, 942, 961, 566, 568, 570, 572,
- 574, 578, 580, 588, 594, 601, 608, 617, 629, 637, 652, 665, 681,
- 697, 713, 734, 749, 772, 793, 819, 842, 863, 884, 904, 923, 938,
- 954, 967, 584, 586, 587, 590, 592, 595, 599, 605, 613, 618, 628,
- 636, 648, 660, 671, 686, 702, 718, 736, 753, 774, 794, 818, 840,
- 860, 882, 900, 917, 936, 952, 965, 977, 602, 604, 606, 607, 612,
- 615, 619, 624, 633, 638, 649, 658, 666, 683, 692, 707, 723, 740,
- 761, 777, 799, 820, 841, 859, 877, 895, 915, 932, 948, 963, 975,
- 986, 621, 622, 627, 630, 632, 635, 639, 645, 653, 663, 668, 682,
- 688, 704, 716, 732, 746, 764, 781, 804, 823, 843, 861, 878, 894,
- 911, 930, 946, 959, 973, 984, 994, 646, 647, 650, 651, 654, 659,
- 664, 667, 678, 685, 693, 706, 715, 728, 743, 757, 771, 790, 807,
- 831, 848, 864, 883, 896, 912, 928, 944, 957, 971, 982, 992, 1001,
- 669, 670, 672, 675, 679, 684, 687, 694, 703, 709, 719, 729, 741,
- 754, 767, 783, 801, 816, 835, 851, 870, 885, 901, 916, 931, 945,
- 956, 969, 980, 990, 999, 1007, 695, 698, 699, 700, 705, 708, 714,
- 720, 726, 738, 744, 758, 768, 779, 795, 810, 828, 845, 858, 873,
- 888, 905, 918, 933, 947, 958, 970, 979, 988, 997, 1005, 1012, 721,
- 724, 725, 727, 731, 737, 742, 747, 756, 765, 775, 786, 797, 809,
- 825, 837, 854, 868, 881, 893, 907, 924, 937, 949, 960, 972, 981,
- 989, 996, 1003, 1010, 1016, 750, 751, 755, 759, 762, 766, 769, 778,
- 787, 792, 805, 812, 829, 838, 852, 865, 876, 890, 903, 914, 926,
- 939, 953, 964, 974, 983, 991, 998, 1004, 1009, 1014, 1019, 782, 784,
- 785, 788, 791, 796, 802, 808, 814, 826, 836, 846, 856, 866, 874,
- 886, 898, 910, 922, 935, 943, 955, 966, 976, 985, 993, 1000, 1006,
- 1011, 1015, 1018, 1021, 811, 813, 817, 821, 824, 832, 833, 839, 849,
- 855, 862, 871, 879, 891, 899, 908, 920, 929, 941, 951, 962, 968,
- 978, 987, 995, 1002, 1008, 1013, 1017, 1020, 1022, 1023,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_qtr_iscan_32x32[1024]) = {
- 0, 1, 4, 9, 15, 22, 33, 43, 56, 71, 86, 104, 121,
- 142, 166, 189, 256, 268, 286, 310, 334, 364, 400, 435, 471, 510,
- 553, 598, 640, 683, 732, 780, 2, 3, 6, 11, 17, 26, 35,
- 45, 58, 73, 90, 106, 123, 146, 168, 193, 258, 270, 288, 312,
- 338, 366, 402, 437, 473, 516, 557, 600, 642, 687, 736, 782, 5,
- 7, 8, 13, 20, 28, 37, 50, 62, 75, 92, 108, 129, 150,
- 170, 195, 260, 274, 292, 314, 340, 370, 406, 441, 478, 520, 559,
- 604, 646, 689, 740, 788, 10, 12, 14, 19, 23, 31, 41, 52,
- 65, 81, 96, 113, 133, 152, 175, 201, 262, 276, 294, 316, 344,
- 376, 410, 445, 484, 524, 563, 606, 648, 697, 746, 793, 16, 18,
- 21, 24, 30, 39, 48, 59, 69, 83, 100, 119, 137, 158, 181,
- 203, 264, 278, 300, 322, 350, 380, 414, 451, 490, 530, 571, 612,
- 656, 705, 750, 799, 25, 27, 29, 32, 40, 46, 54, 67, 79,
- 94, 109, 127, 143, 164, 185, 210, 266, 282, 302, 326, 354, 388,
- 422, 459, 496, 533, 579, 618, 665, 711, 754, 809, 34, 36, 38,
- 42, 49, 55, 64, 76, 87, 102, 117, 135, 154, 176, 197, 216,
- 272, 289, 308, 332, 362, 392, 427, 465, 504, 545, 585, 626, 671,
- 717, 766, 813, 44, 47, 51, 53, 60, 68, 77, 85, 98, 114,
- 131, 147, 162, 183, 208, 222, 279, 298, 320, 346, 374, 408, 442,
- 475, 511, 551, 592, 638, 681, 726, 772, 821, 57, 61, 63, 66,
- 70, 80, 88, 99, 112, 124, 140, 159, 179, 199, 214, 227, 284,
- 304, 328, 355, 386, 418, 455, 492, 528, 567, 608, 649, 695, 742,
- 786, 833, 72, 74, 78, 82, 84, 95, 103, 115, 125, 139, 156,
- 173, 190, 211, 224, 233, 296, 317, 342, 367, 394, 433, 466, 500,
- 543, 581, 622, 667, 707, 752, 803, 843, 89, 91, 93, 97, 101,
- 110, 118, 132, 141, 157, 171, 186, 206, 220, 231, 239, 306, 330,
- 352, 384, 415, 447, 482, 521, 554, 593, 636, 677, 722, 770, 815,
- 852, 105, 107, 111, 116, 120, 128, 136, 148, 160, 174, 187, 205,
- 218, 229, 237, 244, 323, 347, 371, 398, 431, 463, 498, 534, 573,
- 616, 654, 698, 743, 783, 831, 864, 122, 126, 130, 134, 138, 144,
- 155, 163, 180, 191, 207, 219, 226, 235, 242, 248, 335, 360, 390,
- 419, 449, 485, 518, 549, 587, 630, 672, 715, 760, 805, 845, 872,
- 145, 149, 151, 153, 161, 165, 177, 184, 200, 212, 221, 230, 236,
- 241, 246, 251, 356, 382, 411, 438, 469, 501, 539, 577, 613, 652,
- 690, 730, 776, 822, 858, 886, 167, 169, 172, 178, 182, 188, 198,
- 209, 215, 225, 232, 238, 243, 247, 250, 253, 378, 403, 428, 461,
- 494, 526, 560, 594, 632, 675, 713, 755, 801, 837, 868, 897, 192,
- 194, 196, 202, 204, 213, 217, 223, 228, 234, 240, 245, 249, 252,
- 254, 255, 395, 425, 457, 488, 512, 547, 583, 619, 659, 699, 737,
- 778, 819, 854, 882, 907, 257, 259, 261, 263, 265, 267, 273, 280,
- 285, 297, 307, 324, 336, 357, 379, 396, 424, 452, 479, 508, 541,
- 574, 609, 643, 679, 719, 764, 806, 841, 870, 895, 919, 269, 271,
- 275, 277, 281, 283, 290, 299, 305, 318, 331, 348, 361, 383, 404,
- 426, 453, 476, 506, 535, 568, 601, 634, 669, 708, 748, 789, 829,
- 860, 887, 909, 927, 287, 291, 293, 295, 301, 303, 309, 321, 329,
- 343, 353, 372, 391, 412, 429, 458, 480, 507, 532, 564, 590, 627,
- 663, 703, 733, 773, 816, 847, 876, 901, 921, 940, 311, 313, 315,
- 319, 325, 327, 333, 349, 358, 368, 385, 399, 420, 439, 462, 489,
- 509, 536, 565, 589, 624, 661, 691, 727, 768, 810, 838, 866, 890,
- 913, 934, 950, 337, 339, 341, 345, 351, 359, 363, 375, 387, 397,
- 416, 432, 450, 470, 495, 513, 542, 569, 591, 625, 657, 684, 723,
- 762, 797, 834, 862, 884, 905, 925, 942, 961, 365, 369, 373, 377,
- 381, 389, 393, 409, 421, 434, 448, 464, 486, 502, 527, 548, 575,
- 602, 628, 662, 685, 721, 756, 794, 827, 855, 880, 903, 923, 938,
- 954, 967, 401, 405, 407, 413, 417, 423, 430, 443, 456, 467, 483,
- 499, 519, 540, 561, 584, 610, 635, 664, 692, 724, 757, 792, 825,
- 850, 878, 899, 917, 936, 952, 965, 977, 436, 440, 444, 446, 454,
- 460, 468, 477, 493, 503, 522, 537, 550, 578, 595, 620, 644, 670,
- 704, 728, 763, 795, 826, 849, 873, 893, 915, 932, 948, 963, 975,
- 986, 472, 474, 481, 487, 491, 497, 505, 514, 529, 544, 555, 576,
- 588, 614, 633, 660, 680, 709, 734, 769, 798, 828, 851, 874, 892,
- 911, 930, 946, 959, 973, 984, 994, 515, 517, 523, 525, 531, 538,
- 546, 552, 570, 582, 596, 617, 631, 653, 676, 700, 720, 749, 774,
- 811, 835, 856, 879, 894, 912, 928, 944, 957, 971, 982, 992, 1001,
- 556, 558, 562, 566, 572, 580, 586, 597, 611, 623, 637, 655, 673,
- 693, 714, 738, 765, 790, 817, 839, 863, 881, 900, 916, 931, 945,
- 956, 969, 980, 990, 999, 1007, 599, 603, 605, 607, 615, 621, 629,
- 639, 650, 668, 678, 701, 716, 731, 758, 779, 807, 830, 848, 867,
- 885, 904, 918, 933, 947, 958, 970, 979, 988, 997, 1005, 1012, 641,
- 645, 647, 651, 658, 666, 674, 682, 696, 710, 725, 744, 761, 777,
- 802, 820, 842, 861, 877, 891, 906, 924, 937, 949, 960, 972, 981,
- 989, 996, 1003, 1010, 1016, 686, 688, 694, 702, 706, 712, 718, 729,
- 745, 753, 771, 784, 808, 823, 840, 857, 871, 888, 902, 914, 926,
- 939, 953, 964, 974, 983, 991, 998, 1004, 1009, 1014, 1019, 735, 739,
- 741, 747, 751, 759, 767, 775, 787, 804, 818, 832, 846, 859, 869,
- 883, 896, 910, 922, 935, 943, 955, 966, 976, 985, 993, 1000, 1006,
- 1011, 1015, 1018, 1021, 781, 785, 791, 796, 800, 812, 814, 824, 836,
- 844, 853, 865, 875, 889, 898, 908, 920, 929, 941, 951, 962, 968,
- 978, 987, 995, 1002, 1008, 1013, 1017, 1020, 1022, 1023,
-};
-
-#if CONFIG_TX64X64
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x64[2048]) = {
- 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78,
- 91, 105, 120, 136, 153, 171, 190, 210, 231, 253, 276, 300, 325,
- 351, 378, 406, 435, 465, 496, 2, 4, 7, 11, 16, 22, 29,
- 37, 46, 56, 67, 79, 92, 106, 121, 137, 154, 172, 191, 211,
- 232, 254, 277, 301, 326, 352, 379, 407, 436, 466, 497, 528, 5,
- 8, 12, 17, 23, 30, 38, 47, 57, 68, 80, 93, 107, 122,
- 138, 155, 173, 192, 212, 233, 255, 278, 302, 327, 353, 380, 408,
- 437, 467, 498, 529, 560, 9, 13, 18, 24, 31, 39, 48, 58,
- 69, 81, 94, 108, 123, 139, 156, 174, 193, 213, 234, 256, 279,
- 303, 328, 354, 381, 409, 438, 468, 499, 530, 561, 592, 14, 19,
- 25, 32, 40, 49, 59, 70, 82, 95, 109, 124, 140, 157, 175,
- 194, 214, 235, 257, 280, 304, 329, 355, 382, 410, 439, 469, 500,
- 531, 562, 593, 624, 20, 26, 33, 41, 50, 60, 71, 83, 96,
- 110, 125, 141, 158, 176, 195, 215, 236, 258, 281, 305, 330, 356,
- 383, 411, 440, 470, 501, 532, 563, 594, 625, 656, 27, 34, 42,
- 51, 61, 72, 84, 97, 111, 126, 142, 159, 177, 196, 216, 237,
- 259, 282, 306, 331, 357, 384, 412, 441, 471, 502, 533, 564, 595,
- 626, 657, 688, 35, 43, 52, 62, 73, 85, 98, 112, 127, 143,
- 160, 178, 197, 217, 238, 260, 283, 307, 332, 358, 385, 413, 442,
- 472, 503, 534, 565, 596, 627, 658, 689, 720, 44, 53, 63, 74,
- 86, 99, 113, 128, 144, 161, 179, 198, 218, 239, 261, 284, 308,
- 333, 359, 386, 414, 443, 473, 504, 535, 566, 597, 628, 659, 690,
- 721, 752, 54, 64, 75, 87, 100, 114, 129, 145, 162, 180, 199,
- 219, 240, 262, 285, 309, 334, 360, 387, 415, 444, 474, 505, 536,
- 567, 598, 629, 660, 691, 722, 753, 784, 65, 76, 88, 101, 115,
- 130, 146, 163, 181, 200, 220, 241, 263, 286, 310, 335, 361, 388,
- 416, 445, 475, 506, 537, 568, 599, 630, 661, 692, 723, 754, 785,
- 816, 77, 89, 102, 116, 131, 147, 164, 182, 201, 221, 242, 264,
- 287, 311, 336, 362, 389, 417, 446, 476, 507, 538, 569, 600, 631,
- 662, 693, 724, 755, 786, 817, 848, 90, 103, 117, 132, 148, 165,
- 183, 202, 222, 243, 265, 288, 312, 337, 363, 390, 418, 447, 477,
- 508, 539, 570, 601, 632, 663, 694, 725, 756, 787, 818, 849, 880,
- 104, 118, 133, 149, 166, 184, 203, 223, 244, 266, 289, 313, 338,
- 364, 391, 419, 448, 478, 509, 540, 571, 602, 633, 664, 695, 726,
- 757, 788, 819, 850, 881, 912, 119, 134, 150, 167, 185, 204, 224,
- 245, 267, 290, 314, 339, 365, 392, 420, 449, 479, 510, 541, 572,
- 603, 634, 665, 696, 727, 758, 789, 820, 851, 882, 913, 944, 135,
- 151, 168, 186, 205, 225, 246, 268, 291, 315, 340, 366, 393, 421,
- 450, 480, 511, 542, 573, 604, 635, 666, 697, 728, 759, 790, 821,
- 852, 883, 914, 945, 976, 152, 169, 187, 206, 226, 247, 269, 292,
- 316, 341, 367, 394, 422, 451, 481, 512, 543, 574, 605, 636, 667,
- 698, 729, 760, 791, 822, 853, 884, 915, 946, 977, 1008, 170, 188,
- 207, 227, 248, 270, 293, 317, 342, 368, 395, 423, 452, 482, 513,
- 544, 575, 606, 637, 668, 699, 730, 761, 792, 823, 854, 885, 916,
- 947, 978, 1009, 1040, 189, 208, 228, 249, 271, 294, 318, 343, 369,
- 396, 424, 453, 483, 514, 545, 576, 607, 638, 669, 700, 731, 762,
- 793, 824, 855, 886, 917, 948, 979, 1010, 1041, 1072, 209, 229, 250,
- 272, 295, 319, 344, 370, 397, 425, 454, 484, 515, 546, 577, 608,
- 639, 670, 701, 732, 763, 794, 825, 856, 887, 918, 949, 980, 1011,
- 1042, 1073, 1104, 230, 251, 273, 296, 320, 345, 371, 398, 426, 455,
- 485, 516, 547, 578, 609, 640, 671, 702, 733, 764, 795, 826, 857,
- 888, 919, 950, 981, 1012, 1043, 1074, 1105, 1136, 252, 274, 297, 321,
- 346, 372, 399, 427, 456, 486, 517, 548, 579, 610, 641, 672, 703,
- 734, 765, 796, 827, 858, 889, 920, 951, 982, 1013, 1044, 1075, 1106,
- 1137, 1168, 275, 298, 322, 347, 373, 400, 428, 457, 487, 518, 549,
- 580, 611, 642, 673, 704, 735, 766, 797, 828, 859, 890, 921, 952,
- 983, 1014, 1045, 1076, 1107, 1138, 1169, 1200, 299, 323, 348, 374, 401,
- 429, 458, 488, 519, 550, 581, 612, 643, 674, 705, 736, 767, 798,
- 829, 860, 891, 922, 953, 984, 1015, 1046, 1077, 1108, 1139, 1170, 1201,
- 1232, 324, 349, 375, 402, 430, 459, 489, 520, 551, 582, 613, 644,
- 675, 706, 737, 768, 799, 830, 861, 892, 923, 954, 985, 1016, 1047,
- 1078, 1109, 1140, 1171, 1202, 1233, 1264, 350, 376, 403, 431, 460, 490,
- 521, 552, 583, 614, 645, 676, 707, 738, 769, 800, 831, 862, 893,
- 924, 955, 986, 1017, 1048, 1079, 1110, 1141, 1172, 1203, 1234, 1265, 1296,
- 377, 404, 432, 461, 491, 522, 553, 584, 615, 646, 677, 708, 739,
- 770, 801, 832, 863, 894, 925, 956, 987, 1018, 1049, 1080, 1111, 1142,
- 1173, 1204, 1235, 1266, 1297, 1328, 405, 433, 462, 492, 523, 554, 585,
- 616, 647, 678, 709, 740, 771, 802, 833, 864, 895, 926, 957, 988,
- 1019, 1050, 1081, 1112, 1143, 1174, 1205, 1236, 1267, 1298, 1329, 1360, 434,
- 463, 493, 524, 555, 586, 617, 648, 679, 710, 741, 772, 803, 834,
- 865, 896, 927, 958, 989, 1020, 1051, 1082, 1113, 1144, 1175, 1206, 1237,
- 1268, 1299, 1330, 1361, 1392, 464, 494, 525, 556, 587, 618, 649, 680,
- 711, 742, 773, 804, 835, 866, 897, 928, 959, 990, 1021, 1052, 1083,
- 1114, 1145, 1176, 1207, 1238, 1269, 1300, 1331, 1362, 1393, 1424, 495, 526,
- 557, 588, 619, 650, 681, 712, 743, 774, 805, 836, 867, 898, 929,
- 960, 991, 1022, 1053, 1084, 1115, 1146, 1177, 1208, 1239, 1270, 1301, 1332,
- 1363, 1394, 1425, 1456, 527, 558, 589, 620, 651, 682, 713, 744, 775,
- 806, 837, 868, 899, 930, 961, 992, 1023, 1054, 1085, 1116, 1147, 1178,
- 1209, 1240, 1271, 1302, 1333, 1364, 1395, 1426, 1457, 1488, 559, 590, 621,
- 652, 683, 714, 745, 776, 807, 838, 869, 900, 931, 962, 993, 1024,
- 1055, 1086, 1117, 1148, 1179, 1210, 1241, 1272, 1303, 1334, 1365, 1396, 1427,
- 1458, 1489, 1520, 591, 622, 653, 684, 715, 746, 777, 808, 839, 870,
- 901, 932, 963, 994, 1025, 1056, 1087, 1118, 1149, 1180, 1211, 1242, 1273,
- 1304, 1335, 1366, 1397, 1428, 1459, 1490, 1521, 1552, 623, 654, 685, 716,
- 747, 778, 809, 840, 871, 902, 933, 964, 995, 1026, 1057, 1088, 1119,
- 1150, 1181, 1212, 1243, 1274, 1305, 1336, 1367, 1398, 1429, 1460, 1491, 1522,
- 1553, 1583, 655, 686, 717, 748, 779, 810, 841, 872, 903, 934, 965,
- 996, 1027, 1058, 1089, 1120, 1151, 1182, 1213, 1244, 1275, 1306, 1337, 1368,
- 1399, 1430, 1461, 1492, 1523, 1554, 1584, 1613, 687, 718, 749, 780, 811,
- 842, 873, 904, 935, 966, 997, 1028, 1059, 1090, 1121, 1152, 1183, 1214,
- 1245, 1276, 1307, 1338, 1369, 1400, 1431, 1462, 1493, 1524, 1555, 1585, 1614,
- 1642, 719, 750, 781, 812, 843, 874, 905, 936, 967, 998, 1029, 1060,
- 1091, 1122, 1153, 1184, 1215, 1246, 1277, 1308, 1339, 1370, 1401, 1432, 1463,
- 1494, 1525, 1556, 1586, 1615, 1643, 1670, 751, 782, 813, 844, 875, 906,
- 937, 968, 999, 1030, 1061, 1092, 1123, 1154, 1185, 1216, 1247, 1278, 1309,
- 1340, 1371, 1402, 1433, 1464, 1495, 1526, 1557, 1587, 1616, 1644, 1671, 1697,
- 783, 814, 845, 876, 907, 938, 969, 1000, 1031, 1062, 1093, 1124, 1155,
- 1186, 1217, 1248, 1279, 1310, 1341, 1372, 1403, 1434, 1465, 1496, 1527, 1558,
- 1588, 1617, 1645, 1672, 1698, 1723, 815, 846, 877, 908, 939, 970, 1001,
- 1032, 1063, 1094, 1125, 1156, 1187, 1218, 1249, 1280, 1311, 1342, 1373, 1404,
- 1435, 1466, 1497, 1528, 1559, 1589, 1618, 1646, 1673, 1699, 1724, 1748, 847,
- 878, 909, 940, 971, 1002, 1033, 1064, 1095, 1126, 1157, 1188, 1219, 1250,
- 1281, 1312, 1343, 1374, 1405, 1436, 1467, 1498, 1529, 1560, 1590, 1619, 1647,
- 1674, 1700, 1725, 1749, 1772, 879, 910, 941, 972, 1003, 1034, 1065, 1096,
- 1127, 1158, 1189, 1220, 1251, 1282, 1313, 1344, 1375, 1406, 1437, 1468, 1499,
- 1530, 1561, 1591, 1620, 1648, 1675, 1701, 1726, 1750, 1773, 1795, 911, 942,
- 973, 1004, 1035, 1066, 1097, 1128, 1159, 1190, 1221, 1252, 1283, 1314, 1345,
- 1376, 1407, 1438, 1469, 1500, 1531, 1562, 1592, 1621, 1649, 1676, 1702, 1727,
- 1751, 1774, 1796, 1817, 943, 974, 1005, 1036, 1067, 1098, 1129, 1160, 1191,
- 1222, 1253, 1284, 1315, 1346, 1377, 1408, 1439, 1470, 1501, 1532, 1563, 1593,
- 1622, 1650, 1677, 1703, 1728, 1752, 1775, 1797, 1818, 1838, 975, 1006, 1037,
- 1068, 1099, 1130, 1161, 1192, 1223, 1254, 1285, 1316, 1347, 1378, 1409, 1440,
- 1471, 1502, 1533, 1564, 1594, 1623, 1651, 1678, 1704, 1729, 1753, 1776, 1798,
- 1819, 1839, 1858, 1007, 1038, 1069, 1100, 1131, 1162, 1193, 1224, 1255, 1286,
- 1317, 1348, 1379, 1410, 1441, 1472, 1503, 1534, 1565, 1595, 1624, 1652, 1679,
- 1705, 1730, 1754, 1777, 1799, 1820, 1840, 1859, 1877, 1039, 1070, 1101, 1132,
- 1163, 1194, 1225, 1256, 1287, 1318, 1349, 1380, 1411, 1442, 1473, 1504, 1535,
- 1566, 1596, 1625, 1653, 1680, 1706, 1731, 1755, 1778, 1800, 1821, 1841, 1860,
- 1878, 1895, 1071, 1102, 1133, 1164, 1195, 1226, 1257, 1288, 1319, 1350, 1381,
- 1412, 1443, 1474, 1505, 1536, 1567, 1597, 1626, 1654, 1681, 1707, 1732, 1756,
- 1779, 1801, 1822, 1842, 1861, 1879, 1896, 1912, 1103, 1134, 1165, 1196, 1227,
- 1258, 1289, 1320, 1351, 1382, 1413, 1444, 1475, 1506, 1537, 1568, 1598, 1627,
- 1655, 1682, 1708, 1733, 1757, 1780, 1802, 1823, 1843, 1862, 1880, 1897, 1913,
- 1928, 1135, 1166, 1197, 1228, 1259, 1290, 1321, 1352, 1383, 1414, 1445, 1476,
- 1507, 1538, 1569, 1599, 1628, 1656, 1683, 1709, 1734, 1758, 1781, 1803, 1824,
- 1844, 1863, 1881, 1898, 1914, 1929, 1943, 1167, 1198, 1229, 1260, 1291, 1322,
- 1353, 1384, 1415, 1446, 1477, 1508, 1539, 1570, 1600, 1629, 1657, 1684, 1710,
- 1735, 1759, 1782, 1804, 1825, 1845, 1864, 1882, 1899, 1915, 1930, 1944, 1957,
- 1199, 1230, 1261, 1292, 1323, 1354, 1385, 1416, 1447, 1478, 1509, 1540, 1571,
- 1601, 1630, 1658, 1685, 1711, 1736, 1760, 1783, 1805, 1826, 1846, 1865, 1883,
- 1900, 1916, 1931, 1945, 1958, 1970, 1231, 1262, 1293, 1324, 1355, 1386, 1417,
- 1448, 1479, 1510, 1541, 1572, 1602, 1631, 1659, 1686, 1712, 1737, 1761, 1784,
- 1806, 1827, 1847, 1866, 1884, 1901, 1917, 1932, 1946, 1959, 1971, 1982, 1263,
- 1294, 1325, 1356, 1387, 1418, 1449, 1480, 1511, 1542, 1573, 1603, 1632, 1660,
- 1687, 1713, 1738, 1762, 1785, 1807, 1828, 1848, 1867, 1885, 1902, 1918, 1933,
- 1947, 1960, 1972, 1983, 1993, 1295, 1326, 1357, 1388, 1419, 1450, 1481, 1512,
- 1543, 1574, 1604, 1633, 1661, 1688, 1714, 1739, 1763, 1786, 1808, 1829, 1849,
- 1868, 1886, 1903, 1919, 1934, 1948, 1961, 1973, 1984, 1994, 2003, 1327, 1358,
- 1389, 1420, 1451, 1482, 1513, 1544, 1575, 1605, 1634, 1662, 1689, 1715, 1740,
- 1764, 1787, 1809, 1830, 1850, 1869, 1887, 1904, 1920, 1935, 1949, 1962, 1974,
- 1985, 1995, 2004, 2012, 1359, 1390, 1421, 1452, 1483, 1514, 1545, 1576, 1606,
- 1635, 1663, 1690, 1716, 1741, 1765, 1788, 1810, 1831, 1851, 1870, 1888, 1905,
- 1921, 1936, 1950, 1963, 1975, 1986, 1996, 2005, 2013, 2020, 1391, 1422, 1453,
- 1484, 1515, 1546, 1577, 1607, 1636, 1664, 1691, 1717, 1742, 1766, 1789, 1811,
- 1832, 1852, 1871, 1889, 1906, 1922, 1937, 1951, 1964, 1976, 1987, 1997, 2006,
- 2014, 2021, 2027, 1423, 1454, 1485, 1516, 1547, 1578, 1608, 1637, 1665, 1692,
- 1718, 1743, 1767, 1790, 1812, 1833, 1853, 1872, 1890, 1907, 1923, 1938, 1952,
- 1965, 1977, 1988, 1998, 2007, 2015, 2022, 2028, 2033, 1455, 1486, 1517, 1548,
- 1579, 1609, 1638, 1666, 1693, 1719, 1744, 1768, 1791, 1813, 1834, 1854, 1873,
- 1891, 1908, 1924, 1939, 1953, 1966, 1978, 1989, 1999, 2008, 2016, 2023, 2029,
- 2034, 2038, 1487, 1518, 1549, 1580, 1610, 1639, 1667, 1694, 1720, 1745, 1769,
- 1792, 1814, 1835, 1855, 1874, 1892, 1909, 1925, 1940, 1954, 1967, 1979, 1990,
- 2000, 2009, 2017, 2024, 2030, 2035, 2039, 2042, 1519, 1550, 1581, 1611, 1640,
- 1668, 1695, 1721, 1746, 1770, 1793, 1815, 1836, 1856, 1875, 1893, 1910, 1926,
- 1941, 1955, 1968, 1980, 1991, 2001, 2010, 2018, 2025, 2031, 2036, 2040, 2043,
- 2045, 1551, 1582, 1612, 1641, 1669, 1696, 1722, 1747, 1771, 1794, 1816, 1837,
- 1857, 1876, 1894, 1911, 1927, 1942, 1956, 1969, 1981, 1992, 2002, 2011, 2019,
- 2026, 2032, 2037, 2041, 2044, 2046, 2047,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_64x32[2048]) = {
- 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78,
- 91, 105, 120, 136, 153, 171, 190, 210, 231, 253, 276, 300, 325,
- 351, 378, 406, 435, 465, 496, 528, 560, 592, 624, 656, 688, 720,
- 752, 784, 816, 848, 880, 912, 944, 976, 1008, 1040, 1072, 1104, 1136,
- 1168, 1200, 1232, 1264, 1296, 1328, 1360, 1392, 1424, 1456, 1488, 1520, 2,
- 4, 7, 11, 16, 22, 29, 37, 46, 56, 67, 79, 92, 106,
- 121, 137, 154, 172, 191, 211, 232, 254, 277, 301, 326, 352, 379,
- 407, 436, 466, 497, 529, 561, 593, 625, 657, 689, 721, 753, 785,
- 817, 849, 881, 913, 945, 977, 1009, 1041, 1073, 1105, 1137, 1169, 1201,
- 1233, 1265, 1297, 1329, 1361, 1393, 1425, 1457, 1489, 1521, 1552, 5, 8,
- 12, 17, 23, 30, 38, 47, 57, 68, 80, 93, 107, 122, 138,
- 155, 173, 192, 212, 233, 255, 278, 302, 327, 353, 380, 408, 437,
- 467, 498, 530, 562, 594, 626, 658, 690, 722, 754, 786, 818, 850,
- 882, 914, 946, 978, 1010, 1042, 1074, 1106, 1138, 1170, 1202, 1234, 1266,
- 1298, 1330, 1362, 1394, 1426, 1458, 1490, 1522, 1553, 1583, 9, 13, 18,
- 24, 31, 39, 48, 58, 69, 81, 94, 108, 123, 139, 156, 174,
- 193, 213, 234, 256, 279, 303, 328, 354, 381, 409, 438, 468, 499,
- 531, 563, 595, 627, 659, 691, 723, 755, 787, 819, 851, 883, 915,
- 947, 979, 1011, 1043, 1075, 1107, 1139, 1171, 1203, 1235, 1267, 1299, 1331,
- 1363, 1395, 1427, 1459, 1491, 1523, 1554, 1584, 1613, 14, 19, 25, 32,
- 40, 49, 59, 70, 82, 95, 109, 124, 140, 157, 175, 194, 214,
- 235, 257, 280, 304, 329, 355, 382, 410, 439, 469, 500, 532, 564,
- 596, 628, 660, 692, 724, 756, 788, 820, 852, 884, 916, 948, 980,
- 1012, 1044, 1076, 1108, 1140, 1172, 1204, 1236, 1268, 1300, 1332, 1364, 1396,
- 1428, 1460, 1492, 1524, 1555, 1585, 1614, 1642, 20, 26, 33, 41, 50,
- 60, 71, 83, 96, 110, 125, 141, 158, 176, 195, 215, 236, 258,
- 281, 305, 330, 356, 383, 411, 440, 470, 501, 533, 565, 597, 629,
- 661, 693, 725, 757, 789, 821, 853, 885, 917, 949, 981, 1013, 1045,
- 1077, 1109, 1141, 1173, 1205, 1237, 1269, 1301, 1333, 1365, 1397, 1429, 1461,
- 1493, 1525, 1556, 1586, 1615, 1643, 1670, 27, 34, 42, 51, 61, 72,
- 84, 97, 111, 126, 142, 159, 177, 196, 216, 237, 259, 282, 306,
- 331, 357, 384, 412, 441, 471, 502, 534, 566, 598, 630, 662, 694,
- 726, 758, 790, 822, 854, 886, 918, 950, 982, 1014, 1046, 1078, 1110,
- 1142, 1174, 1206, 1238, 1270, 1302, 1334, 1366, 1398, 1430, 1462, 1494, 1526,
- 1557, 1587, 1616, 1644, 1671, 1697, 35, 43, 52, 62, 73, 85, 98,
- 112, 127, 143, 160, 178, 197, 217, 238, 260, 283, 307, 332, 358,
- 385, 413, 442, 472, 503, 535, 567, 599, 631, 663, 695, 727, 759,
- 791, 823, 855, 887, 919, 951, 983, 1015, 1047, 1079, 1111, 1143, 1175,
- 1207, 1239, 1271, 1303, 1335, 1367, 1399, 1431, 1463, 1495, 1527, 1558, 1588,
- 1617, 1645, 1672, 1698, 1723, 44, 53, 63, 74, 86, 99, 113, 128,
- 144, 161, 179, 198, 218, 239, 261, 284, 308, 333, 359, 386, 414,
- 443, 473, 504, 536, 568, 600, 632, 664, 696, 728, 760, 792, 824,
- 856, 888, 920, 952, 984, 1016, 1048, 1080, 1112, 1144, 1176, 1208, 1240,
- 1272, 1304, 1336, 1368, 1400, 1432, 1464, 1496, 1528, 1559, 1589, 1618, 1646,
- 1673, 1699, 1724, 1748, 54, 64, 75, 87, 100, 114, 129, 145, 162,
- 180, 199, 219, 240, 262, 285, 309, 334, 360, 387, 415, 444, 474,
- 505, 537, 569, 601, 633, 665, 697, 729, 761, 793, 825, 857, 889,
- 921, 953, 985, 1017, 1049, 1081, 1113, 1145, 1177, 1209, 1241, 1273, 1305,
- 1337, 1369, 1401, 1433, 1465, 1497, 1529, 1560, 1590, 1619, 1647, 1674, 1700,
- 1725, 1749, 1772, 65, 76, 88, 101, 115, 130, 146, 163, 181, 200,
- 220, 241, 263, 286, 310, 335, 361, 388, 416, 445, 475, 506, 538,
- 570, 602, 634, 666, 698, 730, 762, 794, 826, 858, 890, 922, 954,
- 986, 1018, 1050, 1082, 1114, 1146, 1178, 1210, 1242, 1274, 1306, 1338, 1370,
- 1402, 1434, 1466, 1498, 1530, 1561, 1591, 1620, 1648, 1675, 1701, 1726, 1750,
- 1773, 1795, 77, 89, 102, 116, 131, 147, 164, 182, 201, 221, 242,
- 264, 287, 311, 336, 362, 389, 417, 446, 476, 507, 539, 571, 603,
- 635, 667, 699, 731, 763, 795, 827, 859, 891, 923, 955, 987, 1019,
- 1051, 1083, 1115, 1147, 1179, 1211, 1243, 1275, 1307, 1339, 1371, 1403, 1435,
- 1467, 1499, 1531, 1562, 1592, 1621, 1649, 1676, 1702, 1727, 1751, 1774, 1796,
- 1817, 90, 103, 117, 132, 148, 165, 183, 202, 222, 243, 265, 288,
- 312, 337, 363, 390, 418, 447, 477, 508, 540, 572, 604, 636, 668,
- 700, 732, 764, 796, 828, 860, 892, 924, 956, 988, 1020, 1052, 1084,
- 1116, 1148, 1180, 1212, 1244, 1276, 1308, 1340, 1372, 1404, 1436, 1468, 1500,
- 1532, 1563, 1593, 1622, 1650, 1677, 1703, 1728, 1752, 1775, 1797, 1818, 1838,
- 104, 118, 133, 149, 166, 184, 203, 223, 244, 266, 289, 313, 338,
- 364, 391, 419, 448, 478, 509, 541, 573, 605, 637, 669, 701, 733,
- 765, 797, 829, 861, 893, 925, 957, 989, 1021, 1053, 1085, 1117, 1149,
- 1181, 1213, 1245, 1277, 1309, 1341, 1373, 1405, 1437, 1469, 1501, 1533, 1564,
- 1594, 1623, 1651, 1678, 1704, 1729, 1753, 1776, 1798, 1819, 1839, 1858, 119,
- 134, 150, 167, 185, 204, 224, 245, 267, 290, 314, 339, 365, 392,
- 420, 449, 479, 510, 542, 574, 606, 638, 670, 702, 734, 766, 798,
- 830, 862, 894, 926, 958, 990, 1022, 1054, 1086, 1118, 1150, 1182, 1214,
- 1246, 1278, 1310, 1342, 1374, 1406, 1438, 1470, 1502, 1534, 1565, 1595, 1624,
- 1652, 1679, 1705, 1730, 1754, 1777, 1799, 1820, 1840, 1859, 1877, 135, 151,
- 168, 186, 205, 225, 246, 268, 291, 315, 340, 366, 393, 421, 450,
- 480, 511, 543, 575, 607, 639, 671, 703, 735, 767, 799, 831, 863,
- 895, 927, 959, 991, 1023, 1055, 1087, 1119, 1151, 1183, 1215, 1247, 1279,
- 1311, 1343, 1375, 1407, 1439, 1471, 1503, 1535, 1566, 1596, 1625, 1653, 1680,
- 1706, 1731, 1755, 1778, 1800, 1821, 1841, 1860, 1878, 1895, 152, 169, 187,
- 206, 226, 247, 269, 292, 316, 341, 367, 394, 422, 451, 481, 512,
- 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928,
- 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344,
- 1376, 1408, 1440, 1472, 1504, 1536, 1567, 1597, 1626, 1654, 1681, 1707, 1732,
- 1756, 1779, 1801, 1822, 1842, 1861, 1879, 1896, 1912, 170, 188, 207, 227,
- 248, 270, 293, 317, 342, 368, 395, 423, 452, 482, 513, 545, 577,
- 609, 641, 673, 705, 737, 769, 801, 833, 865, 897, 929, 961, 993,
- 1025, 1057, 1089, 1121, 1153, 1185, 1217, 1249, 1281, 1313, 1345, 1377, 1409,
- 1441, 1473, 1505, 1537, 1568, 1598, 1627, 1655, 1682, 1708, 1733, 1757, 1780,
- 1802, 1823, 1843, 1862, 1880, 1897, 1913, 1928, 189, 208, 228, 249, 271,
- 294, 318, 343, 369, 396, 424, 453, 483, 514, 546, 578, 610, 642,
- 674, 706, 738, 770, 802, 834, 866, 898, 930, 962, 994, 1026, 1058,
- 1090, 1122, 1154, 1186, 1218, 1250, 1282, 1314, 1346, 1378, 1410, 1442, 1474,
- 1506, 1538, 1569, 1599, 1628, 1656, 1683, 1709, 1734, 1758, 1781, 1803, 1824,
- 1844, 1863, 1881, 1898, 1914, 1929, 1943, 209, 229, 250, 272, 295, 319,
- 344, 370, 397, 425, 454, 484, 515, 547, 579, 611, 643, 675, 707,
- 739, 771, 803, 835, 867, 899, 931, 963, 995, 1027, 1059, 1091, 1123,
- 1155, 1187, 1219, 1251, 1283, 1315, 1347, 1379, 1411, 1443, 1475, 1507, 1539,
- 1570, 1600, 1629, 1657, 1684, 1710, 1735, 1759, 1782, 1804, 1825, 1845, 1864,
- 1882, 1899, 1915, 1930, 1944, 1957, 230, 251, 273, 296, 320, 345, 371,
- 398, 426, 455, 485, 516, 548, 580, 612, 644, 676, 708, 740, 772,
- 804, 836, 868, 900, 932, 964, 996, 1028, 1060, 1092, 1124, 1156, 1188,
- 1220, 1252, 1284, 1316, 1348, 1380, 1412, 1444, 1476, 1508, 1540, 1571, 1601,
- 1630, 1658, 1685, 1711, 1736, 1760, 1783, 1805, 1826, 1846, 1865, 1883, 1900,
- 1916, 1931, 1945, 1958, 1970, 252, 274, 297, 321, 346, 372, 399, 427,
- 456, 486, 517, 549, 581, 613, 645, 677, 709, 741, 773, 805, 837,
- 869, 901, 933, 965, 997, 1029, 1061, 1093, 1125, 1157, 1189, 1221, 1253,
- 1285, 1317, 1349, 1381, 1413, 1445, 1477, 1509, 1541, 1572, 1602, 1631, 1659,
- 1686, 1712, 1737, 1761, 1784, 1806, 1827, 1847, 1866, 1884, 1901, 1917, 1932,
- 1946, 1959, 1971, 1982, 275, 298, 322, 347, 373, 400, 428, 457, 487,
- 518, 550, 582, 614, 646, 678, 710, 742, 774, 806, 838, 870, 902,
- 934, 966, 998, 1030, 1062, 1094, 1126, 1158, 1190, 1222, 1254, 1286, 1318,
- 1350, 1382, 1414, 1446, 1478, 1510, 1542, 1573, 1603, 1632, 1660, 1687, 1713,
- 1738, 1762, 1785, 1807, 1828, 1848, 1867, 1885, 1902, 1918, 1933, 1947, 1960,
- 1972, 1983, 1993, 299, 323, 348, 374, 401, 429, 458, 488, 519, 551,
- 583, 615, 647, 679, 711, 743, 775, 807, 839, 871, 903, 935, 967,
- 999, 1031, 1063, 1095, 1127, 1159, 1191, 1223, 1255, 1287, 1319, 1351, 1383,
- 1415, 1447, 1479, 1511, 1543, 1574, 1604, 1633, 1661, 1688, 1714, 1739, 1763,
- 1786, 1808, 1829, 1849, 1868, 1886, 1903, 1919, 1934, 1948, 1961, 1973, 1984,
- 1994, 2003, 324, 349, 375, 402, 430, 459, 489, 520, 552, 584, 616,
- 648, 680, 712, 744, 776, 808, 840, 872, 904, 936, 968, 1000, 1032,
- 1064, 1096, 1128, 1160, 1192, 1224, 1256, 1288, 1320, 1352, 1384, 1416, 1448,
- 1480, 1512, 1544, 1575, 1605, 1634, 1662, 1689, 1715, 1740, 1764, 1787, 1809,
- 1830, 1850, 1869, 1887, 1904, 1920, 1935, 1949, 1962, 1974, 1985, 1995, 2004,
- 2012, 350, 376, 403, 431, 460, 490, 521, 553, 585, 617, 649, 681,
- 713, 745, 777, 809, 841, 873, 905, 937, 969, 1001, 1033, 1065, 1097,
- 1129, 1161, 1193, 1225, 1257, 1289, 1321, 1353, 1385, 1417, 1449, 1481, 1513,
- 1545, 1576, 1606, 1635, 1663, 1690, 1716, 1741, 1765, 1788, 1810, 1831, 1851,
- 1870, 1888, 1905, 1921, 1936, 1950, 1963, 1975, 1986, 1996, 2005, 2013, 2020,
- 377, 404, 432, 461, 491, 522, 554, 586, 618, 650, 682, 714, 746,
- 778, 810, 842, 874, 906, 938, 970, 1002, 1034, 1066, 1098, 1130, 1162,
- 1194, 1226, 1258, 1290, 1322, 1354, 1386, 1418, 1450, 1482, 1514, 1546, 1577,
- 1607, 1636, 1664, 1691, 1717, 1742, 1766, 1789, 1811, 1832, 1852, 1871, 1889,
- 1906, 1922, 1937, 1951, 1964, 1976, 1987, 1997, 2006, 2014, 2021, 2027, 405,
- 433, 462, 492, 523, 555, 587, 619, 651, 683, 715, 747, 779, 811,
- 843, 875, 907, 939, 971, 1003, 1035, 1067, 1099, 1131, 1163, 1195, 1227,
- 1259, 1291, 1323, 1355, 1387, 1419, 1451, 1483, 1515, 1547, 1578, 1608, 1637,
- 1665, 1692, 1718, 1743, 1767, 1790, 1812, 1833, 1853, 1872, 1890, 1907, 1923,
- 1938, 1952, 1965, 1977, 1988, 1998, 2007, 2015, 2022, 2028, 2033, 434, 463,
- 493, 524, 556, 588, 620, 652, 684, 716, 748, 780, 812, 844, 876,
- 908, 940, 972, 1004, 1036, 1068, 1100, 1132, 1164, 1196, 1228, 1260, 1292,
- 1324, 1356, 1388, 1420, 1452, 1484, 1516, 1548, 1579, 1609, 1638, 1666, 1693,
- 1719, 1744, 1768, 1791, 1813, 1834, 1854, 1873, 1891, 1908, 1924, 1939, 1953,
- 1966, 1978, 1989, 1999, 2008, 2016, 2023, 2029, 2034, 2038, 464, 494, 525,
- 557, 589, 621, 653, 685, 717, 749, 781, 813, 845, 877, 909, 941,
- 973, 1005, 1037, 1069, 1101, 1133, 1165, 1197, 1229, 1261, 1293, 1325, 1357,
- 1389, 1421, 1453, 1485, 1517, 1549, 1580, 1610, 1639, 1667, 1694, 1720, 1745,
- 1769, 1792, 1814, 1835, 1855, 1874, 1892, 1909, 1925, 1940, 1954, 1967, 1979,
- 1990, 2000, 2009, 2017, 2024, 2030, 2035, 2039, 2042, 495, 526, 558, 590,
- 622, 654, 686, 718, 750, 782, 814, 846, 878, 910, 942, 974, 1006,
- 1038, 1070, 1102, 1134, 1166, 1198, 1230, 1262, 1294, 1326, 1358, 1390, 1422,
- 1454, 1486, 1518, 1550, 1581, 1611, 1640, 1668, 1695, 1721, 1746, 1770, 1793,
- 1815, 1836, 1856, 1875, 1893, 1910, 1926, 1941, 1955, 1968, 1980, 1991, 2001,
- 2010, 2018, 2025, 2031, 2036, 2040, 2043, 2045, 527, 559, 591, 623, 655,
- 687, 719, 751, 783, 815, 847, 879, 911, 943, 975, 1007, 1039, 1071,
- 1103, 1135, 1167, 1199, 1231, 1263, 1295, 1327, 1359, 1391, 1423, 1455, 1487,
- 1519, 1551, 1582, 1612, 1641, 1669, 1696, 1722, 1747, 1771, 1794, 1816, 1837,
- 1857, 1876, 1894, 1911, 1927, 1942, 1956, 1969, 1981, 1992, 2002, 2011, 2019,
- 2026, 2032, 2037, 2041, 2044, 2046, 2047,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_64x64[4096]) = {
- 0, 1, 4, 9, 15, 22, 33, 43, 56, 71, 86, 104, 121,
- 142, 166, 189, 214, 239, 269, 300, 331, 363, 400, 435, 471, 510,
- 553, 598, 640, 683, 732, 780, 833, 884, 937, 995, 1048, 1107, 1165,
- 1230, 1293, 1353, 1422, 1489, 1562, 1632, 1701, 1776, 1850, 1929, 2006, 2091,
- 2173, 2252, 2339, 2421, 2516, 2603, 2694, 2786, 2879, 2978, 3076, 3175, 2,
- 3, 6, 11, 17, 26, 35, 45, 58, 73, 90, 106, 123, 146,
- 168, 193, 216, 243, 271, 302, 335, 365, 402, 437, 473, 516, 557,
- 600, 642, 687, 736, 782, 835, 886, 941, 999, 1050, 1111, 1167, 1234,
- 1297, 1357, 1424, 1491, 1564, 1636, 1703, 1778, 1852, 1931, 2012, 2095, 2177,
- 2256, 2341, 2425, 2518, 2605, 2698, 2788, 2883, 2982, 3078, 3177, 5, 7,
- 8, 13, 20, 28, 37, 50, 62, 75, 92, 108, 129, 150, 170,
- 195, 218, 249, 277, 304, 337, 369, 406, 441, 478, 520, 559, 604,
- 646, 689, 740, 788, 841, 890, 945, 1001, 1052, 1115, 1173, 1236, 1301,
- 1362, 1428, 1497, 1568, 1638, 1707, 1786, 1858, 1935, 2016, 2097, 2181, 2260,
- 2343, 2431, 2520, 2613, 2702, 2790, 2889, 2984, 3082, 3181, 10, 12, 14,
- 19, 23, 31, 41, 52, 65, 81, 96, 113, 133, 152, 175, 201,
- 224, 253, 279, 310, 341, 375, 410, 445, 484, 524, 563, 606, 648,
- 697, 746, 793, 843, 896, 949, 1005, 1060, 1119, 1181, 1242, 1303, 1366,
- 1436, 1503, 1572, 1640, 1713, 1790, 1865, 1943, 2018, 2103, 2183, 2266, 2347,
- 2437, 2526, 2617, 2708, 2800, 2893, 2992, 3086, 3189, 16, 18, 21, 24,
- 30, 39, 48, 59, 69, 83, 100, 119, 137, 158, 181, 203, 230,
- 255, 286, 316, 347, 380, 414, 451, 490, 530, 571, 612, 656, 705,
- 750, 799, 849, 898, 959, 1009, 1066, 1127, 1184, 1246, 1307, 1376, 1440,
- 1509, 1578, 1644, 1723, 1794, 1871, 1947, 2024, 2109, 2185, 2270, 2361, 2443,
- 2536, 2619, 2710, 2806, 2899, 2998, 3090, 3193, 25, 27, 29, 32, 40,
- 46, 54, 67, 79, 94, 109, 127, 143, 164, 185, 210, 236, 263,
- 292, 320, 353, 388, 422, 459, 496, 533, 579, 618, 665, 711, 754,
- 809, 857, 910, 961, 1015, 1074, 1131, 1194, 1254, 1315, 1384, 1448, 1517,
- 1584, 1655, 1731, 1802, 1875, 1959, 2034, 2115, 2197, 2280, 2367, 2452, 2538,
- 2625, 2722, 2816, 2907, 3004, 3100, 3203, 34, 36, 38, 42, 49, 55,
- 64, 76, 87, 102, 117, 135, 154, 176, 197, 222, 247, 272, 298,
- 329, 361, 392, 427, 465, 504, 545, 585, 626, 671, 717, 766, 813,
- 862, 916, 971, 1028, 1084, 1139, 1200, 1264, 1325, 1390, 1452, 1523, 1594,
- 1667, 1737, 1806, 1887, 1963, 2046, 2123, 2202, 2290, 2371, 2462, 2548, 2641,
- 2732, 2822, 2917, 3010, 3111, 3211, 44, 47, 51, 53, 60, 68, 77,
- 85, 98, 114, 131, 147, 162, 183, 208, 232, 256, 283, 314, 343,
- 373, 408, 442, 475, 511, 551, 592, 638, 681, 726, 772, 821, 874,
- 926, 979, 1034, 1088, 1153, 1214, 1271, 1335, 1396, 1469, 1533, 1600, 1673,
- 1745, 1824, 1897, 1973, 2054, 2131, 2216, 2300, 2383, 2468, 2558, 2649, 2740,
- 2829, 2923, 3022, 3123, 3221, 57, 61, 63, 66, 70, 80, 88, 99,
- 112, 124, 140, 159, 179, 199, 219, 240, 267, 294, 322, 354, 386,
- 418, 455, 492, 528, 567, 608, 649, 695, 742, 786, 836, 882, 933,
- 989, 1046, 1101, 1161, 1216, 1279, 1343, 1410, 1479, 1543, 1614, 1687, 1758,
- 1832, 1905, 1980, 2066, 2141, 2226, 2306, 2395, 2484, 2566, 2659, 2750, 2845,
- 2939, 3032, 3133, 3225, 72, 74, 78, 82, 84, 95, 103, 115, 125,
- 139, 156, 173, 190, 211, 234, 259, 281, 311, 339, 366, 394, 433,
- 466, 500, 543, 581, 622, 667, 707, 752, 803, 853, 899, 955, 1007,
- 1064, 1117, 1175, 1237, 1299, 1354, 1420, 1485, 1556, 1624, 1697, 1770, 1842,
- 1919, 1998, 2074, 2155, 2234, 2319, 2409, 2492, 2581, 2671, 2760, 2859, 2949,
- 3046, 3145, 3245, 89, 91, 93, 97, 101, 110, 118, 132, 141, 157,
- 171, 186, 206, 228, 251, 273, 296, 324, 351, 384, 415, 447, 482,
- 521, 554, 593, 636, 677, 722, 770, 815, 866, 914, 967, 1022, 1078,
- 1135, 1195, 1252, 1313, 1378, 1444, 1507, 1576, 1642, 1714, 1788, 1860, 1933,
- 2013, 2085, 2169, 2250, 2337, 2417, 2502, 2597, 2683, 2778, 2869, 2960, 3060,
- 3157, 3256, 105, 107, 111, 116, 120, 128, 136, 148, 160, 174, 187,
- 205, 225, 244, 265, 290, 317, 344, 370, 398, 431, 463, 498, 534,
- 573, 616, 654, 698, 743, 783, 831, 880, 928, 983, 1036, 1092, 1149,
- 1208, 1266, 1333, 1394, 1457, 1524, 1590, 1665, 1733, 1804, 1879, 1953, 2030,
- 2111, 2189, 2271, 2357, 2441, 2534, 2615, 2704, 2791, 2887, 2979, 3072, 3167,
- 3270, 122, 126, 130, 134, 138, 144, 155, 163, 180, 191, 207, 226,
- 238, 261, 287, 308, 332, 359, 390, 419, 449, 485, 518, 549, 587,
- 630, 672, 715, 760, 805, 855, 900, 953, 1003, 1053, 1108, 1163, 1220,
- 1287, 1345, 1408, 1473, 1541, 1608, 1677, 1749, 1826, 1898, 1971, 2048, 2127,
- 2208, 2294, 2373, 2458, 2542, 2631, 2726, 2818, 2908, 3002, 3094, 3199, 3286,
- 145, 149, 151, 153, 161, 165, 177, 184, 200, 212, 229, 245, 262,
- 284, 305, 327, 355, 382, 411, 438, 469, 501, 539, 577, 613, 652,
- 690, 730, 776, 822, 872, 922, 973, 1024, 1079, 1132, 1188, 1250, 1305,
- 1367, 1432, 1492, 1560, 1626, 1693, 1766, 1838, 1911, 1992, 2068, 2149, 2228,
- 2307, 2393, 2478, 2564, 2655, 2742, 2833, 2927, 3020, 3119, 3219, 3298, 167,
- 169, 172, 178, 182, 188, 198, 209, 220, 235, 252, 266, 288, 306,
- 326, 349, 378, 403, 428, 461, 494, 526, 560, 594, 632, 675, 713,
- 755, 801, 845, 892, 942, 990, 1042, 1096, 1155, 1212, 1267, 1329, 1391,
- 1450, 1519, 1582, 1650, 1724, 1792, 1862, 1936, 2007, 2083, 2167, 2246, 2329,
- 2413, 2496, 2585, 2675, 2761, 2855, 2947, 3040, 3135, 3233, 3320, 192, 194,
- 196, 202, 204, 213, 223, 233, 241, 260, 274, 291, 309, 328, 350,
- 376, 395, 425, 457, 488, 512, 547, 583, 619, 659, 699, 737, 778,
- 819, 868, 917, 965, 1013, 1072, 1123, 1176, 1231, 1289, 1351, 1414, 1474,
- 1539, 1604, 1674, 1741, 1816, 1891, 1961, 2040, 2116, 2191, 2276, 2353, 2438,
- 2524, 2606, 2689, 2784, 2871, 2968, 3062, 3161, 3257, 3334, 215, 217, 221,
- 227, 231, 237, 248, 257, 268, 282, 297, 318, 333, 356, 379, 396,
- 424, 452, 479, 508, 541, 574, 609, 643, 679, 719, 764, 806, 850,
- 894, 938, 987, 1038, 1089, 1145, 1204, 1258, 1316, 1379, 1438, 1501, 1565,
- 1628, 1694, 1764, 1836, 1907, 1981, 2060, 2137, 2220, 2298, 2377, 2464, 2549,
- 2635, 2724, 2812, 2903, 2999, 3088, 3185, 3278, 3350, 242, 246, 250, 254,
- 258, 264, 275, 285, 295, 312, 325, 345, 360, 383, 404, 426, 453,
- 476, 506, 535, 568, 601, 634, 669, 708, 748, 789, 829, 875, 923,
- 968, 1016, 1068, 1120, 1168, 1224, 1280, 1341, 1402, 1465, 1531, 1591, 1661,
- 1729, 1795, 1867, 1937, 2004, 2079, 2159, 2242, 2320, 2405, 2488, 2573, 2661,
- 2744, 2839, 2933, 3023, 3117, 3215, 3296, 3373, 270, 276, 278, 280, 289,
- 293, 299, 315, 323, 340, 352, 371, 391, 412, 429, 458, 480, 507,
- 532, 564, 590, 627, 663, 703, 733, 773, 816, 859, 906, 950, 993,
- 1043, 1094, 1147, 1201, 1256, 1311, 1372, 1429, 1486, 1550, 1618, 1685, 1751,
- 1827, 1895, 1965, 2042, 2119, 2192, 2268, 2348, 2429, 2512, 2599, 2684, 2772,
- 2863, 2951, 3048, 3143, 3239, 3324, 3393, 301, 303, 307, 313, 319, 321,
- 330, 346, 357, 367, 385, 399, 420, 439, 462, 489, 509, 536, 565,
- 589, 624, 661, 691, 727, 768, 810, 846, 887, 929, 977, 1029, 1076,
- 1128, 1177, 1226, 1283, 1339, 1397, 1461, 1521, 1585, 1648, 1715, 1779, 1848,
- 1923, 1996, 2069, 2142, 2224, 2302, 2381, 2465, 2544, 2627, 2720, 2807, 2895,
- 2985, 3073, 3163, 3264, 3338, 3413, 334, 336, 338, 342, 348, 358, 362,
- 374, 387, 397, 416, 432, 450, 470, 495, 513, 542, 569, 591, 625,
- 657, 684, 723, 762, 797, 837, 878, 920, 963, 1010, 1054, 1105, 1157,
- 1206, 1262, 1317, 1374, 1433, 1483, 1545, 1615, 1681, 1743, 1812, 1885, 1954,
- 2025, 2101, 2174, 2248, 2330, 2411, 2490, 2579, 2663, 2745, 2835, 2924, 3018,
- 3115, 3205, 3290, 3363, 3431, 364, 368, 372, 377, 381, 389, 393, 409,
- 421, 434, 448, 464, 486, 502, 527, 548, 575, 602, 628, 662, 685,
- 721, 756, 794, 827, 869, 912, 956, 996, 1040, 1086, 1137, 1189, 1243,
- 1291, 1349, 1404, 1466, 1525, 1588, 1645, 1711, 1774, 1843, 1909, 1988, 2058,
- 2132, 2209, 2288, 2368, 2445, 2527, 2607, 2687, 2780, 2865, 2953, 3049, 3139,
- 3237, 3318, 3387, 3451, 401, 405, 407, 413, 417, 423, 430, 443, 456,
- 467, 483, 499, 519, 540, 561, 584, 610, 635, 664, 692, 724, 757,
- 792, 825, 863, 908, 946, 985, 1032, 1080, 1125, 1169, 1217, 1275, 1330,
- 1386, 1441, 1498, 1554, 1619, 1683, 1746, 1810, 1883, 1949, 2019, 2086, 2165,
- 2238, 2314, 2399, 2479, 2562, 2645, 2733, 2820, 2904, 2996, 3083, 3168, 3268,
- 3339, 3407, 3474, 436, 440, 444, 446, 454, 460, 468, 477, 493, 503,
- 522, 537, 550, 578, 595, 620, 644, 670, 704, 728, 763, 795, 826,
- 861, 901, 935, 980, 1025, 1069, 1112, 1159, 1209, 1260, 1309, 1363, 1418,
- 1475, 1534, 1598, 1656, 1721, 1780, 1846, 1912, 1982, 2056, 2129, 2199, 2278,
- 2358, 2432, 2508, 2593, 2677, 2762, 2851, 2941, 3030, 3124, 3216, 3294, 3365,
- 3433, 3488, 472, 474, 481, 487, 491, 497, 505, 514, 529, 544, 555,
- 576, 588, 614, 633, 660, 680, 709, 734, 769, 798, 828, 864, 902,
- 932, 975, 1020, 1061, 1102, 1150, 1198, 1247, 1294, 1346, 1400, 1455, 1513,
- 1573, 1629, 1689, 1755, 1820, 1888, 1955, 2022, 2092, 2163, 2235, 2312, 2389,
- 2472, 2554, 2632, 2716, 2804, 2884, 2974, 3063, 3153, 3250, 3326, 3395, 3454,
- 3512, 515, 517, 523, 525, 531, 538, 546, 552, 570, 582, 596, 617,
- 631, 653, 676, 700, 720, 749, 774, 811, 838, 870, 909, 936, 976,
- 1017, 1058, 1099, 1143, 1192, 1238, 1284, 1336, 1388, 1445, 1493, 1546, 1610,
- 1671, 1734, 1796, 1856, 1925, 1994, 2062, 2133, 2206, 2281, 2354, 2426, 2503,
- 2587, 2669, 2754, 2843, 2928, 3016, 3105, 3201, 3284, 3351, 3421, 3480, 3534,
- 556, 558, 562, 566, 572, 580, 586, 597, 611, 623, 637, 655, 673,
- 693, 714, 738, 765, 790, 817, 847, 879, 913, 947, 981, 1021, 1059,
- 1097, 1140, 1185, 1227, 1277, 1327, 1380, 1425, 1481, 1537, 1595, 1651, 1708,
- 1771, 1834, 1901, 1966, 2035, 2107, 2170, 2244, 2315, 2396, 2474, 2552, 2628,
- 2711, 2792, 2875, 2966, 3056, 3146, 3234, 3314, 3383, 3445, 3504, 3559, 599,
- 603, 605, 607, 615, 621, 629, 639, 650, 668, 678, 701, 716, 731,
- 758, 779, 807, 830, 860, 888, 921, 957, 986, 1026, 1062, 1100, 1141,
- 1183, 1221, 1272, 1323, 1368, 1416, 1471, 1526, 1580, 1633, 1691, 1752, 1817,
- 1876, 1944, 2002, 2072, 2143, 2218, 2291, 2363, 2435, 2509, 2589, 2672, 2752,
- 2840, 2921, 3008, 3095, 3190, 3274, 3344, 3409, 3470, 3526, 3577, 641, 645,
- 647, 651, 658, 666, 674, 682, 696, 710, 725, 744, 761, 777, 802,
- 820, 851, 876, 907, 930, 964, 997, 1033, 1070, 1103, 1144, 1186, 1222,
- 1270, 1318, 1360, 1411, 1463, 1515, 1569, 1622, 1678, 1739, 1800, 1853, 1917,
- 1983, 2052, 2121, 2186, 2253, 2331, 2406, 2482, 2559, 2639, 2717, 2798, 2877,
- 2961, 3052, 3137, 3226, 3306, 3379, 3437, 3492, 3553, 3601, 686, 688, 694,
- 702, 706, 712, 718, 729, 745, 753, 771, 784, 808, 823, 848, 871,
- 895, 924, 951, 978, 1011, 1041, 1081, 1113, 1151, 1193, 1228, 1273, 1319,
- 1358, 1406, 1458, 1510, 1557, 1612, 1669, 1727, 1781, 1839, 1903, 1969, 2031,
- 2098, 2160, 2232, 2304, 2375, 2453, 2528, 2601, 2679, 2758, 2846, 2929, 3011,
- 3098, 3186, 3271, 3340, 3401, 3466, 3522, 3571, 3620, 735, 739, 741, 747,
- 751, 759, 767, 775, 787, 804, 818, 832, 856, 873, 893, 918, 939,
- 969, 994, 1030, 1055, 1087, 1126, 1160, 1199, 1239, 1278, 1324, 1361, 1407,
- 1453, 1505, 1551, 1605, 1663, 1716, 1768, 1830, 1893, 1951, 2008, 2075, 2139,
- 2214, 2284, 2349, 2418, 2494, 2571, 2653, 2734, 2810, 2890, 2972, 3058, 3147,
- 3231, 3310, 3375, 3435, 3490, 3545, 3595, 3642, 781, 785, 791, 796, 800,
- 812, 814, 824, 839, 854, 867, 881, 903, 925, 943, 966, 988, 1018,
- 1044, 1077, 1106, 1138, 1170, 1210, 1248, 1285, 1328, 1369, 1412, 1459, 1506,
- 1549, 1601, 1657, 1704, 1762, 1821, 1880, 1938, 1999, 2063, 2125, 2193, 2257,
- 2327, 2401, 2475, 2545, 2620, 2691, 2776, 2860, 2942, 3024, 3109, 3197, 3276,
- 3345, 3403, 3468, 3520, 3569, 3616, 3664, 834, 840, 842, 844, 852, 858,
- 865, 877, 883, 904, 915, 931, 954, 974, 991, 1014, 1039, 1071, 1095,
- 1129, 1158, 1190, 1218, 1261, 1295, 1337, 1381, 1417, 1464, 1511, 1552, 1602,
- 1654, 1699, 1759, 1813, 1872, 1927, 1990, 2049, 2113, 2178, 2239, 2308, 2378,
- 2450, 2521, 2594, 2667, 2746, 2824, 2909, 2990, 3070, 3154, 3243, 3316, 3381,
- 3441, 3493, 3547, 3597, 3640, 3682, 885, 889, 891, 897, 905, 911, 919,
- 927, 934, 958, 970, 984, 1004, 1027, 1045, 1073, 1090, 1121, 1148, 1178,
- 1207, 1244, 1276, 1310, 1347, 1389, 1426, 1472, 1516, 1558, 1606, 1658, 1700,
- 1757, 1807, 1868, 1920, 1978, 2043, 2104, 2157, 2229, 2296, 2364, 2422, 2498,
- 2574, 2650, 2727, 2801, 2872, 2954, 3038, 3129, 3212, 3288, 3352, 3419, 3475,
- 3524, 3573, 3621, 3668, 3707, 940, 944, 948, 952, 960, 962, 972, 982,
- 992, 1008, 1023, 1037, 1056, 1082, 1098, 1124, 1146, 1171, 1202, 1229, 1263,
- 1292, 1331, 1364, 1401, 1446, 1482, 1527, 1570, 1613, 1664, 1705, 1760, 1808,
- 1863, 1915, 1976, 2036, 2087, 2153, 2221, 2286, 2344, 2414, 2486, 2556, 2623,
- 2699, 2773, 2853, 2937, 3012, 3091, 3169, 3260, 3330, 3391, 3447, 3505, 3555,
- 3603, 3646, 3684, 3727, 998, 1000, 1002, 1006, 1012, 1019, 1031, 1035, 1047,
- 1065, 1083, 1093, 1109, 1133, 1156, 1179, 1205, 1225, 1257, 1286, 1320, 1350,
- 1387, 1419, 1456, 1494, 1538, 1581, 1623, 1670, 1717, 1763, 1814, 1869, 1916,
- 1974, 2028, 2081, 2150, 2212, 2272, 2335, 2403, 2469, 2539, 2608, 2680, 2755,
- 2827, 2915, 2986, 3068, 3151, 3229, 3300, 3366, 3427, 3484, 3532, 3581, 3630,
- 3672, 3709, 3745, 1049, 1051, 1057, 1063, 1067, 1075, 1085, 1091, 1104, 1118,
- 1136, 1152, 1164, 1191, 1213, 1232, 1259, 1281, 1312, 1340, 1375, 1405, 1442,
- 1476, 1514, 1547, 1596, 1634, 1679, 1728, 1769, 1822, 1873, 1921, 1977, 2029,
- 2078, 2144, 2203, 2264, 2325, 2390, 2459, 2529, 2591, 2665, 2738, 2813, 2880,
- 2957, 3041, 3127, 3206, 3282, 3348, 3399, 3460, 3513, 3565, 3609, 3650, 3695,
- 3733, 3768, 1110, 1114, 1116, 1122, 1130, 1134, 1142, 1154, 1162, 1180, 1196,
- 1211, 1223, 1251, 1268, 1290, 1321, 1342, 1373, 1398, 1434, 1467, 1499, 1535,
- 1574, 1611, 1652, 1692, 1740, 1782, 1831, 1881, 1928, 1979, 2037, 2082, 2145,
- 2200, 2261, 2321, 2387, 2454, 2513, 2583, 2656, 2730, 2793, 2867, 2945, 3025,
- 3101, 3178, 3262, 3328, 3388, 3443, 3494, 3543, 3591, 3636, 3678, 3715, 3754,
- 3790, 1166, 1172, 1174, 1182, 1187, 1197, 1203, 1215, 1219, 1240, 1253, 1269,
- 1288, 1306, 1332, 1352, 1382, 1403, 1430, 1462, 1484, 1528, 1555, 1599, 1630,
- 1672, 1709, 1753, 1801, 1840, 1894, 1939, 1991, 2044, 2088, 2151, 2204, 2262,
- 2318, 2384, 2448, 2504, 2577, 2646, 2712, 2782, 2856, 2934, 3006, 3079, 3158,
- 3240, 3307, 3371, 3425, 3481, 3530, 3575, 3618, 3660, 3701, 3741, 3774, 3807,
- 1233, 1235, 1241, 1245, 1249, 1255, 1265, 1274, 1282, 1300, 1314, 1334, 1348,
- 1370, 1392, 1415, 1439, 1468, 1487, 1522, 1548, 1589, 1620, 1659, 1690, 1735,
- 1772, 1818, 1854, 1904, 1952, 2000, 2050, 2105, 2154, 2213, 2265, 2322, 2385,
- 2446, 2500, 2569, 2642, 2705, 2770, 2849, 2919, 2993, 3064, 3140, 3223, 3292,
- 3353, 3414, 3464, 3516, 3561, 3607, 3648, 3687, 3725, 3762, 3796, 3827, 1296,
- 1298, 1302, 1304, 1308, 1322, 1326, 1338, 1344, 1355, 1383, 1395, 1409, 1435,
- 1451, 1477, 1502, 1532, 1553, 1586, 1616, 1646, 1684, 1722, 1756, 1797, 1835,
- 1877, 1918, 1970, 2009, 2064, 2114, 2158, 2222, 2273, 2326, 2388, 2449, 2501,
- 2567, 2636, 2695, 2768, 2836, 2910, 2976, 3053, 3131, 3209, 3279, 3336, 3397,
- 3449, 3500, 3549, 3593, 3634, 3676, 3713, 3747, 3784, 3817, 3845, 1356, 1359,
- 1365, 1371, 1377, 1385, 1393, 1399, 1413, 1421, 1447, 1460, 1478, 1495, 1520,
- 1540, 1566, 1592, 1621, 1649, 1682, 1712, 1747, 1783, 1823, 1857, 1902, 1945,
- 1984, 2032, 2076, 2126, 2179, 2230, 2287, 2336, 2391, 2455, 2505, 2570, 2637,
- 2692, 2763, 2830, 2901, 2969, 3044, 3120, 3194, 3265, 3331, 3385, 3439, 3486,
- 3536, 3582, 3626, 3665, 3703, 3739, 3772, 3802, 3835, 3864, 1423, 1427, 1431,
- 1437, 1443, 1449, 1454, 1470, 1480, 1488, 1508, 1529, 1542, 1561, 1583, 1607,
- 1631, 1662, 1686, 1718, 1744, 1775, 1811, 1847, 1889, 1926, 1967, 2003, 2053,
- 2099, 2140, 2194, 2240, 2297, 2345, 2404, 2460, 2514, 2578, 2643, 2696, 2764,
- 2826, 2897, 2962, 3036, 3112, 3182, 3254, 3321, 3376, 3429, 3478, 3527, 3567,
- 3611, 3652, 3693, 3731, 3764, 3794, 3825, 3853, 3882, 1490, 1496, 1500, 1504,
- 1512, 1518, 1530, 1536, 1544, 1559, 1577, 1593, 1609, 1627, 1653, 1675, 1695,
- 1730, 1754, 1784, 1815, 1844, 1884, 1913, 1956, 1995, 2038, 2073, 2122, 2161,
- 2215, 2258, 2309, 2365, 2415, 2470, 2530, 2584, 2647, 2706, 2769, 2831, 2898,
- 2959, 3033, 3106, 3170, 3252, 3312, 3367, 3423, 3471, 3518, 3563, 3605, 3644,
- 3680, 3717, 3755, 3788, 3819, 3847, 3874, 3898, 1563, 1567, 1571, 1575, 1579,
- 1587, 1597, 1603, 1617, 1625, 1643, 1666, 1680, 1696, 1725, 1742, 1765, 1798,
- 1828, 1849, 1886, 1910, 1950, 1985, 2023, 2065, 2108, 2146, 2187, 2233, 2285,
- 2328, 2379, 2423, 2487, 2540, 2592, 2657, 2713, 2771, 2837, 2902, 2963, 3034,
- 3104, 3164, 3248, 3304, 3361, 3417, 3462, 3510, 3557, 3598, 3638, 3674, 3711,
- 3743, 3776, 3811, 3839, 3868, 3892, 3917, 1635, 1637, 1639, 1641, 1647, 1660,
- 1668, 1676, 1688, 1698, 1719, 1736, 1750, 1767, 1793, 1819, 1837, 1870, 1896,
- 1924, 1957, 1989, 2020, 2057, 2093, 2134, 2171, 2219, 2254, 2305, 2350, 2402,
- 2451, 2499, 2557, 2609, 2666, 2731, 2783, 2850, 2911, 2970, 3037, 3107, 3165,
- 3246, 3301, 3359, 3410, 3458, 3508, 3551, 3589, 3632, 3670, 3705, 3737, 3770,
- 3800, 3829, 3858, 3886, 3911, 3933, 1702, 1706, 1710, 1720, 1726, 1732, 1738,
- 1748, 1761, 1773, 1789, 1805, 1829, 1841, 1864, 1892, 1908, 1940, 1968, 1997,
- 2026, 2059, 2089, 2130, 2164, 2207, 2245, 2292, 2332, 2376, 2419, 2476, 2522,
- 2575, 2624, 2681, 2739, 2794, 2857, 2920, 2977, 3045, 3113, 3171, 3249, 3302,
- 3358, 3404, 3455, 3502, 3541, 3587, 3628, 3661, 3699, 3735, 3766, 3797, 3823,
- 3851, 3876, 3903, 3927, 3950, 1777, 1785, 1787, 1791, 1799, 1803, 1809, 1825,
- 1833, 1845, 1861, 1882, 1899, 1914, 1941, 1962, 1986, 2005, 2045, 2070, 2102,
- 2135, 2166, 2201, 2236, 2282, 2316, 2366, 2407, 2456, 2495, 2546, 2595, 2651,
- 2700, 2756, 2814, 2868, 2935, 2994, 3054, 3121, 3183, 3253, 3305, 3360, 3405,
- 3453, 3498, 3539, 3585, 3622, 3658, 3697, 3728, 3760, 3792, 3821, 3849, 3872,
- 3896, 3919, 3942, 3964, 1851, 1855, 1859, 1866, 1874, 1878, 1890, 1900, 1906,
- 1922, 1934, 1958, 1972, 1993, 2010, 2041, 2061, 2080, 2120, 2147, 2175, 2210,
- 2241, 2279, 2313, 2355, 2397, 2436, 2483, 2531, 2572, 2621, 2668, 2728, 2774,
- 2828, 2881, 2946, 3007, 3065, 3132, 3195, 3255, 3313, 3362, 3411, 3456, 3499,
- 3538, 3579, 3614, 3656, 3691, 3723, 3758, 3786, 3815, 3843, 3870, 3894, 3915,
- 3937, 3956, 3975, 1930, 1932, 1942, 1946, 1948, 1960, 1964, 1975, 1987, 2001,
- 2014, 2033, 2051, 2071, 2084, 2117, 2138, 2162, 2195, 2225, 2249, 2289, 2317,
- 2359, 2392, 2427, 2477, 2510, 2560, 2602, 2654, 2693, 2747, 2802, 2854, 2916,
- 2958, 3026, 3080, 3141, 3210, 3266, 3322, 3368, 3418, 3459, 3503, 3540, 3580,
- 3613, 3654, 3688, 3721, 3752, 3782, 3813, 3841, 3865, 3890, 3913, 3935, 3954,
- 3972, 3989, 2011, 2015, 2017, 2021, 2027, 2039, 2047, 2055, 2067, 2077, 2090,
- 2112, 2128, 2152, 2168, 2196, 2223, 2243, 2269, 2303, 2333, 2369, 2400, 2433,
- 2473, 2506, 2553, 2590, 2640, 2682, 2735, 2777, 2825, 2873, 2938, 2987, 3042,
- 3102, 3159, 3224, 3280, 3332, 3377, 3424, 3463, 3509, 3542, 3586, 3615, 3655,
- 3685, 3719, 3750, 3780, 3809, 3836, 3862, 3888, 3909, 3931, 3952, 3970, 3987,
- 4003, 2094, 2096, 2100, 2106, 2110, 2118, 2124, 2136, 2148, 2156, 2172, 2190,
- 2211, 2231, 2247, 2277, 2299, 2323, 2351, 2382, 2412, 2447, 2480, 2511, 2555,
- 2588, 2629, 2673, 2718, 2759, 2811, 2861, 2912, 2955, 3013, 3069, 3128, 3179,
- 3241, 3293, 3337, 3386, 3430, 3472, 3511, 3552, 3588, 3623, 3657, 3689, 3720,
- 3749, 3778, 3805, 3833, 3860, 3884, 3907, 3929, 3948, 3968, 3985, 4001, 4016,
- 2176, 2180, 2182, 2184, 2188, 2198, 2205, 2217, 2227, 2237, 2251, 2274, 2295,
- 2310, 2334, 2356, 2380, 2408, 2430, 2466, 2491, 2532, 2563, 2596, 2633, 2670,
- 2714, 2753, 2799, 2847, 2891, 2943, 2991, 3039, 3092, 3152, 3207, 3263, 3308,
- 3354, 3398, 3440, 3479, 3519, 3558, 3590, 3629, 3659, 3692, 3722, 3751, 3779,
- 3804, 3831, 3856, 3880, 3905, 3925, 3946, 3966, 3983, 3999, 4014, 4028, 2255,
- 2259, 2263, 2267, 2275, 2283, 2293, 2301, 2311, 2324, 2338, 2360, 2374, 2394,
- 2416, 2439, 2467, 2489, 2515, 2547, 2580, 2610, 2648, 2678, 2719, 2757, 2795,
- 2841, 2878, 2930, 2973, 3027, 3071, 3130, 3172, 3230, 3283, 3329, 3372, 3415,
- 3450, 3487, 3528, 3564, 3599, 3633, 3662, 3698, 3724, 3753, 3781, 3806, 3832,
- 3855, 3878, 3901, 3923, 3944, 3962, 3981, 3997, 4012, 4026, 4039, 2340, 2342,
- 2346, 2352, 2362, 2370, 2372, 2386, 2398, 2410, 2420, 2442, 2461, 2481, 2497,
- 2525, 2550, 2576, 2600, 2630, 2664, 2688, 2736, 2765, 2805, 2844, 2876, 2922,
- 2964, 3014, 3059, 3110, 3155, 3213, 3261, 3303, 3349, 3389, 3426, 3465, 3501,
- 3537, 3568, 3606, 3639, 3671, 3700, 3729, 3759, 3783, 3810, 3834, 3857, 3879,
- 3900, 3921, 3940, 3960, 3979, 3995, 4010, 4024, 4037, 4049, 2424, 2428, 2434,
- 2440, 2444, 2457, 2463, 2471, 2485, 2493, 2507, 2535, 2543, 2565, 2586, 2611,
- 2638, 2662, 2685, 2721, 2748, 2781, 2821, 2852, 2885, 2931, 2967, 3009, 3055,
- 3099, 3148, 3198, 3244, 3289, 3333, 3369, 3400, 3444, 3482, 3517, 3550, 3583,
- 3612, 3645, 3675, 3706, 3736, 3761, 3787, 3814, 3837, 3861, 3881, 3902, 3922,
- 3939, 3958, 3977, 3993, 4008, 4022, 4035, 4047, 4058, 2517, 2519, 2523, 2533,
- 2537, 2541, 2551, 2561, 2568, 2582, 2598, 2616, 2634, 2658, 2676, 2690, 2725,
- 2749, 2775, 2808, 2838, 2866, 2905, 2944, 2975, 3017, 3057, 3096, 3138, 3187,
- 3232, 3277, 3317, 3355, 3392, 3428, 3461, 3495, 3531, 3562, 3594, 3627, 3653,
- 3681, 3712, 3738, 3767, 3793, 3816, 3842, 3863, 3885, 3906, 3924, 3941, 3959,
- 3974, 3991, 4006, 4020, 4033, 4045, 4056, 4066, 2604, 2612, 2614, 2618, 2622,
- 2626, 2644, 2652, 2660, 2674, 2686, 2707, 2729, 2743, 2766, 2785, 2815, 2842,
- 2864, 2896, 2925, 2956, 2997, 3031, 3066, 3108, 3149, 3191, 3227, 3272, 3311,
- 3346, 3382, 3420, 3448, 3485, 3514, 3544, 3576, 3608, 3635, 3666, 3694, 3718,
- 3744, 3771, 3798, 3822, 3844, 3866, 3889, 3908, 3926, 3945, 3961, 3978, 3992,
- 4005, 4018, 4031, 4043, 4054, 4064, 4073, 2697, 2701, 2703, 2709, 2715, 2723,
- 2737, 2741, 2751, 2767, 2779, 2796, 2819, 2834, 2858, 2874, 2906, 2936, 2952,
- 2988, 3019, 3050, 3084, 3125, 3156, 3202, 3235, 3275, 3309, 3341, 3378, 3406,
- 3442, 3476, 3506, 3533, 3566, 3592, 3619, 3649, 3677, 3704, 3732, 3756, 3777,
- 3801, 3824, 3850, 3871, 3891, 3910, 3930, 3947, 3963, 3980, 3994, 4007, 4019,
- 4030, 4041, 4052, 4062, 4071, 4079, 2787, 2789, 2797, 2803, 2809, 2817, 2823,
- 2832, 2848, 2862, 2870, 2888, 2913, 2932, 2948, 2971, 3000, 3028, 3051, 3074,
- 3116, 3142, 3173, 3217, 3251, 3285, 3315, 3347, 3380, 3402, 3436, 3469, 3496,
- 3525, 3556, 3584, 3610, 3637, 3663, 3690, 3714, 3740, 3765, 3789, 3812, 3830,
- 3852, 3873, 3895, 3914, 3932, 3949, 3967, 3982, 3996, 4009, 4021, 4032, 4042,
- 4051, 4060, 4069, 4077, 4084, 2882, 2886, 2892, 2894, 2900, 2914, 2918, 2926,
- 2940, 2950, 2965, 2980, 3003, 3021, 3043, 3067, 3089, 3118, 3144, 3166, 3208,
- 3238, 3269, 3295, 3327, 3356, 3384, 3412, 3438, 3467, 3491, 3521, 3548, 3574,
- 3604, 3631, 3651, 3679, 3702, 3726, 3748, 3773, 3795, 3820, 3840, 3859, 3877,
- 3897, 3916, 3936, 3953, 3969, 3984, 3998, 4011, 4023, 4034, 4044, 4053, 4061,
- 4068, 4075, 4082, 4088, 2981, 2983, 2989, 2995, 3001, 3005, 3015, 3029, 3035,
- 3047, 3061, 3075, 3097, 3122, 3136, 3162, 3188, 3218, 3242, 3267, 3291, 3319,
- 3342, 3370, 3396, 3422, 3446, 3473, 3497, 3523, 3546, 3570, 3600, 3624, 3647,
- 3673, 3696, 3716, 3742, 3763, 3785, 3803, 3826, 3848, 3869, 3887, 3904, 3920,
- 3938, 3955, 3971, 3986, 4000, 4013, 4025, 4036, 4046, 4055, 4063, 4070, 4076,
- 4081, 4086, 4091, 3077, 3081, 3085, 3087, 3093, 3103, 3114, 3126, 3134, 3150,
- 3160, 3174, 3200, 3220, 3236, 3258, 3281, 3297, 3325, 3343, 3364, 3390, 3408,
- 3434, 3457, 3483, 3507, 3529, 3554, 3572, 3596, 3617, 3641, 3669, 3686, 3710,
- 3734, 3757, 3775, 3799, 3818, 3838, 3854, 3875, 3893, 3912, 3928, 3943, 3957,
- 3973, 3988, 4002, 4015, 4027, 4038, 4048, 4057, 4065, 4072, 4078, 4083, 4087,
- 4090, 4093, 3176, 3180, 3184, 3192, 3196, 3204, 3214, 3222, 3228, 3247, 3259,
- 3273, 3287, 3299, 3323, 3335, 3357, 3374, 3394, 3416, 3432, 3452, 3477, 3489,
- 3515, 3535, 3560, 3578, 3602, 3625, 3643, 3667, 3683, 3708, 3730, 3746, 3769,
- 3791, 3808, 3828, 3846, 3867, 3883, 3899, 3918, 3934, 3951, 3965, 3976, 3990,
- 4004, 4017, 4029, 4040, 4050, 4059, 4067, 4074, 4080, 4085, 4089, 4092, 4094,
- 4095,
-};
-#endif // CONFIG_TX64X64
+ 0, 1, 5, 6, 14, 15, 27, 28, 44, 45, 65, 66, 90,
+ 91, 119, 120, 152, 153, 189, 190, 230, 231, 275, 276, 324, 325,
+ 377, 378, 434, 435, 495, 496, 2, 4, 7, 13, 16, 26, 29,
+ 43, 46, 64, 67, 89, 92, 118, 121, 151, 154, 188, 191, 229,
+ 232, 274, 277, 323, 326, 376, 379, 433, 436, 494, 497, 558, 3,
+ 8, 12, 17, 25, 30, 42, 47, 63, 68, 88, 93, 117, 122,
+ 150, 155, 187, 192, 228, 233, 273, 278, 322, 327, 375, 380, 432,
+ 437, 493, 498, 557, 559, 9, 11, 18, 24, 31, 41, 48, 62,
+ 69, 87, 94, 116, 123, 149, 156, 186, 193, 227, 234, 272, 279,
+ 321, 328, 374, 381, 431, 438, 492, 499, 556, 560, 617, 10, 19,
+ 23, 32, 40, 49, 61, 70, 86, 95, 115, 124, 148, 157, 185,
+ 194, 226, 235, 271, 280, 320, 329, 373, 382, 430, 439, 491, 500,
+ 555, 561, 616, 618, 20, 22, 33, 39, 50, 60, 71, 85, 96,
+ 114, 125, 147, 158, 184, 195, 225, 236, 270, 281, 319, 330, 372,
+ 383, 429, 440, 490, 501, 554, 562, 615, 619, 672, 21, 34, 38,
+ 51, 59, 72, 84, 97, 113, 126, 146, 159, 183, 196, 224, 237,
+ 269, 282, 318, 331, 371, 384, 428, 441, 489, 502, 553, 563, 614,
+ 620, 671, 673, 35, 37, 52, 58, 73, 83, 98, 112, 127, 145,
+ 160, 182, 197, 223, 238, 268, 283, 317, 332, 370, 385, 427, 442,
+ 488, 503, 552, 564, 613, 621, 670, 674, 723, 36, 53, 57, 74,
+ 82, 99, 111, 128, 144, 161, 181, 198, 222, 239, 267, 284, 316,
+ 333, 369, 386, 426, 443, 487, 504, 551, 565, 612, 622, 669, 675,
+ 722, 724, 54, 56, 75, 81, 100, 110, 129, 143, 162, 180, 199,
+ 221, 240, 266, 285, 315, 334, 368, 387, 425, 444, 486, 505, 550,
+ 566, 611, 623, 668, 676, 721, 725, 770, 55, 76, 80, 101, 109,
+ 130, 142, 163, 179, 200, 220, 241, 265, 286, 314, 335, 367, 388,
+ 424, 445, 485, 506, 549, 567, 610, 624, 667, 677, 720, 726, 769,
+ 771, 77, 79, 102, 108, 131, 141, 164, 178, 201, 219, 242, 264,
+ 287, 313, 336, 366, 389, 423, 446, 484, 507, 548, 568, 609, 625,
+ 666, 678, 719, 727, 768, 772, 813, 78, 103, 107, 132, 140, 165,
+ 177, 202, 218, 243, 263, 288, 312, 337, 365, 390, 422, 447, 483,
+ 508, 547, 569, 608, 626, 665, 679, 718, 728, 767, 773, 812, 814,
+ 104, 106, 133, 139, 166, 176, 203, 217, 244, 262, 289, 311, 338,
+ 364, 391, 421, 448, 482, 509, 546, 570, 607, 627, 664, 680, 717,
+ 729, 766, 774, 811, 815, 852, 105, 134, 138, 167, 175, 204, 216,
+ 245, 261, 290, 310, 339, 363, 392, 420, 449, 481, 510, 545, 571,
+ 606, 628, 663, 681, 716, 730, 765, 775, 810, 816, 851, 853, 135,
+ 137, 168, 174, 205, 215, 246, 260, 291, 309, 340, 362, 393, 419,
+ 450, 480, 511, 544, 572, 605, 629, 662, 682, 715, 731, 764, 776,
+ 809, 817, 850, 854, 887, 136, 169, 173, 206, 214, 247, 259, 292,
+ 308, 341, 361, 394, 418, 451, 479, 512, 543, 573, 604, 630, 661,
+ 683, 714, 732, 763, 777, 808, 818, 849, 855, 886, 888, 170, 172,
+ 207, 213, 248, 258, 293, 307, 342, 360, 395, 417, 452, 478, 513,
+ 542, 574, 603, 631, 660, 684, 713, 733, 762, 778, 807, 819, 848,
+ 856, 885, 889, 918, 171, 208, 212, 249, 257, 294, 306, 343, 359,
+ 396, 416, 453, 477, 514, 541, 575, 602, 632, 659, 685, 712, 734,
+ 761, 779, 806, 820, 847, 857, 884, 890, 917, 919, 209, 211, 250,
+ 256, 295, 305, 344, 358, 397, 415, 454, 476, 515, 540, 576, 601,
+ 633, 658, 686, 711, 735, 760, 780, 805, 821, 846, 858, 883, 891,
+ 916, 920, 945, 210, 251, 255, 296, 304, 345, 357, 398, 414, 455,
+ 475, 516, 539, 577, 600, 634, 657, 687, 710, 736, 759, 781, 804,
+ 822, 845, 859, 882, 892, 915, 921, 944, 946, 252, 254, 297, 303,
+ 346, 356, 399, 413, 456, 474, 517, 538, 578, 599, 635, 656, 688,
+ 709, 737, 758, 782, 803, 823, 844, 860, 881, 893, 914, 922, 943,
+ 947, 968, 253, 298, 302, 347, 355, 400, 412, 457, 473, 518, 537,
+ 579, 598, 636, 655, 689, 708, 738, 757, 783, 802, 824, 843, 861,
+ 880, 894, 913, 923, 942, 948, 967, 969, 299, 301, 348, 354, 401,
+ 411, 458, 472, 519, 536, 580, 597, 637, 654, 690, 707, 739, 756,
+ 784, 801, 825, 842, 862, 879, 895, 912, 924, 941, 949, 966, 970,
+ 987, 300, 349, 353, 402, 410, 459, 471, 520, 535, 581, 596, 638,
+ 653, 691, 706, 740, 755, 785, 800, 826, 841, 863, 878, 896, 911,
+ 925, 940, 950, 965, 971, 986, 988, 350, 352, 403, 409, 460, 470,
+ 521, 534, 582, 595, 639, 652, 692, 705, 741, 754, 786, 799, 827,
+ 840, 864, 877, 897, 910, 926, 939, 951, 964, 972, 985, 989, 1002,
+ 351, 404, 408, 461, 469, 522, 533, 583, 594, 640, 651, 693, 704,
+ 742, 753, 787, 798, 828, 839, 865, 876, 898, 909, 927, 938, 952,
+ 963, 973, 984, 990, 1001, 1003, 405, 407, 462, 468, 523, 532, 584,
+ 593, 641, 650, 694, 703, 743, 752, 788, 797, 829, 838, 866, 875,
+ 899, 908, 928, 937, 953, 962, 974, 983, 991, 1000, 1004, 1013, 406,
+ 463, 467, 524, 531, 585, 592, 642, 649, 695, 702, 744, 751, 789,
+ 796, 830, 837, 867, 874, 900, 907, 929, 936, 954, 961, 975, 982,
+ 992, 999, 1005, 1012, 1014, 464, 466, 525, 530, 586, 591, 643, 648,
+ 696, 701, 745, 750, 790, 795, 831, 836, 868, 873, 901, 906, 930,
+ 935, 955, 960, 976, 981, 993, 998, 1006, 1011, 1015, 1020, 465, 526,
+ 529, 587, 590, 644, 647, 697, 700, 746, 749, 791, 794, 832, 835,
+ 869, 872, 902, 905, 931, 934, 956, 959, 977, 980, 994, 997, 1007,
+ 1010, 1016, 1019, 1021, 527, 528, 588, 589, 645, 646, 698, 699, 747,
+ 748, 792, 793, 833, 834, 870, 871, 903, 904, 932, 933, 957, 958,
+ 978, 979, 995, 996, 1008, 1009, 1017, 1018, 1022, 1023
+};
const SCAN_ORDER av1_default_scan_orders[TX_SIZES] = {
-#if CONFIG_CHROMA_2X2
- { default_scan_2x2, av1_default_iscan_2x2, default_scan_2x2_neighbors },
-#endif
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ default_scan_16x16, av1_default_iscan_16x16, default_scan_16x16_neighbors },
{ default_scan_32x32, av1_default_iscan_32x32, default_scan_32x32_neighbors },
-#if CONFIG_TX64X64
- { default_scan_64x64, av1_default_iscan_64x64, default_scan_64x64_neighbors },
-#endif // CONFIG_TX64X64
+  // Half of the coefficients of tx64 at higher frequencies are set to
+  // zero, so tx32's scan order is used.
+ { default_scan_32x32, av1_default_iscan_32x32, default_scan_32x32_neighbors },
};
-const SCAN_ORDER av1_intra_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
-#if CONFIG_CHROMA_2X2
- {
- // TX_2X2
- { default_scan_2x2, av1_default_iscan_2x2, default_scan_2x2_neighbors },
- { default_scan_2x2, av1_default_iscan_2x2, default_scan_2x2_neighbors },
- { default_scan_2x2, av1_default_iscan_2x2, default_scan_2x2_neighbors },
- { default_scan_2x2, av1_default_iscan_2x2, default_scan_2x2_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
- { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
- { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
- { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
- { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
- { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
- { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
-#endif // CONFIG_EXT_TX
- },
-#endif
+const SCAN_ORDER av1_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{
// TX_4X4
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
- { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
- { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
- { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
- { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
- { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
- { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
- { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
-#endif // CONFIG_EXT_TX
- },
- {
- // TX_8X8
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { row_scan_8x8, av1_row_iscan_8x8, row_scan_8x8_neighbors },
- { col_scan_8x8, av1_col_iscan_8x8, col_scan_8x8_neighbors },
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { mrow_scan_8x8, av1_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
- { row_scan_8x8, av1_row_iscan_8x8, row_scan_8x8_neighbors },
- { col_scan_8x8, av1_col_iscan_8x8, col_scan_8x8_neighbors },
- { row_scan_8x8, av1_row_iscan_8x8, row_scan_8x8_neighbors },
- { col_scan_8x8, av1_col_iscan_8x8, col_scan_8x8_neighbors },
- { row_scan_8x8, av1_row_iscan_8x8, row_scan_8x8_neighbors },
- { col_scan_8x8, av1_col_iscan_8x8, col_scan_8x8_neighbors },
-#endif // CONFIG_EXT_TX
- },
- {
- // TX_16X16
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { row_scan_16x16, av1_row_iscan_16x16, row_scan_16x16_neighbors },
- { col_scan_16x16, av1_col_iscan_16x16, col_scan_16x16_neighbors },
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { mrow_scan_16x16, av1_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
- { row_scan_16x16, av1_row_iscan_16x16, row_scan_16x16_neighbors },
- { col_scan_16x16, av1_col_iscan_16x16, col_scan_16x16_neighbors },
- { row_scan_16x16, av1_row_iscan_16x16, row_scan_16x16_neighbors },
- { col_scan_16x16, av1_col_iscan_16x16, col_scan_16x16_neighbors },
- { row_scan_16x16, av1_row_iscan_16x16, row_scan_16x16_neighbors },
- { col_scan_16x16, av1_col_iscan_16x16, col_scan_16x16_neighbors },
-#endif // CONFIG_EXT_TX
- },
- {
- // TX_32X32
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { h2_scan_32x32, av1_h2_iscan_32x32, h2_scan_32x32_neighbors },
- { v2_scan_32x32, av1_v2_iscan_32x32, v2_scan_32x32_neighbors },
- { qtr_scan_32x32, av1_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
-#if CONFIG_EXT_TX
- { h2_scan_32x32, av1_h2_iscan_32x32, h2_scan_32x32_neighbors },
- { v2_scan_32x32, av1_v2_iscan_32x32, v2_scan_32x32_neighbors },
- { qtr_scan_32x32, av1_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
- { qtr_scan_32x32, av1_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
- { qtr_scan_32x32, av1_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
-#endif // CONFIG_EXT_TX
- },
-#if CONFIG_TX64X64
- {
- // TX_64X64
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
-#endif // CONFIG_EXT_TX
- },
-#endif // CONFIG_TX64X64
- {
- // TX_4X8
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
- { mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
- { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
- { mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
- { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
- { mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
- { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
- { mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
-#endif // CONFIG_EXT_TX
- },
- {
- // TX_8X4
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
- { mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
- { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
- { mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
- { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
- { mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
- { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
- { mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
-#endif // CONFIG_EXT_TX
- },
- {
- // TX_8X16
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
- { mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
- { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
- { mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
- { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
- { mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
- { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
- { mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
-#endif // CONFIG_EXT_TX
- },
- {
- // TX_16X8
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
- { mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
- { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
- { mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
- { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
- { mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
- { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
- { mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
-#endif // CONFIG_EXT_TX
- },
- {
- // TX_16X32
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
- { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
- { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
- { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
- { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
- { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
- { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
- { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
-#endif // CONFIG_EXT_TX
- },
- {
- // TX_32X16
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
- { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
- { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
- { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
- { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
- { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
- { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
- { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
-#endif // CONFIG_EXT_TX
- },
-#if CONFIG_TX64X64
- {
- // TX_32X64
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
-#endif // CONFIG_EXT_TX
- },
- {
- // TX_64X32
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
-#endif // CONFIG_EXT_TX
- }
-#endif // CONFIG_TX64X64
-};
-
-const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
-#if CONFIG_CHROMA_2X2
- {
- // TX_2X2
- { default_scan_2x2, av1_default_iscan_2x2, default_scan_2x2_neighbors },
- { default_scan_2x2, av1_default_iscan_2x2, default_scan_2x2_neighbors },
- { default_scan_2x2, av1_default_iscan_2x2, default_scan_2x2_neighbors },
- { default_scan_2x2, av1_default_iscan_2x2, default_scan_2x2_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
- { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
- { mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
- { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
- { mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
- { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
- { mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
-#endif // CONFIG_EXT_TX
- },
-#endif
- {
- // TX_4X4
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
-#if CONFIG_EXT_TX
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
- { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
{ mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
{ mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
{ mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
{ mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
{ mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
-#endif // CONFIG_EXT_TX
},
{
// TX_8X8
@@ -7453,20 +3236,18 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
-#if CONFIG_EXT_TX
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { mrow_scan_8x8, av1_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
+ { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ mrow_scan_8x8, av1_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
{ mcol_scan_8x8, av1_mcol_iscan_8x8, mcol_scan_8x8_neighbors },
{ mrow_scan_8x8, av1_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
{ mcol_scan_8x8, av1_mcol_iscan_8x8, mcol_scan_8x8_neighbors },
{ mrow_scan_8x8, av1_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
{ mcol_scan_8x8, av1_mcol_iscan_8x8, mcol_scan_8x8_neighbors },
-#endif // CONFIG_EXT_TX
},
{
// TX_16X16
@@ -7478,7 +3259,6 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_16x16_neighbors },
{ default_scan_16x16, av1_default_iscan_16x16,
default_scan_16x16_neighbors },
-#if CONFIG_EXT_TX
{ default_scan_16x16, av1_default_iscan_16x16,
default_scan_16x16_neighbors },
{ default_scan_16x16, av1_default_iscan_16x16,
@@ -7489,96 +3269,93 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_16x16_neighbors },
{ default_scan_16x16, av1_default_iscan_16x16,
default_scan_16x16_neighbors },
- { mrow_scan_16x16, av1_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
+ { default_scan_16x16, av1_default_iscan_16x16,
+ default_scan_16x16_neighbors },
{ mrow_scan_16x16, av1_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
{ mcol_scan_16x16, av1_mcol_iscan_16x16, mcol_scan_16x16_neighbors },
{ mrow_scan_16x16, av1_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
{ mcol_scan_16x16, av1_mcol_iscan_16x16, mcol_scan_16x16_neighbors },
{ mrow_scan_16x16, av1_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
{ mcol_scan_16x16, av1_mcol_iscan_16x16, mcol_scan_16x16_neighbors },
-#endif // CONFIG_EXT_TX
},
{
// TX_32X32
{ default_scan_32x32, av1_default_iscan_32x32,
default_scan_32x32_neighbors },
- { h2_scan_32x32, av1_h2_iscan_32x32, h2_scan_32x32_neighbors },
- { v2_scan_32x32, av1_v2_iscan_32x32, v2_scan_32x32_neighbors },
- { qtr_scan_32x32, av1_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
-#if CONFIG_EXT_TX
- { h2_scan_32x32, av1_h2_iscan_32x32, h2_scan_32x32_neighbors },
- { v2_scan_32x32, av1_v2_iscan_32x32, v2_scan_32x32_neighbors },
- { qtr_scan_32x32, av1_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
- { qtr_scan_32x32, av1_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
- { qtr_scan_32x32, av1_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
{ mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
{ mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
{ mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
{ mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
{ mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
{ mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
-#endif // CONFIG_EXT_TX
},
-#if CONFIG_TX64X64
{
// TX_64X64
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
- { default_scan_64x64, av1_default_iscan_64x64,
- default_scan_64x64_neighbors },
-#endif // CONFIG_EXT_TX
+ // Half of the coefficients of tx64 at higher frequencies are set to
+ // zeros. So tx32's scan order is used.
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+ { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+ { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
},
-#endif // CONFIG_TX64X64
{
// TX_4X8
{ default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
{ default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
{ default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
{ default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
-#if CONFIG_EXT_TX
{ default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
{ default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
{ default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
{ default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
{ default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
+ { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
{ mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
{ mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
{ mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
{ mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
{ mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
{ mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
-#endif // CONFIG_EXT_TX
},
{
// TX_8X4
@@ -7586,20 +3363,18 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
{ default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
{ default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
-#if CONFIG_EXT_TX
{ default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
{ default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
{ default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
{ default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
{ default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
+ { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
{ mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
{ mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
{ mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
{ mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
{ mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
{ mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
-#endif // CONFIG_EXT_TX
},
{
// TX_8X16
@@ -7611,7 +3386,6 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_8x16_neighbors },
{ default_scan_8x16, av1_default_iscan_8x16,
default_scan_8x16_neighbors },
-#if CONFIG_EXT_TX
{ default_scan_8x16, av1_default_iscan_8x16,
default_scan_8x16_neighbors },
{ default_scan_8x16, av1_default_iscan_8x16,
@@ -7622,14 +3396,14 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_8x16_neighbors },
{ default_scan_8x16, av1_default_iscan_8x16,
default_scan_8x16_neighbors },
- { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
+ { default_scan_8x16, av1_default_iscan_8x16,
+ default_scan_8x16_neighbors },
{ mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
{ mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
{ mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
{ mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
{ mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
{ mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
-#endif // CONFIG_EXT_TX
},
{
// TX_16X8
@@ -7641,7 +3415,6 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_16x8_neighbors },
{ default_scan_16x8, av1_default_iscan_16x8,
default_scan_16x8_neighbors },
-#if CONFIG_EXT_TX
{ default_scan_16x8, av1_default_iscan_16x8,
default_scan_16x8_neighbors },
{ default_scan_16x8, av1_default_iscan_16x8,
@@ -7652,14 +3425,14 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_16x8_neighbors },
{ default_scan_16x8, av1_default_iscan_16x8,
default_scan_16x8_neighbors },
- { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
+ { default_scan_16x8, av1_default_iscan_16x8,
+ default_scan_16x8_neighbors },
{ mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
{ mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
{ mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
{ mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
{ mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
{ mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
-#endif // CONFIG_EXT_TX
},
{
// TX_16X32
@@ -7671,7 +3444,6 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_16x32_neighbors },
{ default_scan_16x32, av1_default_iscan_16x32,
default_scan_16x32_neighbors },
-#if CONFIG_EXT_TX
{ default_scan_16x32, av1_default_iscan_16x32,
default_scan_16x32_neighbors },
{ default_scan_16x32, av1_default_iscan_16x32,
@@ -7682,14 +3454,14 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_16x32_neighbors },
{ default_scan_16x32, av1_default_iscan_16x32,
default_scan_16x32_neighbors },
- { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
{ mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
{ mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
{ mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
{ mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
{ mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
{ mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
-#endif // CONFIG_EXT_TX
},
{
// TX_32X16
@@ -7701,7 +3473,6 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_32x16_neighbors },
{ default_scan_32x16, av1_default_iscan_32x16,
default_scan_32x16_neighbors },
-#if CONFIG_EXT_TX
{ default_scan_32x16, av1_default_iscan_32x16,
default_scan_32x16_neighbors },
{ default_scan_32x16, av1_default_iscan_32x16,
@@ -7712,91 +3483,77 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_32x16_neighbors },
{ default_scan_32x16, av1_default_iscan_32x16,
default_scan_32x16_neighbors },
- { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
{ mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
{ mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
{ mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
{ mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
{ mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
{ mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
-#endif // CONFIG_EXT_TX
},
-#if CONFIG_TX64X64
{
// TX_32X64
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
- { default_scan_32x64, av1_default_iscan_32x64,
- default_scan_32x64_neighbors },
-#endif // CONFIG_EXT_TX
+ // Half of the coefficients of tx64 at higher frequencies are set to
+ // zeros. So tx32's scan order is used.
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+ { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+ { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
},
{
// TX_64X32
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
-#if CONFIG_EXT_TX
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
- { default_scan_64x32, av1_default_iscan_64x32,
- default_scan_64x32_neighbors },
-#endif // CONFIG_EXT_TX
+ // Half of the coefficients of tx64 at higher frequencies are set to
+ // zeros. So tx32's scan order is used.
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, av1_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+ { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+ { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
},
-#endif // CONFIG_TX64X64
{
// TX_4X16
{ default_scan_4x16, av1_default_iscan_4x16,
@@ -7807,7 +3564,6 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_4x16_neighbors },
{ default_scan_4x16, av1_default_iscan_4x16,
default_scan_4x16_neighbors },
-#if CONFIG_EXT_TX
{ default_scan_4x16, av1_default_iscan_4x16,
default_scan_4x16_neighbors },
{ default_scan_4x16, av1_default_iscan_4x16,
@@ -7818,14 +3574,14 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_4x16_neighbors },
{ default_scan_4x16, av1_default_iscan_4x16,
default_scan_4x16_neighbors },
- { mrow_scan_4x16, av1_mrow_iscan_4x16, mrow_scan_4x16_neighbors },
+ { default_scan_4x16, av1_default_iscan_4x16,
+ default_scan_4x16_neighbors },
{ mrow_scan_4x16, av1_mrow_iscan_4x16, mrow_scan_4x16_neighbors },
{ mcol_scan_4x16, av1_mcol_iscan_4x16, mcol_scan_4x16_neighbors },
{ mrow_scan_4x16, av1_mrow_iscan_4x16, mrow_scan_4x16_neighbors },
{ mcol_scan_4x16, av1_mcol_iscan_4x16, mcol_scan_4x16_neighbors },
{ mrow_scan_4x16, av1_mrow_iscan_4x16, mrow_scan_4x16_neighbors },
{ mcol_scan_4x16, av1_mcol_iscan_4x16, mcol_scan_4x16_neighbors },
-#endif // CONFIG_EXT_TX
},
{
// TX_16X4
@@ -7837,7 +3593,6 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_16x4_neighbors },
{ default_scan_16x4, av1_default_iscan_16x4,
default_scan_16x4_neighbors },
-#if CONFIG_EXT_TX
{ default_scan_16x4, av1_default_iscan_16x4,
default_scan_16x4_neighbors },
{ default_scan_16x4, av1_default_iscan_16x4,
@@ -7848,14 +3603,14 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_16x4_neighbors },
{ default_scan_16x4, av1_default_iscan_16x4,
default_scan_16x4_neighbors },
- { mrow_scan_16x4, av1_mrow_iscan_16x4, mrow_scan_16x4_neighbors },
+ { default_scan_16x4, av1_default_iscan_16x4,
+ default_scan_16x4_neighbors },
{ mrow_scan_16x4, av1_mrow_iscan_16x4, mrow_scan_16x4_neighbors },
{ mcol_scan_16x4, av1_mcol_iscan_16x4, mcol_scan_16x4_neighbors },
{ mrow_scan_16x4, av1_mrow_iscan_16x4, mrow_scan_16x4_neighbors },
{ mcol_scan_16x4, av1_mcol_iscan_16x4, mcol_scan_16x4_neighbors },
{ mrow_scan_16x4, av1_mrow_iscan_16x4, mrow_scan_16x4_neighbors },
{ mcol_scan_16x4, av1_mcol_iscan_16x4, mcol_scan_16x4_neighbors },
-#endif // CONFIG_EXT_TX
},
{
// TX_8X32
@@ -7867,7 +3622,6 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_8x32_neighbors },
{ default_scan_8x32, av1_default_iscan_8x32,
default_scan_8x32_neighbors },
-#if CONFIG_EXT_TX
{ default_scan_8x32, av1_default_iscan_8x32,
default_scan_8x32_neighbors },
{ default_scan_8x32, av1_default_iscan_8x32,
@@ -7878,14 +3632,14 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_8x32_neighbors },
{ default_scan_8x32, av1_default_iscan_8x32,
default_scan_8x32_neighbors },
- { mrow_scan_8x32, av1_mrow_iscan_8x32, mrow_scan_8x32_neighbors },
+ { default_scan_8x32, av1_default_iscan_8x32,
+ default_scan_8x32_neighbors },
{ mrow_scan_8x32, av1_mrow_iscan_8x32, mrow_scan_8x32_neighbors },
{ mcol_scan_8x32, av1_mcol_iscan_8x32, mcol_scan_8x32_neighbors },
{ mrow_scan_8x32, av1_mrow_iscan_8x32, mrow_scan_8x32_neighbors },
{ mcol_scan_8x32, av1_mcol_iscan_8x32, mcol_scan_8x32_neighbors },
{ mrow_scan_8x32, av1_mrow_iscan_8x32, mrow_scan_8x32_neighbors },
{ mcol_scan_8x32, av1_mcol_iscan_8x32, mcol_scan_8x32_neighbors },
-#endif // CONFIG_EXT_TX
},
{
// TX_32X8
@@ -7897,7 +3651,6 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_32x8_neighbors },
{ default_scan_32x8, av1_default_iscan_32x8,
default_scan_32x8_neighbors },
-#if CONFIG_EXT_TX
{ default_scan_32x8, av1_default_iscan_32x8,
default_scan_32x8_neighbors },
{ default_scan_32x8, av1_default_iscan_32x8,
@@ -7908,679 +3661,75 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
default_scan_32x8_neighbors },
{ default_scan_32x8, av1_default_iscan_32x8,
default_scan_32x8_neighbors },
- { mrow_scan_32x8, av1_mrow_iscan_32x8, mrow_scan_32x8_neighbors },
+ { default_scan_32x8, av1_default_iscan_32x8,
+ default_scan_32x8_neighbors },
{ mrow_scan_32x8, av1_mrow_iscan_32x8, mrow_scan_32x8_neighbors },
{ mcol_scan_32x8, av1_mcol_iscan_32x8, mcol_scan_32x8_neighbors },
{ mrow_scan_32x8, av1_mrow_iscan_32x8, mrow_scan_32x8_neighbors },
{ mcol_scan_32x8, av1_mcol_iscan_32x8, mcol_scan_32x8_neighbors },
{ mrow_scan_32x8, av1_mrow_iscan_32x8, mrow_scan_32x8_neighbors },
{ mcol_scan_32x8, av1_mcol_iscan_32x8, mcol_scan_32x8_neighbors },
-#endif // CONFIG_EXT_TX
+ },
+ {
+ // TX_16X64
+ // Half of the coefficients of tx64 at higher frequencies are set to
+ // zeros. So tx32's scan order is used.
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, av1_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+ { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
+ { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+ { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
+ { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+ { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
+ },
+ {
+ // TX_64X16
+ // Half of the coefficients of tx64 at higher frequencies are set to
+ // zeros. So tx32's scan order is used.
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, av1_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+ { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
+ { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+ { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
+ { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+ { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
},
};
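
Each entry in the table above is a { scan, iscan, neighbors } triple: scan[] maps a scan position to a coefficient index, iscan[] is the inverse mapping, and neighbors[] stores two already-scanned neighbors per position for context derivation. A minimal, self-contained sketch of the invariant those arrays maintain (illustrative only; the real SCAN_ORDER type and tables live in av1/common/scan.h and scan.c):

    #include <assert.h>
    #include <stdint.h>

    typedef struct {
      const int16_t *scan;      /* scan position -> coefficient index */
      const int16_t *iscan;     /* coefficient index -> scan position */
      const int16_t *neighbors; /* two already-scanned neighbors per position */
    } scan_order_sketch;

    /* iscan must be the inverse permutation of scan over an n-coefficient
       block, and no listed neighbor may come later in scan order. */
    static void check_scan_order(const scan_order_sketch *so, int n) {
      for (int si = 0; si < n; ++si) {
        const int ci = so->scan[si];
        assert(so->iscan[ci] == si);
        assert(so->iscan[so->neighbors[2 * si + 0]] <= si);
        assert(so->iscan[so->neighbors[2 * si + 1]] <= si);
      }
    }
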
-
-#if CONFIG_ADAPT_SCAN
-// TX_32X32 will have 1024 coefficients whose indices can be represented in 10
-// bits
-#define COEFF_IDX_BITS (10 + CONFIG_TX64X64)
-#define COEFF_IDX_SIZE (1 << COEFF_IDX_BITS)
-#define COEFF_IDX_MASK (COEFF_IDX_SIZE - 1)
-
-static uint32_t *get_non_zero_prob(FRAME_CONTEXT *fc, TX_SIZE tx_size,
- TX_TYPE tx_type) {
- switch (tx_size) {
-#if CONFIG_CHROMA_2X2
- case TX_2X2: return fc->non_zero_prob_2x2[tx_type];
-#endif
- case TX_4X4: return fc->non_zero_prob_4X4[tx_type];
- case TX_8X8: return fc->non_zero_prob_8X8[tx_type];
- case TX_16X16: return fc->non_zero_prob_16X16[tx_type];
- case TX_32X32: return fc->non_zero_prob_32X32[tx_type];
-#if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- case TX_4X8: return fc->non_zero_prob_4X8[tx_type];
- case TX_8X4: return fc->non_zero_prob_8X4[tx_type];
- case TX_8X16: return fc->non_zero_prob_8X16[tx_type];
- case TX_16X8: return fc->non_zero_prob_16X8[tx_type];
- case TX_16X32: return fc->non_zero_prob_16X32[tx_type];
- case TX_32X16: return fc->non_zero_prob_32X16[tx_type];
-#endif // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- default: assert(0); return NULL;
- }
-}
-
-static int16_t *get_adapt_scan(FRAME_CONTEXT *fc, TX_SIZE tx_size,
- TX_TYPE tx_type) {
- switch (tx_size) {
-#if CONFIG_CHROMA_2X2
- case TX_2X2: return fc->scan_2x2[tx_type];
-#endif
- case TX_4X4: return fc->scan_4X4[tx_type];
- case TX_8X8: return fc->scan_8X8[tx_type];
- case TX_16X16: return fc->scan_16X16[tx_type];
- case TX_32X32: return fc->scan_32X32[tx_type];
-#if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- case TX_4X8: return fc->scan_4X8[tx_type];
- case TX_8X4: return fc->scan_8X4[tx_type];
- case TX_8X16: return fc->scan_8X16[tx_type];
- case TX_16X8: return fc->scan_16X8[tx_type];
- case TX_16X32: return fc->scan_16X32[tx_type];
- case TX_32X16: return fc->scan_32X16[tx_type];
-#endif // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- default: assert(0); return NULL;
- }
-}
-
-static int16_t *get_adapt_iscan(FRAME_CONTEXT *fc, TX_SIZE tx_size,
- TX_TYPE tx_type) {
- switch (tx_size) {
-#if CONFIG_CHROMA_2X2
- case TX_2X2: return fc->iscan_2x2[tx_type];
-#endif
- case TX_4X4: return fc->iscan_4X4[tx_type];
- case TX_8X8: return fc->iscan_8X8[tx_type];
- case TX_16X16: return fc->iscan_16X16[tx_type];
- case TX_32X32: return fc->iscan_32X32[tx_type];
-#if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- case TX_4X8: return fc->iscan_4X8[tx_type];
- case TX_8X4: return fc->iscan_8X4[tx_type];
- case TX_8X16: return fc->iscan_8X16[tx_type];
- case TX_16X8: return fc->iscan_16X8[tx_type];
- case TX_16X32: return fc->iscan_16X32[tx_type];
- case TX_32X16: return fc->iscan_32X16[tx_type];
-#endif // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- default: assert(0); return NULL;
- }
-}
-
-static int16_t *get_adapt_nb(FRAME_CONTEXT *fc, TX_SIZE tx_size,
- TX_TYPE tx_type) {
- switch (tx_size) {
-#if CONFIG_CHROMA_2X2
- case TX_2X2: return fc->nb_2x2[tx_type];
-#endif
- case TX_4X4: return fc->nb_4X4[tx_type];
- case TX_8X8: return fc->nb_8X8[tx_type];
- case TX_16X16: return fc->nb_16X16[tx_type];
- case TX_32X32: return fc->nb_32X32[tx_type];
-#if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- case TX_4X8: return fc->nb_4X8[tx_type];
- case TX_8X4: return fc->nb_8X4[tx_type];
- case TX_8X16: return fc->nb_8X16[tx_type];
- case TX_16X8: return fc->nb_16X8[tx_type];
- case TX_16X32: return fc->nb_16X32[tx_type];
- case TX_32X16: return fc->nb_32X16[tx_type];
-#endif // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- default: assert(0); return NULL;
- }
-}
-
-static uint32_t *get_non_zero_counts(FRAME_COUNTS *counts, TX_SIZE tx_size,
- TX_TYPE tx_type) {
- switch (tx_size) {
-#if CONFIG_CHROMA_2X2
- case TX_2X2: return counts->non_zero_count_2x2[tx_type];
-#endif
- case TX_4X4: return counts->non_zero_count_4X4[tx_type];
- case TX_8X8: return counts->non_zero_count_8X8[tx_type];
- case TX_16X16: return counts->non_zero_count_16X16[tx_type];
- case TX_32X32: return counts->non_zero_count_32X32[tx_type];
-#if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- case TX_4X8: return counts->non_zero_count_4x8[tx_type];
- case TX_8X4: return counts->non_zero_count_8x4[tx_type];
- case TX_8X16: return counts->non_zero_count_8x16[tx_type];
- case TX_16X8: return counts->non_zero_count_16x8[tx_type];
- case TX_16X32: return counts->non_zero_count_16x32[tx_type];
- case TX_32X16: return counts->non_zero_count_32x16[tx_type];
-#endif // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- default: assert(0); return NULL;
- }
-}
-
-static INLINE int clamp_64(int64_t value, int low, int high) {
- return value < low ? low : (value > high ? high : (int)value);
-}
-
-#if USE_2X2_PROB
-static int do_down_sample(TX_SIZE tx_size) {
- const int tx_w = tx_size_wide[tx_size];
- const int tx_h = tx_size_high[tx_size];
- if (tx_w > 8 || tx_h > 8) {
- return 1;
- } else {
- return 0;
- }
-}
-
-void av1_down_sample_scan_count(uint32_t *non_zero_count_ds,
- const uint32_t *non_zero_count,
- TX_SIZE tx_size) {
- const int tx_w = tx_size_wide[tx_size];
- const int tx_h = tx_size_high[tx_size];
- if (tx_w > 8 && tx_h > 8) {
- const int tx_w_ds = tx_w >> 1;
- const int tx_h_ds = tx_h >> 1;
- for (int r_ds = 0; r_ds < tx_h_ds; ++r_ds) {
- for (int c_ds = 0; c_ds < tx_w_ds; ++c_ds) {
- const int ci_ds = r_ds * tx_w_ds + c_ds;
- const int r = r_ds << 1;
- const int c = c_ds << 1;
- const int ci = r * tx_w + c;
- non_zero_count_ds[ci_ds] = non_zero_count[ci];
- }
- }
- } else if (tx_w > 8 && tx_h <= 8) {
- const int tx_w_ds = tx_w >> 1;
- const int tx_h_ds = tx_h;
- for (int r_ds = 0; r_ds < tx_h_ds; ++r_ds) {
- for (int c_ds = 0; c_ds < tx_w_ds; ++c_ds) {
- const int ci_ds = r_ds * tx_w_ds + c_ds;
- const int r = r_ds;
- const int c = c_ds << 1;
- const int ci = r * tx_w + c;
- non_zero_count_ds[ci_ds] = non_zero_count[ci];
- }
- }
- } else if (tx_w <= 8 && tx_h > 8) {
- const int tx_w_ds = tx_w;
- const int tx_h_ds = tx_h >> 1;
- for (int r_ds = 0; r_ds < tx_h_ds; ++r_ds) {
- for (int c_ds = 0; c_ds < tx_w_ds; ++c_ds) {
- const int ci_ds = r_ds * tx_w_ds + c_ds;
- const int r = r_ds << 1;
- const int c = c_ds;
- const int ci = r * tx_w + c;
- non_zero_count_ds[ci_ds] = non_zero_count[ci];
- }
- }
- } else {
- assert(0);
- }
-}
-
-void av1_up_sample_scan_count(uint32_t *non_zero_count,
- const uint32_t *non_zero_count_ds,
- TX_SIZE tx_size, unsigned int block_num) {
- const int tx_w = tx_size_wide[tx_size];
- const int tx_h = tx_size_high[tx_size];
- if (tx_w > 8 && tx_h > 8) {
- const int tx_w_ds = tx_w >> 1;
- const int tx_h_ds = tx_h >> 1;
- for (int r_ds = 0; r_ds < tx_h_ds; ++r_ds) {
- for (int c_ds = 0; c_ds < tx_w_ds; ++c_ds) {
- const int ci_ds = r_ds * tx_w_ds + c_ds;
- const int r = r_ds << 1;
- const int c = c_ds << 1;
- const int ci = r * tx_w + c;
- non_zero_count[ci] = non_zero_count_ds[ci_ds];
- if (c_ds + 1 < tx_w_ds) {
- uint32_t count =
- non_zero_count_ds[ci_ds] + non_zero_count_ds[ci_ds + 1];
- count = ROUND_POWER_OF_TWO(count, 1);
- count = clamp32u(count, 0, block_num);
- non_zero_count[ci + 1] = count;
- } else {
- non_zero_count[ci + 1] = non_zero_count_ds[ci_ds];
- }
- }
- }
- for (int r_ds = 0; r_ds < tx_h_ds; ++r_ds) {
- for (int c = 0; c < tx_w; ++c) {
- const int r = r_ds << 1;
- const int ci = r * tx_w + c;
- if (r + 2 < tx_h) {
- uint32_t count = non_zero_count[ci] + non_zero_count[ci + 2 * tx_w];
- count = ROUND_POWER_OF_TWO(count, 1);
- count = clamp32u(count, 0, block_num);
- non_zero_count[ci + tx_w] = count;
- } else {
- non_zero_count[ci + tx_w] = non_zero_count[ci];
- }
- }
- }
- } else if (tx_w > 8 && tx_h <= 8) {
- const int tx_w_ds = tx_w >> 1;
- const int tx_h_ds = tx_h;
- for (int r_ds = 0; r_ds < tx_h_ds; ++r_ds) {
- for (int c_ds = 0; c_ds < tx_w_ds; ++c_ds) {
- const int ci_ds = r_ds * tx_w_ds + c_ds;
- const int r = r_ds;
- const int c = c_ds << 1;
- const int ci = r * tx_w + c;
- non_zero_count[ci] = non_zero_count_ds[ci_ds];
- if (c_ds + 1 < tx_w_ds) {
- uint32_t count =
- non_zero_count_ds[ci_ds] + non_zero_count_ds[ci_ds + 1];
- count = ROUND_POWER_OF_TWO(count, 1);
- count = clamp32u(count, 0, block_num);
- non_zero_count[ci + 1] = count;
- } else {
- non_zero_count[ci + 1] = non_zero_count_ds[ci_ds];
- }
- }
- }
- } else if (tx_w <= 8 && tx_h > 8) {
- const int tx_w_ds = tx_w;
- const int tx_h_ds = tx_h >> 1;
- for (int r_ds = 0; r_ds < tx_h_ds; ++r_ds) {
- for (int c_ds = 0; c_ds < tx_w_ds; ++c_ds) {
- const int ci_ds = r_ds * tx_w_ds + c_ds;
- const int r = r_ds << 1;
- const int c = c_ds;
- const int ci = r * tx_w + c;
- non_zero_count[ci] = non_zero_count_ds[ci_ds];
- if (r_ds + 1 < tx_h_ds) {
- uint32_t count =
- non_zero_count_ds[ci_ds] + non_zero_count_ds[ci_ds + tx_w_ds];
- count = ROUND_POWER_OF_TWO(count, 1);
- count = clamp32u(count, 0, block_num);
- non_zero_count[ci + tx_w] = count;
- } else {
- non_zero_count[ci + tx_w] = non_zero_count_ds[ci_ds];
- }
- }
- }
- } else {
- assert(0);
- }
-}
-#endif
-
-static void update_scan_prob(AV1_COMMON *cm, TX_SIZE tx_size, TX_TYPE tx_type,
- int rate) {
- FRAME_CONTEXT *pre_fc = cm->pre_fc;
- uint32_t *prev_non_zero_prob = get_non_zero_prob(pre_fc, tx_size, tx_type);
- uint32_t *non_zero_prob = get_non_zero_prob(cm->fc, tx_size, tx_type);
- uint32_t *non_zero_count = get_non_zero_counts(&cm->counts, tx_size, tx_type);
- const int tx2d_size = tx_size_2d[tx_size];
- unsigned int block_num = cm->counts.txb_count[tx_size][tx_type];
-#if USE_2X2_PROB
-#if CONFIG_TX64X64
- DECLARE_ALIGNED(16, uint32_t, non_zero_count_ds[1024]);
- assert((tx2d_size >> 2) <= 1024);
-#else // CONFIG_TX64X64
- DECLARE_ALIGNED(16, uint32_t, non_zero_count_ds[256]);
- assert((tx2d_size >> 2) <= 256);
-#endif // CONFIG_TX64X64
- if (do_down_sample(tx_size)) {
- av1_down_sample_scan_count(non_zero_count_ds, non_zero_count, tx_size);
- av1_up_sample_scan_count(non_zero_count, non_zero_count_ds, tx_size,
- block_num);
- }
-#endif
- int i;
- const int inv_precision = 30;
- int32_t inv_block_num = block_num == 0 ? 0 : (1 << inv_precision) / block_num;
- for (i = 0; i < tx2d_size; i++) {
- int64_t curr_prob =
- block_num == 0 ? 0 : ((non_zero_count[i] * inv_block_num) >>
- (inv_precision - ADAPT_SCAN_PROB_PRECISION));
- int64_t prev_prob = prev_non_zero_prob[i];
- int64_t pred_prob =
- (curr_prob * rate +
- prev_prob * ((1 << ADAPT_SCAN_PROB_PRECISION) - rate)) >>
- ADAPT_SCAN_PROB_PRECISION;
- // TODO(angiebird): reduce the bit usage of probabilities and remove
- // clamp_64()
- non_zero_prob[i] =
- clamp_64(pred_prob, 0, (1 << ADAPT_SCAN_PROB_PRECISION) - 1);
- }
-}
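
The removed update_scan_prob() above folds each coefficient's observed nonzero frequency for the current frame into the previous frame's probability with a fixed-point exponential moving average, then clamps the result. A standalone sketch of just that blend (the precision value here is an assumed stand-in for ADAPT_SCAN_PROB_PRECISION):

    #include <stdint.h>

    #define PROB_PRECISION 16 /* assumed stand-in for ADAPT_SCAN_PROB_PRECISION */

    /* prev, curr and the result are probabilities in Q(PROB_PRECISION);
       rate is the per-frame update weight on the same scale. */
    static uint32_t blend_nonzero_prob(uint32_t prev, uint32_t curr,
                                       uint32_t rate) {
      const uint64_t one = (uint64_t)1 << PROB_PRECISION;
      uint64_t p = ((uint64_t)curr * rate + (uint64_t)prev * (one - rate)) >>
                   PROB_PRECISION;
      if (p > one - 1) p = one - 1; /* mirrors the clamp_64() call above */
      return (uint32_t)p;
    }
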
-
-static void update_scan_count(int16_t *scan, int max_scan,
- const tran_low_t *dqcoeffs,
- uint32_t *non_zero_count) {
- int i;
- for (i = 0; i < max_scan; ++i) {
- int coeff_idx = scan[i];
- non_zero_count[coeff_idx] += (dqcoeffs[coeff_idx] != 0);
- }
-}
-
-void av1_update_scan_count_facade(AV1_COMMON *cm, FRAME_COUNTS *counts,
- TX_SIZE tx_size, TX_TYPE tx_type,
- const tran_low_t *dqcoeffs, int max_scan) {
- if (cm->use_adapt_scan && do_adapt_scan(tx_size, tx_type)) {
- int16_t *scan = get_adapt_scan(cm->fc, tx_size, tx_type);
- uint32_t *non_zero_count = get_non_zero_counts(counts, tx_size, tx_type);
- update_scan_count(scan, max_scan, dqcoeffs, non_zero_count);
- ++counts->txb_count[tx_size][tx_type];
- }
-}
-
-static int cmp_prob(const void *a, const void *b) {
- return *(const uint32_t *)b > *(const uint32_t *)a ? 1 : -1;
-}
-
-void av1_augment_prob(TX_SIZE tx_size, TX_TYPE tx_type, uint32_t *prob) {
- // TODO(angiebird): check if we need is_inter here
- const SCAN_ORDER *sc = get_default_scan(tx_size, tx_type, 0);
- const int tx1d_wide = tx_size_wide[tx_size];
- const int tx1d_high = tx_size_high[tx_size];
- int r, c;
- for (r = 0; r < tx1d_high; r++) {
- for (c = 0; c < tx1d_wide; c++) {
- const int idx = r * tx1d_wide + c;
- const uint32_t mask_16 = ((1 << 16) - 1);
- const uint32_t tie_breaker = ~((uint32_t)sc->iscan[idx]);
- // prob[idx]: 16 bits dummy: 6 bits scan_idx: 10 bits
- prob[idx] = (prob[idx] << 16) | (mask_16 & tie_breaker);
- }
- }
-}
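
av1_augment_prob() above packs each probability into the upper 16 bits of a 32-bit key and a bit-inverted default scan index into the low bits, so sorting the keys in descending order breaks probability ties in favor of the earlier default-scan position. A self-contained sketch of that pack/unpack round trip (constant and function names here are illustrative; field widths follow the comment in the code above):

    #include <stdint.h>

    #define SCAN_IDX_BITS 10
    #define SCAN_IDX_MASK ((1u << SCAN_IDX_BITS) - 1)

    /* Upper 16 bits: probability; low bits: bit-inverted default scan index,
       so a larger key means higher probability, then earlier default position. */
    static uint32_t pack_prob_key(uint32_t prob16, uint32_t default_scan_idx) {
      return (prob16 << 16) | (0xFFFFu & ~default_scan_idx);
    }

    /* Recovers the low SCAN_IDX_BITS of the original scan index:
       unpack_scan_idx(pack_prob_key(p, s)) == (s & SCAN_IDX_MASK). */
    static uint32_t unpack_scan_idx(uint32_t key) {
      return (key & SCAN_IDX_MASK) ^ SCAN_IDX_MASK;
    }
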
-
-void av1_update_neighbors(TX_SIZE tx_size, const int16_t *scan,
- const int16_t *iscan, int16_t *neighbors) {
- const int tx1d_wide = tx_size_wide[tx_size];
- const int tx1d_high = tx_size_high[tx_size];
- const int tx2d_size = tx_size_2d[tx_size];
- int scan_idx;
- for (scan_idx = 0; scan_idx < tx2d_size; ++scan_idx) {
- const int coeff_idx = scan[scan_idx];
- const int r = coeff_idx / tx1d_wide;
- const int c = coeff_idx % tx1d_wide;
- const int nb_offset_r[5] = { -1, 0, -1, -1, 1 };
- const int nb_offset_c[5] = { 0, -1, -1, 1, -1 };
- const int nb_num = 5;
- int nb_count = 0;
- int nb_idx;
-
- for (nb_idx = 0; nb_idx < nb_num; ++nb_idx) {
- if (nb_count < 2) {
- int nb_r = r + nb_offset_r[nb_idx];
- int nb_c = c + nb_offset_c[nb_idx];
- int nb_coeff_idx = nb_r * tx1d_wide + nb_c;
- int valid_pos =
- nb_r >= 0 && nb_r < tx1d_high && nb_c >= 0 && nb_c < tx1d_wide;
- if (valid_pos && iscan[nb_coeff_idx] < scan_idx) {
- neighbors[scan_idx * MAX_NEIGHBORS + nb_count] = nb_coeff_idx;
- ++nb_count;
- }
- } else {
- break;
- }
- }
-
- if (nb_count == 1) {
- neighbors[scan_idx * MAX_NEIGHBORS + 1] =
- neighbors[scan_idx * MAX_NEIGHBORS + 0];
- } else if (nb_count == 0) {
- neighbors[scan_idx * MAX_NEIGHBORS + 0] = scan[0];
- neighbors[scan_idx * MAX_NEIGHBORS + 1] = scan[0];
- }
- }
- neighbors[tx2d_size * MAX_NEIGHBORS + 0] = scan[0];
- neighbors[tx2d_size * MAX_NEIGHBORS + 1] = scan[0];
-}
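
av1_update_neighbors() above stores, for every scan position, two coefficient indices that are guaranteed to have been scanned earlier, laid out with a stride of MAX_NEIGHBORS (2). A hedged sketch of how such a table is typically consumed to form a coding context, in the spirit of get_coef_context() declared in scan.h (that function is the authoritative version; this only shows the shape of the computation):

    #include <stdint.h>

    #define MAX_NEIGHBORS 2

    /* token_cache[ci] holds a small per-coefficient state; the context for
       scan position c is the rounded mean of its two earlier neighbors. */
    static int coef_context_sketch(const int16_t *neighbors,
                                   const uint8_t *token_cache, int c) {
      return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] +
              token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >>
             1;
    }
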
-
-#if USE_LIMIT_SCAN_DISTANCE
-typedef struct SCAN_NB_QUEUE {
- int nb_ci_queue[COEFF_IDX_SIZE + 1];
- int pr_si_queue[COEFF_IDX_SIZE + 1];
- int size;
- int start;
- int end;
-} SCAN_NB_QUEUE;
-
-static void assign_scan_idx(int16_t coeff_idx, int16_t *scan_idx, int tx_width,
- int tx_height, int16_t *scan, int16_t *iscan,
- int16_t *visit, SCAN_NB_QUEUE *queue) {
- if (visit[coeff_idx] != 2) {
- assert(*scan_idx < tx_width * tx_height);
- scan[*scan_idx] = coeff_idx;
- iscan[coeff_idx] = *scan_idx;
- visit[coeff_idx] = 2;
- int row = coeff_idx / tx_width;
- int col = coeff_idx % tx_width;
- int right_ci = coeff_idx + 1;
- if (col + 1 < tx_width && visit[right_ci] == 0) {
- visit[right_ci] = 1;
- queue->pr_si_queue[queue->end] = *scan_idx;
- queue->nb_ci_queue[queue->end] = right_ci;
- queue->end = (queue->end + 1) % queue->size;
- }
- int down_ci = coeff_idx + tx_width;
- if (row + 1 < tx_height && visit[down_ci] == 0) {
- visit[down_ci] = 1;
- queue->pr_si_queue[queue->end] = *scan_idx;
- queue->nb_ci_queue[queue->end] = down_ci;
- queue->end = (queue->end + 1) % queue->size;
- }
- ++(*scan_idx);
- }
-}
-static void limit_nb_scan_distance(TX_SIZE tx_size, int16_t *scan,
- int16_t *iscan) {
- const int tx2d_size = tx_size_2d[tx_size];
- int16_t visit[COEFF_IDX_SIZE] = { 0 };
- int16_t org_scan[COEFF_IDX_SIZE];
- memcpy(org_scan, scan, tx2d_size * sizeof(*scan));
- const int tx_width = tx_size_wide[tx_size];
- const int tx_height = tx_size_high[tx_size];
- const int limit = 2 * AOMMAX(tx_width, tx_height);
- SCAN_NB_QUEUE queue;
- queue.size = tx2d_size;
- queue.start = 0;
- queue.end = 0;
- int16_t new_si = 0;
- for (int16_t si = 0; si < tx2d_size; ++si) {
- while (queue.start != queue.end &&
- queue.pr_si_queue[queue.start] + limit <= new_si) {
- int nb_ci = queue.nb_ci_queue[queue.start];
- assign_scan_idx(nb_ci, &new_si, tx_width, tx_height, scan, iscan, visit,
- &queue);
- queue.start = (queue.start + 1) % queue.size;
- }
-
- int16_t ci = org_scan[si];
- assign_scan_idx(ci, &new_si, tx_width, tx_height, scan, iscan, visit,
- &queue);
- }
- assert(new_si == tx2d_size);
-}
-#endif // USE_LIMIT_SCAN_DISTANCE
-
-#if USE_TOPOLOGICAL_SORT
-void av1_update_sort_order(TX_SIZE tx_size, TX_TYPE tx_type,
- const uint32_t *non_zero_prob, int16_t *sort_order) {
- const SCAN_ORDER *sc = get_default_scan(tx_size, tx_type, 0);
- uint32_t temp[COEFF_IDX_SIZE];
- const int tx2d_size = tx_size_2d[tx_size];
- int sort_idx;
- assert(tx2d_size <= COEFF_IDX_SIZE);
- memcpy(temp, non_zero_prob, tx2d_size * sizeof(*non_zero_prob));
- av1_augment_prob(tx_size, tx_type, temp);
- qsort(temp, tx2d_size, sizeof(*temp), cmp_prob);
- for (sort_idx = 0; sort_idx < tx2d_size; ++sort_idx) {
- const int default_scan_idx =
- (temp[sort_idx] & COEFF_IDX_MASK) ^ COEFF_IDX_MASK;
- const int coeff_idx = sc->scan[default_scan_idx];
- sort_order[sort_idx] = coeff_idx;
- }
-}
-
-// topological sort
-static void dfs_scan(int tx1d_size, int *scan_idx, int coeff_idx, int16_t *scan,
- int16_t *iscan) {
- const int r = coeff_idx / tx1d_size;
- const int c = coeff_idx % tx1d_size;
-
- if (iscan[coeff_idx] != -1) return;
-
- if (r > 0) dfs_scan(tx1d_size, scan_idx, coeff_idx - tx1d_size, scan, iscan);
-
- if (c > 0) dfs_scan(tx1d_size, scan_idx, coeff_idx - 1, scan, iscan);
-
- scan[*scan_idx] = coeff_idx;
- iscan[coeff_idx] = *scan_idx;
- ++(*scan_idx);
-}
-
-void av1_update_scan_order(TX_SIZE tx_size, int16_t *sort_order, int16_t *scan,
- int16_t *iscan) {
- int coeff_idx;
- int scan_idx;
- int sort_idx;
- const int tx1d_size = tx_size_wide[tx_size];
- const int tx2d_size = tx_size_2d[tx_size];
-
- for (coeff_idx = 0; coeff_idx < tx2d_size; ++coeff_idx) {
- iscan[coeff_idx] = -1;
- }
-
- scan_idx = 0;
- for (sort_idx = 0; sort_idx < tx2d_size; ++sort_idx) {
- coeff_idx = sort_order[sort_idx];
- dfs_scan(tx1d_size, &scan_idx, coeff_idx, scan, iscan);
- }
-}
-#else
-
-static void filter_prob(TX_SIZE tx_size, uint32_t *prob) {
- const int tx1d_wide = tx_size_wide[tx_size];
- const int tx1d_high = tx_size_high[tx_size];
- for (int r = tx1d_high - 1; r >= 0; --r) {
- for (int c = tx1d_wide - 1; c >= 0; --c) {
- int idx = r * tx1d_wide + c;
- uint32_t v = prob[idx];
- if (r > 0 && prob[idx - tx1d_wide] < v) prob[idx - tx1d_wide] = v;
- if (c > 0 && prob[idx - 1] < v) prob[idx - 1] = v;
- }
- }
-}
-
-void av1_update_scan_order(TX_SIZE tx_size, TX_TYPE tx_type,
- uint32_t *non_zero_prob, int16_t *scan,
- int16_t *iscan) {
- const SCAN_ORDER *sc = get_default_scan(tx_size, tx_type, 0);
- uint32_t temp[COEFF_IDX_SIZE];
- const int tx2d_size = tx_size_2d[tx_size];
- int scan_idx;
- assert(tx2d_size <= COEFF_IDX_SIZE);
- memcpy(temp, non_zero_prob, tx2d_size * sizeof(*non_zero_prob));
- filter_prob(tx_size, temp);
- av1_augment_prob(tx_size, tx_type, temp);
- qsort(temp, tx2d_size, sizeof(*temp), cmp_prob);
- for (scan_idx = 0; scan_idx < tx2d_size; ++scan_idx) {
- const int default_scan_idx =
- (temp[scan_idx] & COEFF_IDX_MASK) ^ COEFF_IDX_MASK;
- const int coeff_idx = sc->scan[default_scan_idx];
- scan[scan_idx] = coeff_idx;
- iscan[coeff_idx] = scan_idx;
- }
-}
-#endif
-
-static void update_scan_order_facade(AV1_COMMON *cm, TX_SIZE tx_size,
- TX_TYPE tx_type, int use_curr_frame) {
-#if USE_TOPOLOGICAL_SORT
- int16_t sort_order[COEFF_IDX_SIZE];
-#endif
- uint32_t *non_zero_prob;
- if (use_curr_frame)
- non_zero_prob = get_non_zero_prob(cm->fc, tx_size, tx_type);
- else
- non_zero_prob = get_non_zero_prob(cm->pre_fc, tx_size, tx_type);
- int16_t *scan = get_adapt_scan(cm->fc, tx_size, tx_type);
- int16_t *iscan = get_adapt_iscan(cm->fc, tx_size, tx_type);
- int16_t *nb = get_adapt_nb(cm->fc, tx_size, tx_type);
- assert(tx_size_2d[tx_size] <= COEFF_IDX_SIZE);
-#if USE_TOPOLOGICAL_SORT
- av1_update_sort_order(tx_size, tx_type, non_zero_prob, sort_order);
- av1_update_scan_order(tx_size, sort_order, scan, iscan);
-#else
- av1_update_scan_order(tx_size, tx_type, non_zero_prob, scan, iscan);
-#endif
-#if USE_LIMIT_SCAN_DISTANCE
- limit_nb_scan_distance(tx_size, scan, iscan);
-#endif // USE_LIMIT_SCAN_DISTANCE
- av1_update_neighbors(tx_size, scan, iscan, nb);
-}
-
-static void update_eob_threshold(AV1_COMMON *cm, TX_SIZE tx_size,
- TX_TYPE tx_type) {
- int i, row, col, row_limit, col_limit, cal_idx = 0;
- const int tx_width = tx_size_wide[tx_size];
- const int tx_height = tx_size_high[tx_size];
-
- row_limit = tx_width >> 1;
- col_limit = tx_height >> 1;
-
- if (tx_width >= 8 && tx_height >= 8) {
- SCAN_ORDER *sc = &cm->fc->sc[tx_size][tx_type];
- int16_t *threshold = &cm->fc->eob_threshold[tx_size][tx_type][0];
- const int tx2d_size = tx_size_2d[tx_size];
-
- while (cal_idx < EOB_THRESHOLD_NUM) {
- for (i = 0; i < tx2d_size; ++i) {
- row = sc->scan[i] / tx_height;
- col = sc->scan[i] % tx_width;
- if (row >= row_limit || col >= col_limit) break;
- }
- row_limit >>= 1;
- col_limit >>= 1;
- threshold[cal_idx] = i;
- cal_idx++;
- }
- }
-}
-
-void av1_init_scan_order(AV1_COMMON *cm) {
- TX_SIZE tx_size;
- TX_TYPE tx_type;
- for (tx_size = 0; tx_size < TX_SIZES_ALL; ++tx_size) {
-#if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- if (tx_size > TX_32X16) continue;
-#else
- if (tx_size >= TX_SIZES) continue;
-#endif // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
- if (do_adapt_scan(tx_size, tx_type)) {
- uint32_t *non_zero_prob = get_non_zero_prob(cm->fc, tx_size, tx_type);
- const int tx2d_size = tx_size_2d[tx_size];
- int i;
- SCAN_ORDER *sc = &cm->fc->sc[tx_size][tx_type];
- for (i = 0; i < tx2d_size; ++i) {
- non_zero_prob[i] = (1 << ADAPT_SCAN_PROB_PRECISION) /
- 2; // init non_zero_prob to 0.5
- }
- update_scan_order_facade(cm, tx_size, tx_type, 1);
- sc->scan = get_adapt_scan(cm->fc, tx_size, tx_type);
- sc->iscan = get_adapt_iscan(cm->fc, tx_size, tx_type);
- sc->neighbors = get_adapt_nb(cm->fc, tx_size, tx_type);
- update_eob_threshold(cm, tx_size, tx_type);
- }
- }
- }
-}
-
-void av1_adapt_scan_order(AV1_COMMON *cm) {
- if (cm->use_adapt_scan) {
- TX_SIZE tx_size;
-#if CACHE_SCAN_PROB
- int use_curr_frame = 0;
-#else // CACHE_SCAN_PROB
- int use_curr_frame = 1;
-#endif // CACHE_SCAN_PROB
-
- for (tx_size = 0; tx_size < TX_SIZES_ALL; ++tx_size) {
-#if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- if (tx_size > TX_32X16) continue;
-#else
- if (tx_size >= TX_SIZES) continue;
-#endif // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- TX_TYPE tx_type;
- for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
- if (do_adapt_scan(tx_size, tx_type)) {
- update_scan_prob(cm, tx_size, tx_type, ADAPT_SCAN_UPDATE_RATE);
- update_scan_order_facade(cm, tx_size, tx_type, use_curr_frame);
- update_eob_threshold(cm, tx_size, tx_type);
- }
- }
- }
- }
-}
-
-void av1_deliver_eob_threshold(const AV1_COMMON *cm, MACROBLOCKD *xd) {
- xd->eob_threshold_md = (const EobThresholdMD *)cm->fc->eob_threshold;
-}
-#endif // CONFIG_ADAPT_SCAN
diff --git a/third_party/aom/av1/common/scan.h b/third_party/aom/av1/common/scan.h
index 82d2e917f..c5cebc135 100644
--- a/third_party/aom/av1/common/scan.h
+++ b/third_party/aom/av1/common/scan.h
@@ -25,51 +25,18 @@ extern "C" {
#define MAX_NEIGHBORS 2
-extern const SCAN_ORDER av1_default_scan_orders[TX_SIZES];
-extern const SCAN_ORDER av1_intra_scan_orders[TX_SIZES_ALL][TX_TYPES];
-extern const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES];
-
-#if CONFIG_ADAPT_SCAN
-#define USE_2X2_PROB 1
-#define USE_TOPOLOGICAL_SORT 0
-#define USE_LIMIT_SCAN_DISTANCE 0
-void av1_update_scan_count_facade(AV1_COMMON *cm, FRAME_COUNTS *counts,
- TX_SIZE tx_size, TX_TYPE tx_type,
- const tran_low_t *dqcoeffs, int max_scan);
-
-// Embed r + c and coeff_idx info into the nonzero probabilities. When sorting
-// the nonzero probabilities, if there is a tie, the coefficient with the
-// smaller r + c will be scanned first.
-void av1_augment_prob(TX_SIZE tx_size, TX_TYPE tx_type, uint32_t *prob);
+typedef enum SCAN_MODE {
+ SCAN_MODE_ZIG_ZAG,
+ SCAN_MODE_COL_DIAG,
+ SCAN_MODE_ROW_DIAG,
+ SCAN_MODE_COL_1D,
+ SCAN_MODE_ROW_1D,
+ SCAN_MODES
+} SCAN_MODE;
-#if USE_TOPOLOGICAL_SORT
-// apply quick sort on nonzero probabilities to obtain a sort order
-void av1_update_sort_order(TX_SIZE tx_size, TX_TYPE tx_type,
- const uint32_t *non_zero_prob, int16_t *sort_order);
-
-// Apply a topological sort to the nonzero-probability sort order so that each
-// coefficient's upper and left neighbors are guaranteed to be scanned before
-// the coefficient itself.
-void av1_update_scan_order(TX_SIZE tx_size, int16_t *sort_order, int16_t *scan,
- int16_t *iscan);
-#else // USE_TOPOLOGICAL_SORT
-void av1_update_scan_order(TX_SIZE tx_size, TX_TYPE tx_type,
- uint32_t *non_zero_prob, int16_t *scan,
- int16_t *iscan);
-#endif // USE_TOPOLOGICAL_SORT
+extern const SCAN_ORDER av1_default_scan_orders[TX_SIZES];
+extern const SCAN_ORDER av1_scan_orders[TX_SIZES_ALL][TX_TYPES];
-// For each coeff_idx in scan[], update its above and left neighbors in
-// neighbors[] accordingly.
-void av1_update_neighbors(TX_SIZE tx_size, const int16_t *scan,
- const int16_t *iscan, int16_t *neighbors);
-void av1_init_scan_order(AV1_COMMON *cm);
-void av1_adapt_scan_order(AV1_COMMON *cm);
-#if USE_2X2_PROB
-void av1_down_sample_scan_count(uint32_t *non_zero_count_ds,
- const uint32_t *non_zero_count,
- TX_SIZE tx_size);
-#endif // USE_2X2_PROB
-#endif // CONFIG_ADAPT_SCAN
void av1_deliver_eob_threshold(const AV1_COMMON *cm, MACROBLOCKD *xd);
static INLINE int get_coef_context(const int16_t *neighbors,
@@ -80,52 +47,12 @@ static INLINE int get_coef_context(const int16_t *neighbors,
}
static INLINE const SCAN_ORDER *get_default_scan(TX_SIZE tx_size,
- TX_TYPE tx_type,
- int is_inter) {
-#if CONFIG_EXT_TX || CONFIG_VAR_TX
- return is_inter ? &av1_inter_scan_orders[tx_size][tx_type]
- : &av1_intra_scan_orders[tx_size][tx_type];
-#else
- (void)is_inter;
- return &av1_intra_scan_orders[tx_size][tx_type];
-#endif // CONFIG_EXT_TX
+ TX_TYPE tx_type) {
+ return &av1_scan_orders[tx_size][tx_type];
}
-static INLINE int do_adapt_scan(TX_SIZE tx_size, TX_TYPE tx_type) {
- (void)tx_size;
-#if CONFIG_EXT_TX
- if (tx_size_2d[tx_size] >= 1024 && tx_type != DCT_DCT) return 0;
- return tx_type < IDTX;
-#else
- (void)tx_type;
- return 1;
-#endif
-}
-
-static INLINE const SCAN_ORDER *get_scan(const AV1_COMMON *cm, TX_SIZE tx_size,
- TX_TYPE tx_type,
- const MB_MODE_INFO *mbmi) {
-#if CONFIG_MRC_TX
- // use the DCT_DCT scan order for MRC_DCT for now
- if (tx_type == MRC_DCT) tx_type = DCT_DCT;
-#endif // CONFIG_MRC_TX
-#if CONFIG_LGT_FROM_PRED
- if (mbmi->use_lgt) tx_type = DCT_DCT;
-#endif
- const int is_inter = is_inter_block(mbmi);
-#if CONFIG_ADAPT_SCAN
- (void)mbmi;
- (void)is_inter;
-#if CONFIG_EXT_TX
- if (!do_adapt_scan(tx_size, tx_type))
- return get_default_scan(tx_size, tx_type, is_inter);
- else
-#endif // CONFIG_EXT_TX
- return &cm->fc->sc[tx_size][tx_type];
-#else // CONFIG_ADAPT_SCAN
- (void)cm;
- return get_default_scan(tx_size, tx_type, is_inter);
-#endif // CONFIG_ADAPT_SCAN
+static INLINE const SCAN_ORDER *get_scan(TX_SIZE tx_size, TX_TYPE tx_type) {
+ return get_default_scan(tx_size, tx_type);
}
#ifdef __cplusplus
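
With the adaptive-scan machinery removed, a scan order is now looked up purely from the transform size and type. A hypothetical call site under the simplified API above (the decode context around it is illustrative; SCAN_ORDER, get_scan and tran_low_t are the names used elsewhere in this patch):

    /* Sketch only: walk the dequantized coefficients of one block in scan
       order, assuming av1/common/scan.h is included. */
    static void visit_block_coeffs(TX_SIZE tx_size, TX_TYPE tx_type,
                                   const tran_low_t *dqcoeff, int eob) {
      const SCAN_ORDER *so = get_scan(tx_size, tx_type);
      for (int si = 0; si < eob; ++si) {
        const int ci = so->scan[si];
        (void)dqcoeff[ci]; /* e.g. feed the inverse transform or context model */
      }
    }
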
diff --git a/third_party/aom/av1/common/seg_common.c b/third_party/aom/av1/common/seg_common.c
index 4603026bd..cd189ad76 100644
--- a/third_party/aom/av1/common/seg_common.c
+++ b/third_party/aom/av1/common/seg_common.c
@@ -16,18 +16,11 @@
#include "av1/common/seg_common.h"
#include "av1/common/quant_common.h"
-#if CONFIG_LOOPFILTER_LEVEL
-static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 1, 1, 0, 0 };
+static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 1, 1, 1, 0, 0 };
static const int seg_feature_data_max[SEG_LVL_MAX] = {
- MAXQ, MAX_LOOP_FILTER, MAX_LOOP_FILTER, MAX_LOOP_FILTER, 0
+ MAXQ, MAX_LOOP_FILTER, MAX_LOOP_FILTER, MAX_LOOP_FILTER, MAX_LOOP_FILTER, 7, 0
};
-#else
-static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 };
-
-static const int seg_feature_data_max[SEG_LVL_MAX] = { MAXQ, MAX_LOOP_FILTER, 3,
- 0 };
-#endif // CONFIG_LOOPFILTER_LEVEL
// These functions provide access to new segment level features.
// Eventually these function may be "optimized out" but for the moment,
@@ -39,6 +32,19 @@ void av1_clearall_segfeatures(struct segmentation *seg) {
av1_zero(seg->feature_mask);
}
+void calculate_segdata(struct segmentation *seg) {
+ seg->segid_preskip = 0;
+ seg->last_active_segid = 0;
+ for (int i = 0; i < MAX_SEGMENTS; i++) {
+ for (int j = 0; j < SEG_LVL_MAX; j++) {
+ if (seg->feature_mask[i] & (1 << j)) {
+ seg->segid_preskip |= (j >= SEG_LVL_REF_FRAME);
+ seg->last_active_segid = i;
+ }
+ }
+ }
+}
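calculate_segdata() above derives two decode-time hints purely from the per-segment feature bitmask: whether the segment id must be parsed before the skip flag, and the highest segment id that carries any feature. A self-contained sketch of the same bookkeeping, using simplified local definitions rather than the real seg_common.h types:

#include <stdint.h>
#include <stdio.h>

#define N_SEGMENTS 8
#define N_FEATURES 8
#define FEAT_REF_FRAME 5 /* features >= this index force segid before skip */

struct seg_sketch {
  unsigned feature_mask[N_SEGMENTS];
  int segid_preskip;
  int last_active_segid;
};

static void recalc(struct seg_sketch *s) {
  s->segid_preskip = 0;
  s->last_active_segid = 0;
  for (int i = 0; i < N_SEGMENTS; i++) {
    for (int j = 0; j < N_FEATURES; j++) {
      if (s->feature_mask[i] & (1u << j)) {
        s->segid_preskip |= (j >= FEAT_REF_FRAME);
        s->last_active_segid = i; /* highest segment id with any feature */
      }
    }
  }
}

int main(void) {
  struct seg_sketch s = { { 0 } };
  s.feature_mask[3] = 1u << 0;              /* segment 3: an ALT_Q style feature */
  s.feature_mask[5] = 1u << FEAT_REF_FRAME; /* segment 5: pins the reference */
  recalc(&s);
  printf("preskip=%d last_active=%d\n", s.segid_preskip, s.last_active_segid);
  /* prints: preskip=1 last_active=5 */
  return 0;
}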
+
void av1_enable_segfeature(struct segmentation *seg, int segment_id,
SEG_LVL_FEATURES feature_id) {
seg->feature_mask[segment_id] |= 1 << feature_id;
@@ -52,6 +58,17 @@ int av1_is_segfeature_signed(SEG_LVL_FEATURES feature_id) {
return seg_feature_data_signed[feature_id];
}
+// The 'seg_data' given for each segment can be either deltas (from the default
+// value chosen for the frame) or absolute values.
+//
+// Valid range for abs values is (0-127 for MB_LVL_ALT_Q), (0-63 for
+// SEGMENT_ALT_LF)
+// Valid range for delta values are (+/-127 for MB_LVL_ALT_Q), (+/-63 for
+// SEGMENT_ALT_LF)
+//
+// abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use
+// the absolute values given).
+
void av1_set_segdata(struct segmentation *seg, int segment_id,
SEG_LVL_FEATURES feature_id, int seg_data) {
if (seg_data < 0) {
@@ -64,8 +81,4 @@ void av1_set_segdata(struct segmentation *seg, int segment_id,
seg->feature_data[segment_id][feature_id] = seg_data;
}
-const aom_tree_index av1_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = {
- 2, 4, 6, 8, 10, 12, 0, -1, -2, -3, -4, -5, -6, -7
-};
-
// TBD? Functions to read and write segment data with range / validity checking
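The comment above documents the valid ranges for absolute and delta segment data, and the trailing TBD note asks for range/validity checking. A standalone sketch of one way such a check could look; the limits and the clamping policy here are illustrative assumptions, not the library's actual behaviour.

#include <stdint.h>
#include <stdio.h>

/* Illustrative per-feature limits; the real tables live in seg_common.c. */
static const int feat_signed[3] = { 1, 1, 0 };
static const int feat_max[3] = { 255, 63, 7 };

/* Validate-and-store sketch: negative values are only accepted for signed
 * features, and magnitudes are clamped to the per-feature maximum. */
static int set_segdata_sketch(int16_t *slot, int feature_id, int value) {
  if (value < 0 && !feat_signed[feature_id]) return -1; /* reject */
  if (value > feat_max[feature_id]) value = feat_max[feature_id];
  if (value < -feat_max[feature_id]) value = -feat_max[feature_id];
  *slot = (int16_t)value;
  return 0;
}

int main(void) {
  int16_t data = 0;
  set_segdata_sketch(&data, 1, -100); /* a loop-filter style delta, clamped */
  printf("stored %d\n", data);        /* prints: stored -63 */
  return 0;
}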
diff --git a/third_party/aom/av1/common/seg_common.h b/third_party/aom/av1/common/seg_common.h
index 6d16aedb6..c851d65fd 100644
--- a/third_party/aom/av1/common/seg_common.h
+++ b/third_party/aom/av1/common/seg_common.h
@@ -18,15 +18,12 @@
extern "C" {
#endif
-#define SEGMENT_DELTADATA 0
-#define SEGMENT_ABSDATA 1
-
#define MAX_SEGMENTS 8
#define SEG_TREE_PROBS (MAX_SEGMENTS - 1)
-#define PREDICTION_PROBS 3
+#define SEG_TEMPORAL_PRED_CTXS 3
+#define SPATIAL_PREDICTION_PROBS 3
-#if CONFIG_LOOPFILTER_LEVEL
typedef enum {
SEG_LVL_ALT_Q, // Use alternate Quantizer ....
SEG_LVL_ALT_LF_Y_V, // Use alternate loop filter value on y plane vertical
@@ -35,47 +32,31 @@ typedef enum {
SEG_LVL_ALT_LF_V, // Use alternate loop filter value on v plane
SEG_LVL_REF_FRAME, // Optional Segment reference frame
SEG_LVL_SKIP, // Optional Segment (0,0) + skip mode
-#if CONFIG_SEGMENT_ZEROMV
- SEG_LVL_ZEROMV,
- SEG_LVL_MAX
-#else
+ SEG_LVL_GLOBALMV,
SEG_LVL_MAX
-#endif
-} SEG_LVL_FEATURES;
-#else // CONFIG_LOOPFILTER_LEVEL
-// Segment level features.
-typedef enum {
- SEG_LVL_ALT_Q = 0, // Use alternate Quantizer ....
- SEG_LVL_ALT_LF = 1, // Use alternate loop filter value...
- SEG_LVL_REF_FRAME = 2, // Optional Segment reference frame
- SEG_LVL_SKIP = 3, // Optional Segment (0,0) + skip mode
-#if CONFIG_SEGMENT_ZEROMV
- SEG_LVL_ZEROMV = 4,
- SEG_LVL_MAX = 5
-#else
- SEG_LVL_MAX = 4
-#endif
} SEG_LVL_FEATURES;
-#endif // CONFIG_LOOPFILTER_LEVEL
struct segmentation {
uint8_t enabled;
uint8_t update_map;
uint8_t update_data;
- uint8_t abs_delta;
uint8_t temporal_update;
int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX];
unsigned int feature_mask[MAX_SEGMENTS];
+ int last_active_segid; // The highest numbered segment id that has some
+ // enabled feature.
+ uint8_t segid_preskip; // Whether the segment id will be read before the
+ // skip syntax element.
+ // 1: the segment id will be read first.
+ // 0: the skip syntax element will be read first.
};
struct segmentation_probs {
- aom_prob tree_probs[SEG_TREE_PROBS];
aom_cdf_prob tree_cdf[CDF_SIZE(MAX_SEGMENTS)];
- aom_prob pred_probs[PREDICTION_PROBS];
-#if CONFIG_NEW_MULTISYMBOL
- aom_cdf_prob pred_cdf[PREDICTION_PROBS][CDF_SIZE(2)];
-#endif
+ aom_cdf_prob pred_cdf[SEG_TEMPORAL_PRED_CTXS][CDF_SIZE(2)];
+ aom_cdf_prob spatial_pred_seg_cdf[SPATIAL_PREDICTION_PROBS]
+ [CDF_SIZE(MAX_SEGMENTS)];
};
static INLINE int segfeature_active(const struct segmentation *seg,
@@ -84,11 +65,26 @@ static INLINE int segfeature_active(const struct segmentation *seg,
return seg->enabled && (seg->feature_mask[segment_id] & (1 << feature_id));
}
+static INLINE void segfeatures_copy(struct segmentation *dst,
+ const struct segmentation *src) {
+ int i, j;
+ for (i = 0; i < MAX_SEGMENTS; i++) {
+ dst->feature_mask[i] = src->feature_mask[i];
+ for (j = 0; j < SEG_LVL_MAX; j++) {
+ dst->feature_data[i][j] = src->feature_data[i][j];
+ }
+ }
+ dst->segid_preskip = src->segid_preskip;
+ dst->last_active_segid = src->last_active_segid;
+}
+
void av1_clearall_segfeatures(struct segmentation *seg);
void av1_enable_segfeature(struct segmentation *seg, int segment_id,
SEG_LVL_FEATURES feature_id);
+void calculate_segdata(struct segmentation *seg);
+
int av1_seg_feature_data_max(SEG_LVL_FEATURES feature_id);
int av1_is_segfeature_signed(SEG_LVL_FEATURES feature_id);
@@ -101,8 +97,6 @@ static INLINE int get_segdata(const struct segmentation *seg, int segment_id,
return seg->feature_data[segment_id][feature_id];
}
-extern const aom_tree_index av1_segment_tree[TREE_SIZE(MAX_SEGMENTS)];
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/thread_common.c b/third_party/aom/av1/common/thread_common.c
index 4c9fa6962..3fa998a91 100644
--- a/third_party/aom/av1/common/thread_common.c
+++ b/third_party/aom/av1/common/thread_common.c
@@ -9,40 +9,158 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#include "./aom_config.h"
+#include "config/aom_config.h"
+#include "config/aom_scale_rtcd.h"
+
#include "aom_dsp/aom_dsp_common.h"
#include "aom_mem/aom_mem.h"
+#include "av1/common/av1_loopfilter.h"
#include "av1/common/entropymode.h"
#include "av1/common/thread_common.h"
#include "av1/common/reconinter.h"
+// Set up nsync by width.
+static INLINE int get_sync_range(int width) {
+ // nsync numbers are picked by testing. For example, for 4k
+ // video, using 4 gives best performance.
+ if (width < 640)
+ return 1;
+ else if (width <= 1280)
+ return 2;
+ else if (width <= 4096)
+ return 4;
+ else
+ return 8;
+}
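get_sync_range() above trades signalling overhead against wait granularity: wider frames get a coarser nsync so rows synchronise less often. A tiny standalone check of the bucketing:

#include <stdio.h>

static int get_sync_range_sketch(int width) {
  if (width < 640)
    return 1;
  else if (width <= 1280)
    return 2;
  else if (width <= 4096)
    return 4;
  else
    return 8;
}

int main(void) {
  const int widths[4] = { 352, 1280, 1920, 7680 };
  for (int i = 0; i < 4; i++)
    printf("%d -> nsync %d\n", widths[i], get_sync_range_sketch(widths[i]));
  /* 352 -> 1, 1280 -> 2, 1920 -> 4, 7680 -> 8 */
  return 0;
}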
+
+static INLINE int get_lr_sync_range(int width) {
+#if 0
+ // nsync numbers are picked by testing. For example, for 4k
+ // video, using 4 gives best performance.
+ if (width < 640)
+ return 1;
+ else if (width <= 1280)
+ return 2;
+ else if (width <= 4096)
+ return 4;
+ else
+ return 8;
+#else
+ (void)width;
+ return 1;
+#endif
+}
+
+// Allocate memory for lf row synchronization
+static void loop_filter_alloc(AV1LfSync *lf_sync, AV1_COMMON *cm, int rows,
+ int width, int num_workers) {
+ lf_sync->rows = rows;
#if CONFIG_MULTITHREAD
-static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
- const int kMaxTryLocks = 4000;
- int locked = 0;
- int i;
+ {
+ int i, j;
+
+ for (j = 0; j < MAX_MB_PLANE; j++) {
+ CHECK_MEM_ERROR(cm, lf_sync->mutex_[j],
+ aom_malloc(sizeof(*(lf_sync->mutex_[j])) * rows));
+ if (lf_sync->mutex_[j]) {
+ for (i = 0; i < rows; ++i) {
+ pthread_mutex_init(&lf_sync->mutex_[j][i], NULL);
+ }
+ }
- for (i = 0; i < kMaxTryLocks; ++i) {
- if (!pthread_mutex_trylock(mutex)) {
- locked = 1;
- break;
+ CHECK_MEM_ERROR(cm, lf_sync->cond_[j],
+ aom_malloc(sizeof(*(lf_sync->cond_[j])) * rows));
+ if (lf_sync->cond_[j]) {
+ for (i = 0; i < rows; ++i) {
+ pthread_cond_init(&lf_sync->cond_[j][i], NULL);
+ }
+ }
+ }
+
+ CHECK_MEM_ERROR(cm, lf_sync->job_mutex,
+ aom_malloc(sizeof(*(lf_sync->job_mutex))));
+ if (lf_sync->job_mutex) {
+ pthread_mutex_init(lf_sync->job_mutex, NULL);
}
}
+#endif // CONFIG_MULTITHREAD
+ CHECK_MEM_ERROR(cm, lf_sync->lfdata,
+ aom_malloc(num_workers * sizeof(*(lf_sync->lfdata))));
+ lf_sync->num_workers = num_workers;
- if (!locked) pthread_mutex_lock(mutex);
+ for (int j = 0; j < MAX_MB_PLANE; j++) {
+ CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col[j],
+ aom_malloc(sizeof(*(lf_sync->cur_sb_col[j])) * rows));
+ }
+ CHECK_MEM_ERROR(
+ cm, lf_sync->job_queue,
+ aom_malloc(sizeof(*(lf_sync->job_queue)) * rows * MAX_MB_PLANE * 2));
+ // Set up nsync.
+ lf_sync->sync_range = get_sync_range(width);
}
+
+// Deallocate lf synchronization related mutex and data
+void av1_loop_filter_dealloc(AV1LfSync *lf_sync) {
+ if (lf_sync != NULL) {
+ int j;
+#if CONFIG_MULTITHREAD
+ int i;
+ for (j = 0; j < MAX_MB_PLANE; j++) {
+ if (lf_sync->mutex_[j] != NULL) {
+ for (i = 0; i < lf_sync->rows; ++i) {
+ pthread_mutex_destroy(&lf_sync->mutex_[j][i]);
+ }
+ aom_free(lf_sync->mutex_[j]);
+ }
+ if (lf_sync->cond_[j] != NULL) {
+ for (i = 0; i < lf_sync->rows; ++i) {
+ pthread_cond_destroy(&lf_sync->cond_[j][i]);
+ }
+ aom_free(lf_sync->cond_[j]);
+ }
+ }
+ if (lf_sync->job_mutex != NULL) {
+ pthread_mutex_destroy(lf_sync->job_mutex);
+ aom_free(lf_sync->job_mutex);
+ }
#endif // CONFIG_MULTITHREAD
+ aom_free(lf_sync->lfdata);
+ for (j = 0; j < MAX_MB_PLANE; j++) {
+ aom_free(lf_sync->cur_sb_col[j]);
+ }
+
+ aom_free(lf_sync->job_queue);
+ // clear the structure as the source of this call may be a resize in which
+ // case this call will be followed by an _alloc() which may fail.
+ av1_zero(*lf_sync);
+ }
+}
+
+static void loop_filter_data_reset(LFWorkerData *lf_data,
+ YV12_BUFFER_CONFIG *frame_buffer,
+ struct AV1Common *cm, MACROBLOCKD *xd) {
+ struct macroblockd_plane *pd = xd->plane;
+ lf_data->frame_buffer = frame_buffer;
+ lf_data->cm = cm;
+ lf_data->xd = xd;
+ for (int i = 0; i < MAX_MB_PLANE; i++) {
+ memcpy(&lf_data->planes[i].dst, &pd[i].dst, sizeof(lf_data->planes[i].dst));
+ lf_data->planes[i].subsampling_x = pd[i].subsampling_x;
+ lf_data->planes[i].subsampling_y = pd[i].subsampling_y;
+ }
+}
-static INLINE void sync_read(AV1LfSync *const lf_sync, int r, int c) {
+static INLINE void sync_read(AV1LfSync *const lf_sync, int r, int c,
+ int plane) {
#if CONFIG_MULTITHREAD
const int nsync = lf_sync->sync_range;
if (r && !(c & (nsync - 1))) {
- pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1];
- mutex_lock(mutex);
+ pthread_mutex_t *const mutex = &lf_sync->mutex_[plane][r - 1];
+ pthread_mutex_lock(mutex);
- while (c > lf_sync->cur_sb_col[r - 1] - nsync) {
- pthread_cond_wait(&lf_sync->cond_[r - 1], mutex);
+ while (c > lf_sync->cur_sb_col[plane][r - 1] - nsync) {
+ pthread_cond_wait(&lf_sync->cond_[plane][r - 1], mutex);
}
pthread_mutex_unlock(mutex);
}
@@ -50,11 +168,12 @@ static INLINE void sync_read(AV1LfSync *const lf_sync, int r, int c) {
(void)lf_sync;
(void)r;
(void)c;
+ (void)plane;
#endif // CONFIG_MULTITHREAD
}
static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c,
- const int sb_cols) {
+ const int sb_cols, int plane) {
#if CONFIG_MULTITHREAD
const int nsync = lf_sync->sync_range;
int cur;
@@ -69,321 +188,156 @@ static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c,
}
if (sig) {
- mutex_lock(&lf_sync->mutex_[r]);
+ pthread_mutex_lock(&lf_sync->mutex_[plane][r]);
- lf_sync->cur_sb_col[r] = cur;
+ lf_sync->cur_sb_col[plane][r] = cur;
- pthread_cond_signal(&lf_sync->cond_[r]);
- pthread_mutex_unlock(&lf_sync->mutex_[r]);
+ pthread_cond_broadcast(&lf_sync->cond_[plane][r]);
+ pthread_mutex_unlock(&lf_sync->mutex_[plane][r]);
}
#else
(void)lf_sync;
(void)r;
(void)c;
(void)sb_cols;
+ (void)plane;
#endif // CONFIG_MULTITHREAD
}
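sync_read() and sync_write() above implement a wavefront dependency between superblock rows: a row may only start filtering column c once the row above it has advanced at least nsync columns further, and finished columns are published per plane with a condition-variable broadcast. The following is a cut-down POSIX-threads sketch of that handshake, deliberately free of the aom types; it posts and waits on every column, whereas the real code only does so at nsync boundaries.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define ROWS 2
#define COLS 8
#define NSYNC 2

static pthread_mutex_t row_mutex[ROWS];
static pthread_cond_t row_cond[ROWS];
static int cur_col[ROWS]; /* last column finished in each row, -1 = none */

static void progress_wait(int r, int c) { /* analogous to sync_read() */
  if (r == 0) return;                     /* the top row has no dependency */
  pthread_mutex_lock(&row_mutex[r - 1]);
  while (c > cur_col[r - 1] - NSYNC)
    pthread_cond_wait(&row_cond[r - 1], &row_mutex[r - 1]);
  pthread_mutex_unlock(&row_mutex[r - 1]);
}

static void progress_post(int r, int c) { /* analogous to sync_write() */
  pthread_mutex_lock(&row_mutex[r]);
  /* Over-post on the last column so dependants never wait past the end. */
  cur_col[r] = (c < COLS - 1) ? c : COLS + NSYNC;
  pthread_cond_broadcast(&row_cond[r]);
  pthread_mutex_unlock(&row_mutex[r]);
}

static void *row_worker(void *arg) {
  const int r = (int)(intptr_t)arg;
  for (int c = 0; c < COLS; c++) {
    progress_wait(r, c);
    /* ... filter superblock (r, c) here ... */
    progress_post(r, c);
  }
  return NULL;
}

int main(void) {
  pthread_t tid[ROWS];
  for (int r = 0; r < ROWS; r++) {
    pthread_mutex_init(&row_mutex[r], NULL);
    pthread_cond_init(&row_cond[r], NULL);
    cur_col[r] = -1;
    pthread_create(&tid[r], NULL, row_worker, (void *)(intptr_t)r);
  }
  for (int r = 0; r < ROWS; r++) pthread_join(tid[r], NULL);
  puts("rows filtered in wavefront order");
  return 0;
}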
-#if !CONFIG_EXT_PARTITION_TYPES
-static INLINE enum lf_path get_loop_filter_path(
- int y_only, struct macroblockd_plane *planes) {
- if (y_only)
- return LF_PATH_444;
- else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
- return LF_PATH_420;
- else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
- return LF_PATH_444;
- else
- return LF_PATH_SLOW;
-}
+static void enqueue_lf_jobs(AV1LfSync *lf_sync, AV1_COMMON *cm, int start,
+ int stop, int plane_start, int plane_end) {
+ int mi_row, plane, dir;
+ AV1LfMTInfo *lf_job_queue = lf_sync->job_queue;
+ lf_sync->jobs_enqueued = 0;
+ lf_sync->jobs_dequeued = 0;
-static INLINE void loop_filter_block_plane_ver(
- AV1_COMMON *cm, struct macroblockd_plane *planes, int plane,
- MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
- LOOP_FILTER_MASK *lfm) {
- if (plane == 0) {
- av1_filter_block_plane_ss00_ver(cm, &planes[0], mi_row, lfm);
- } else {
- switch (path) {
- case LF_PATH_420:
- av1_filter_block_plane_ss11_ver(cm, &planes[plane], mi_row, lfm);
- break;
- case LF_PATH_444:
- av1_filter_block_plane_ss00_ver(cm, &planes[plane], mi_row, lfm);
- break;
- case LF_PATH_SLOW:
- av1_filter_block_plane_non420_ver(cm, &planes[plane], mi, mi_row,
- mi_col, plane);
+ for (dir = 0; dir < 2; dir++) {
+ for (plane = plane_start; plane < plane_end; plane++) {
+ if (plane == 0 && !(cm->lf.filter_level[0]) && !(cm->lf.filter_level[1]))
break;
+ else if (plane == 1 && !(cm->lf.filter_level_u))
+ continue;
+ else if (plane == 2 && !(cm->lf.filter_level_v))
+ continue;
+ for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
+ lf_job_queue->mi_row = mi_row;
+ lf_job_queue->plane = plane;
+ lf_job_queue->dir = dir;
+ lf_job_queue++;
+ lf_sync->jobs_enqueued++;
+ }
}
}
}
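enqueue_lf_jobs() above flattens three nested dimensions -- edge direction, plane and superblock row -- into a linear queue, skipping planes whose filter level is zero; workers later drain that queue in order. A minimal sketch of the flattening, with a made-up per-plane on/off level standing in for the real filter-level checks:

#include <stdio.h>

struct lf_job { int mi_row, plane, dir; };

int main(void) {
  struct lf_job queue[64];
  int n = 0;
  const int filter_level[3] = { 10, 0, 4 }; /* y on, u off, v on (made up) */
  const int mi_rows = 32, mib_size = 16;

  for (int dir = 0; dir < 2; dir++)
    for (int plane = 0; plane < 3; plane++) {
      if (filter_level[plane] == 0) continue; /* nothing to filter here */
      for (int mi_row = 0; mi_row < mi_rows; mi_row += mib_size)
        queue[n++] = (struct lf_job){ mi_row, plane, dir };
    }

  for (int i = 0; i < n; i++)
    printf("job %d: dir=%d plane=%d mi_row=%d\n", i, queue[i].dir,
           queue[i].plane, queue[i].mi_row);
  return 0;
}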
-static INLINE void loop_filter_block_plane_hor(
- AV1_COMMON *cm, struct macroblockd_plane *planes, int plane,
- MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
- LOOP_FILTER_MASK *lfm) {
- if (plane == 0) {
- av1_filter_block_plane_ss00_hor(cm, &planes[0], mi_row, lfm);
- } else {
- switch (path) {
- case LF_PATH_420:
- av1_filter_block_plane_ss11_hor(cm, &planes[plane], mi_row, lfm);
- break;
- case LF_PATH_444:
- av1_filter_block_plane_ss00_hor(cm, &planes[plane], mi_row, lfm);
- break;
- case LF_PATH_SLOW:
- av1_filter_block_plane_non420_hor(cm, &planes[plane], mi, mi_row,
- mi_col, plane);
- break;
- }
+AV1LfMTInfo *get_lf_job_info(AV1LfSync *lf_sync) {
+ AV1LfMTInfo *cur_job_info = NULL;
+
+#if CONFIG_MULTITHREAD
+ pthread_mutex_lock(lf_sync->job_mutex);
+
+ if (lf_sync->jobs_dequeued < lf_sync->jobs_enqueued) {
+ cur_job_info = lf_sync->job_queue + lf_sync->jobs_dequeued;
+ lf_sync->jobs_dequeued++;
}
-}
-#endif
-// Row-based multi-threaded loopfilter hook
-#if CONFIG_PARALLEL_DEBLOCKING
-static int loop_filter_ver_row_worker(AV1LfSync *const lf_sync,
- LFWorkerData *const lf_data) {
- const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
- int mi_row, mi_col;
-#if !CONFIG_EXT_PARTITION_TYPES
- enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
-#endif
- for (mi_row = lf_data->start; mi_row < lf_data->stop;
- mi_row += lf_sync->num_workers * lf_data->cm->mib_size) {
- MODE_INFO **const mi =
- lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
-
- for (mi_col = 0; mi_col < lf_data->cm->mi_cols;
- mi_col += lf_data->cm->mib_size) {
- LOOP_FILTER_MASK lfm;
- int plane;
-
- av1_setup_dst_planes(lf_data->planes, lf_data->cm->sb_size,
- lf_data->frame_buffer, mi_row, mi_col);
- av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
- lf_data->cm->mi_stride, &lfm);
-
-#if CONFIG_EXT_PARTITION_TYPES
- for (plane = 0; plane < num_planes; ++plane)
- av1_filter_block_plane_non420_ver(lf_data->cm, &lf_data->planes[plane],
- mi + mi_col, mi_row, mi_col, plane);
-#else
- for (plane = 0; plane < num_planes; ++plane)
- loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane,
- mi + mi_col, mi_row, mi_col, path, &lfm);
+ pthread_mutex_unlock(lf_sync->job_mutex);
+#else
+ (void)lf_sync;
#endif
- }
- }
- return 1;
+
+ return cur_job_info;
}
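get_lf_job_info() is a mutex-guarded cursor into the pre-built queue; a NULL return tells a worker there is nothing left and it should exit. A compact, compilable form of the same idiom (names are placeholders):

#include <pthread.h>
#include <stddef.h>

struct job { int payload; };

struct job_queue {
  pthread_mutex_t lock;
  struct job *jobs;
  int enqueued, dequeued;
};

/* Returns the next job, or NULL once the queue has been drained. */
static struct job *next_job(struct job_queue *q) {
  struct job *j = NULL;
  pthread_mutex_lock(&q->lock);
  if (q->dequeued < q->enqueued) j = &q->jobs[q->dequeued++];
  pthread_mutex_unlock(&q->lock);
  return j;
}

int main(void) {
  struct job jobs[3] = { { 1 }, { 2 }, { 3 } };
  struct job_queue q = { PTHREAD_MUTEX_INITIALIZER, jobs, 3, 0 };
  struct job *j;
  while ((j = next_job(&q)) != NULL) {
    /* a worker would process *j here */
  }
  return 0;
}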
-static int loop_filter_hor_row_worker(AV1LfSync *const lf_sync,
- LFWorkerData *const lf_data) {
- const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
+// Implement row loopfiltering for each thread.
+static INLINE void thread_loop_filter_rows(
+ const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm,
+ struct macroblockd_plane *planes, MACROBLOCKD *xd,
+ AV1LfSync *const lf_sync) {
const int sb_cols =
- mi_cols_aligned_to_sb(lf_data->cm) >> lf_data->cm->mib_size_log2;
- int mi_row, mi_col;
-#if !CONFIG_EXT_PARTITION_TYPES
- enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
-#endif
-
- for (mi_row = lf_data->start; mi_row < lf_data->stop;
- mi_row += lf_sync->num_workers * lf_data->cm->mib_size) {
- MODE_INFO **const mi =
- lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
-
- for (mi_col = 0; mi_col < lf_data->cm->mi_cols;
- mi_col += lf_data->cm->mib_size) {
- const int r = mi_row >> lf_data->cm->mib_size_log2;
- const int c = mi_col >> lf_data->cm->mib_size_log2;
- LOOP_FILTER_MASK lfm;
- int plane;
-
- // TODO(wenhao.zhang@intel.com): For better parallelization, reorder
- // the outer loop to column-based and remove the synchronizations here.
- sync_read(lf_sync, r, c);
-
- av1_setup_dst_planes(lf_data->planes, lf_data->cm->sb_size,
- lf_data->frame_buffer, mi_row, mi_col);
- av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
- lf_data->cm->mi_stride, &lfm);
-#if CONFIG_EXT_PARTITION_TYPES
- for (plane = 0; plane < num_planes; ++plane)
- av1_filter_block_plane_non420_hor(lf_data->cm, &lf_data->planes[plane],
- mi + mi_col, mi_row, mi_col, plane);
-#else
- for (plane = 0; plane < num_planes; ++plane)
- loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane,
- mi + mi_col, mi_row, mi_col, path, &lfm);
-#endif
- sync_write(lf_sync, r, c, sb_cols);
+ ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2) >> MAX_MIB_SIZE_LOG2;
+ int mi_row, mi_col, plane, dir;
+ int r, c;
+
+ while (1) {
+ AV1LfMTInfo *cur_job_info = get_lf_job_info(lf_sync);
+
+ if (cur_job_info != NULL) {
+ mi_row = cur_job_info->mi_row;
+ plane = cur_job_info->plane;
+ dir = cur_job_info->dir;
+ r = mi_row >> MAX_MIB_SIZE_LOG2;
+
+ if (dir == 0) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
+ c = mi_col >> MAX_MIB_SIZE_LOG2;
+
+ av1_setup_dst_planes(planes, cm->seq_params.sb_size, frame_buffer,
+ mi_row, mi_col, plane, plane + 1);
+
+ av1_filter_block_plane_vert(cm, xd, plane, &planes[plane], mi_row,
+ mi_col);
+ sync_write(lf_sync, r, c, sb_cols, plane);
+ }
+ } else if (dir == 1) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
+ c = mi_col >> MAX_MIB_SIZE_LOG2;
+
+ // Wait for vertical edge filtering of the top-right block to be
+ // completed
+ sync_read(lf_sync, r, c, plane);
+
+ // Wait for vertical edge filtering of the right block to be
+ // completed
+ sync_read(lf_sync, r + 1, c, plane);
+
+ av1_setup_dst_planes(planes, cm->seq_params.sb_size, frame_buffer,
+ mi_row, mi_col, plane, plane + 1);
+ av1_filter_block_plane_horz(cm, xd, plane, &planes[plane], mi_row,
+ mi_col);
+ }
+ }
+ } else {
+ break;
}
}
- return 1;
}
-#else // CONFIG_PARALLEL_DEBLOCKING
+
+// Row-based multi-threaded loopfilter hook
static int loop_filter_row_worker(AV1LfSync *const lf_sync,
LFWorkerData *const lf_data) {
- const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
- const int sb_cols =
- mi_cols_aligned_to_sb(lf_data->cm) >> lf_data->cm->mib_size_log2;
- int mi_row, mi_col;
-#if !CONFIG_EXT_PARTITION_TYPES
- enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
-#endif // !CONFIG_EXT_PARTITION_TYPES
-
-#if CONFIG_EXT_PARTITION
- printf(
- "STOPPING: This code has not been modified to work with the "
- "extended coding unit size experiment");
- exit(EXIT_FAILURE);
-#endif // CONFIG_EXT_PARTITION
-
- for (mi_row = lf_data->start; mi_row < lf_data->stop;
- mi_row += lf_sync->num_workers * lf_data->cm->mib_size) {
- MODE_INFO **const mi =
- lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
-
- for (mi_col = 0; mi_col < lf_data->cm->mi_cols;
- mi_col += lf_data->cm->mib_size) {
- const int r = mi_row >> lf_data->cm->mib_size_log2;
- const int c = mi_col >> lf_data->cm->mib_size_log2;
-#if !CONFIG_EXT_PARTITION_TYPES
- LOOP_FILTER_MASK lfm;
-#endif
- int plane;
-
- sync_read(lf_sync, r, c);
-
- av1_setup_dst_planes(lf_data->planes, lf_data->cm->sb_size,
- lf_data->frame_buffer, mi_row, mi_col);
-#if CONFIG_EXT_PARTITION_TYPES
- for (plane = 0; plane < num_planes; ++plane) {
- av1_filter_block_plane_non420_ver(lf_data->cm, &lf_data->planes[plane],
- mi + mi_col, mi_row, mi_col, plane);
- av1_filter_block_plane_non420_hor(lf_data->cm, &lf_data->planes[plane],
- mi + mi_col, mi_row, mi_col, plane);
- }
-#else
- av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
- lf_data->cm->mi_stride, &lfm);
-
- for (plane = 0; plane < num_planes; ++plane) {
- loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane,
- mi + mi_col, mi_row, mi_col, path, &lfm);
- loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane,
- mi + mi_col, mi_row, mi_col, path, &lfm);
- }
-#endif // CONFIG_EXT_PARTITION_TYPES
- sync_write(lf_sync, r, c, sb_cols);
- }
- }
+ thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
+ lf_data->xd, lf_sync);
return 1;
}
-#endif // CONFIG_PARALLEL_DEBLOCKING
static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
- struct macroblockd_plane *planes, int start,
- int stop, int y_only, AVxWorker *workers,
- int nworkers, AV1LfSync *lf_sync) {
-#if CONFIG_EXT_PARTITION
- printf(
- "STOPPING: This code has not been modified to work with the "
- "extended coding unit size experiment");
- exit(EXIT_FAILURE);
-#endif // CONFIG_EXT_PARTITION
-
+ MACROBLOCKD *xd, int start, int stop,
+ int plane_start, int plane_end,
+ AVxWorker *workers, int nworkers,
+ AV1LfSync *lf_sync) {
const AVxWorkerInterface *const winterface = aom_get_worker_interface();
// Number of superblock rows and cols
- const int sb_rows = mi_rows_aligned_to_sb(cm) >> cm->mib_size_log2;
- // Decoder may allocate more threads than number of tiles based on user's
- // input.
- const int tile_cols = cm->tile_cols;
- const int num_workers = AOMMIN(nworkers, tile_cols);
+ const int sb_rows =
+ ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2) >> MAX_MIB_SIZE_LOG2;
+ const int num_workers = nworkers;
int i;
if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
num_workers > lf_sync->num_workers) {
av1_loop_filter_dealloc(lf_sync);
- av1_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
+ loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
}
-// Set up loopfilter thread data.
-// The decoder is capping num_workers because it has been observed that using
-// more threads on the loopfilter than there are cores will hurt performance
-// on Android. This is because the system will only schedule the tile decode
-// workers on cores equal to the number of tile columns. Then if the decoder
-// tries to use more threads for the loopfilter, it will hurt performance
-// because of contention. If the multithreading code changes in the future
-// then the number of workers used by the loopfilter should be revisited.
-
-#if CONFIG_PARALLEL_DEBLOCKING
// Initialize cur_sb_col to -1 for all SB rows.
- memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
-
- // Filter all the vertical edges in the whole frame
- for (i = 0; i < num_workers; ++i) {
- AVxWorker *const worker = &workers[i];
- LFWorkerData *const lf_data = &lf_sync->lfdata[i];
-
- worker->hook = (AVxWorkerHook)loop_filter_ver_row_worker;
- worker->data1 = lf_sync;
- worker->data2 = lf_data;
-
- // Loopfilter data
- av1_loop_filter_data_reset(lf_data, frame, cm, planes);
- lf_data->start = start + i * cm->mib_size;
- lf_data->stop = stop;
- lf_data->y_only = y_only;
-
- // Start loopfiltering
- if (i == num_workers - 1) {
- winterface->execute(worker);
- } else {
- winterface->launch(worker);
- }
- }
-
- // Wait till all rows are finished
- for (i = 0; i < num_workers; ++i) {
- winterface->sync(&workers[i]);
- }
-
- memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
- // Filter all the horizontal edges in the whole frame
- for (i = 0; i < num_workers; ++i) {
- AVxWorker *const worker = &workers[i];
- LFWorkerData *const lf_data = &lf_sync->lfdata[i];
-
- worker->hook = (AVxWorkerHook)loop_filter_hor_row_worker;
- worker->data1 = lf_sync;
- worker->data2 = lf_data;
-
- // Loopfilter data
- av1_loop_filter_data_reset(lf_data, frame, cm, planes);
- lf_data->start = start + i * cm->mib_size;
- lf_data->stop = stop;
- lf_data->y_only = y_only;
-
- // Start loopfiltering
- if (i == num_workers - 1) {
- winterface->execute(worker);
- } else {
- winterface->launch(worker);
- }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ memset(lf_sync->cur_sb_col[i], -1,
+ sizeof(*(lf_sync->cur_sb_col[i])) * sb_rows);
}
- // Wait till all rows are finished
- for (i = 0; i < num_workers; ++i) {
- winterface->sync(&workers[i]);
- }
-#else // CONFIG_PARALLEL_DEBLOCKING
- // Initialize cur_sb_col to -1 for all SB rows.
- memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
+ enqueue_lf_jobs(lf_sync, cm, start, stop, plane_start, plane_end);
+ // Set up loopfilter thread data.
for (i = 0; i < num_workers; ++i) {
AVxWorker *const worker = &workers[i];
LFWorkerData *const lf_data = &lf_sync->lfdata[i];
@@ -393,10 +347,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
worker->data2 = lf_data;
// Loopfilter data
- av1_loop_filter_data_reset(lf_data, frame, cm, planes);
- lf_data->start = start + i * cm->mib_size;
- lf_data->stop = stop;
- lf_data->y_only = y_only;
+ loop_filter_data_reset(lf_data, frame, cm, xd);
// Start loopfiltering
if (i == num_workers - 1) {
@@ -410,21 +361,14 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
for (i = 0; i < num_workers; ++i) {
winterface->sync(&workers[i]);
}
-#endif // CONFIG_PARALLEL_DEBLOCKING
}
void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
- struct macroblockd_plane *planes,
- int frame_filter_level,
-#if CONFIG_LOOPFILTER_LEVEL
- int frame_filter_level_r,
-#endif
- int y_only, int partial_frame, AVxWorker *workers,
+ MACROBLOCKD *xd, int plane_start, int plane_end,
+ int partial_frame, AVxWorker *workers,
int num_workers, AV1LfSync *lf_sync) {
int start_mi_row, end_mi_row, mi_rows_to_filter;
- if (!frame_filter_level) return;
-
start_mi_row = 0;
mi_rows_to_filter = cm->mi_rows;
if (partial_frame && cm->mi_rows > 8) {
@@ -433,103 +377,406 @@ void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
}
end_mi_row = start_mi_row + mi_rows_to_filter;
-#if CONFIG_LOOPFILTER_LEVEL
- av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level_r,
- y_only);
+ av1_loop_filter_frame_init(cm, plane_start, plane_end);
+
+ loop_filter_rows_mt(frame, cm, xd, start_mi_row, end_mi_row, plane_start,
+ plane_end, workers, num_workers, lf_sync);
+}
+
+static INLINE void lr_sync_read(void *const lr_sync, int r, int c, int plane) {
+#if CONFIG_MULTITHREAD
+ AV1LrSync *const loop_res_sync = (AV1LrSync *)lr_sync;
+ const int nsync = loop_res_sync->sync_range;
+
+ if (r && !(c & (nsync - 1))) {
+ pthread_mutex_t *const mutex = &loop_res_sync->mutex_[plane][r - 1];
+ pthread_mutex_lock(mutex);
+
+ while (c > loop_res_sync->cur_sb_col[plane][r - 1] - nsync) {
+ pthread_cond_wait(&loop_res_sync->cond_[plane][r - 1], mutex);
+ }
+ pthread_mutex_unlock(mutex);
+ }
#else
- av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level);
-#endif // CONFIG_LOOPFILTER_LEVEL
- loop_filter_rows_mt(frame, cm, planes, start_mi_row, end_mi_row, y_only,
- workers, num_workers, lf_sync);
+ (void)lr_sync;
+ (void)r;
+ (void)c;
+ (void)plane;
+#endif // CONFIG_MULTITHREAD
}
-// Set up nsync by width.
-static INLINE int get_sync_range(int width) {
- // nsync numbers are picked by testing. For example, for 4k
- // video, using 4 gives best performance.
- if (width < 640)
- return 1;
- else if (width <= 1280)
- return 2;
- else if (width <= 4096)
- return 4;
- else
- return 8;
+static INLINE void lr_sync_write(void *const lr_sync, int r, int c,
+ const int sb_cols, int plane) {
+#if CONFIG_MULTITHREAD
+ AV1LrSync *const loop_res_sync = (AV1LrSync *)lr_sync;
+ const int nsync = loop_res_sync->sync_range;
+ int cur;
+ // Only signal when there are enough filtered SB for next row to run.
+ int sig = 1;
+
+ if (c < sb_cols - 1) {
+ cur = c;
+ if (c % nsync) sig = 0;
+ } else {
+ cur = sb_cols + nsync;
+ }
+
+ if (sig) {
+ pthread_mutex_lock(&loop_res_sync->mutex_[plane][r]);
+
+ loop_res_sync->cur_sb_col[plane][r] = cur;
+
+ pthread_cond_broadcast(&loop_res_sync->cond_[plane][r]);
+ pthread_mutex_unlock(&loop_res_sync->mutex_[plane][r]);
+ }
+#else
+ (void)lr_sync;
+ (void)r;
+ (void)c;
+ (void)sb_cols;
+ (void)plane;
+#endif // CONFIG_MULTITHREAD
}
-// Allocate memory for lf row synchronization
-void av1_loop_filter_alloc(AV1LfSync *lf_sync, AV1_COMMON *cm, int rows,
- int width, int num_workers) {
- lf_sync->rows = rows;
+// Allocate memory for loop restoration row synchronization
+static void loop_restoration_alloc(AV1LrSync *lr_sync, AV1_COMMON *cm,
+ int num_workers, int num_rows_lr,
+ int num_planes, int width) {
+ lr_sync->rows = num_rows_lr;
+ lr_sync->num_planes = num_planes;
#if CONFIG_MULTITHREAD
{
- int i;
+ int i, j;
+
+ for (j = 0; j < num_planes; j++) {
+ CHECK_MEM_ERROR(cm, lr_sync->mutex_[j],
+ aom_malloc(sizeof(*(lr_sync->mutex_[j])) * num_rows_lr));
+ if (lr_sync->mutex_[j]) {
+ for (i = 0; i < num_rows_lr; ++i) {
+ pthread_mutex_init(&lr_sync->mutex_[j][i], NULL);
+ }
+ }
- CHECK_MEM_ERROR(cm, lf_sync->mutex_,
- aom_malloc(sizeof(*lf_sync->mutex_) * rows));
- if (lf_sync->mutex_) {
- for (i = 0; i < rows; ++i) {
- pthread_mutex_init(&lf_sync->mutex_[i], NULL);
+ CHECK_MEM_ERROR(cm, lr_sync->cond_[j],
+ aom_malloc(sizeof(*(lr_sync->cond_[j])) * num_rows_lr));
+ if (lr_sync->cond_[j]) {
+ for (i = 0; i < num_rows_lr; ++i) {
+ pthread_cond_init(&lr_sync->cond_[j][i], NULL);
+ }
}
}
- CHECK_MEM_ERROR(cm, lf_sync->cond_,
- aom_malloc(sizeof(*lf_sync->cond_) * rows));
- if (lf_sync->cond_) {
- for (i = 0; i < rows; ++i) {
- pthread_cond_init(&lf_sync->cond_[i], NULL);
- }
+ CHECK_MEM_ERROR(cm, lr_sync->job_mutex,
+ aom_malloc(sizeof(*(lr_sync->job_mutex))));
+ if (lr_sync->job_mutex) {
+ pthread_mutex_init(lr_sync->job_mutex, NULL);
}
}
#endif // CONFIG_MULTITHREAD
+ CHECK_MEM_ERROR(cm, lr_sync->lrworkerdata,
+ aom_malloc(num_workers * sizeof(*(lr_sync->lrworkerdata))));
- CHECK_MEM_ERROR(cm, lf_sync->lfdata,
- aom_malloc(num_workers * sizeof(*lf_sync->lfdata)));
- lf_sync->num_workers = num_workers;
+ for (int worker_idx = 0; worker_idx < num_workers; ++worker_idx) {
+ if (worker_idx < num_workers - 1) {
+ CHECK_MEM_ERROR(cm, lr_sync->lrworkerdata[worker_idx].rst_tmpbuf,
+ (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE));
+ CHECK_MEM_ERROR(cm, lr_sync->lrworkerdata[worker_idx].rlbs,
+ aom_malloc(sizeof(RestorationLineBuffers)));
+
+ } else {
+ lr_sync->lrworkerdata[worker_idx].rst_tmpbuf = cm->rst_tmpbuf;
+ lr_sync->lrworkerdata[worker_idx].rlbs = cm->rlbs;
+ }
+ }
- CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
- aom_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
+ lr_sync->num_workers = num_workers;
+ for (int j = 0; j < num_planes; j++) {
+ CHECK_MEM_ERROR(
+ cm, lr_sync->cur_sb_col[j],
+ aom_malloc(sizeof(*(lr_sync->cur_sb_col[j])) * num_rows_lr));
+ }
+ CHECK_MEM_ERROR(
+ cm, lr_sync->job_queue,
+ aom_malloc(sizeof(*(lr_sync->job_queue)) * num_rows_lr * num_planes));
// Set up nsync.
- lf_sync->sync_range = get_sync_range(width);
+ lr_sync->sync_range = get_lr_sync_range(width);
}
-// Deallocate lf synchronization related mutex and data
-void av1_loop_filter_dealloc(AV1LfSync *lf_sync) {
- if (lf_sync != NULL) {
+// Deallocate loop restoration synchronization related mutex and data
+void av1_loop_restoration_dealloc(AV1LrSync *lr_sync, int num_workers) {
+ if (lr_sync != NULL) {
+ int j;
#if CONFIG_MULTITHREAD
int i;
-
- if (lf_sync->mutex_ != NULL) {
- for (i = 0; i < lf_sync->rows; ++i) {
- pthread_mutex_destroy(&lf_sync->mutex_[i]);
+ for (j = 0; j < MAX_MB_PLANE; j++) {
+ if (lr_sync->mutex_[j] != NULL) {
+ for (i = 0; i < lr_sync->rows; ++i) {
+ pthread_mutex_destroy(&lr_sync->mutex_[j][i]);
+ }
+ aom_free(lr_sync->mutex_[j]);
}
- aom_free(lf_sync->mutex_);
- }
- if (lf_sync->cond_ != NULL) {
- for (i = 0; i < lf_sync->rows; ++i) {
- pthread_cond_destroy(&lf_sync->cond_[i]);
+ if (lr_sync->cond_[j] != NULL) {
+ for (i = 0; i < lr_sync->rows; ++i) {
+ pthread_cond_destroy(&lr_sync->cond_[j][i]);
+ }
+ aom_free(lr_sync->cond_[j]);
}
- aom_free(lf_sync->cond_);
+ }
+ if (lr_sync->job_mutex != NULL) {
+ pthread_mutex_destroy(lr_sync->job_mutex);
+ aom_free(lr_sync->job_mutex);
}
#endif // CONFIG_MULTITHREAD
- aom_free(lf_sync->lfdata);
- aom_free(lf_sync->cur_sb_col);
+ for (j = 0; j < MAX_MB_PLANE; j++) {
+ aom_free(lr_sync->cur_sb_col[j]);
+ }
+
+ aom_free(lr_sync->job_queue);
+
+ if (lr_sync->lrworkerdata) {
+ for (int worker_idx = 0; worker_idx < num_workers - 1; worker_idx++) {
+ LRWorkerData *const workerdata_data =
+ lr_sync->lrworkerdata + worker_idx;
+
+ aom_free(workerdata_data->rst_tmpbuf);
+ aom_free(workerdata_data->rlbs);
+ }
+ aom_free(lr_sync->lrworkerdata);
+ }
+
// clear the structure as the source of this call may be a resize in which
// case this call will be followed by an _alloc() which may fail.
- av1_zero(*lf_sync);
+ av1_zero(*lr_sync);
+ }
+}
+
+static void enqueue_lr_jobs(AV1LrSync *lr_sync, AV1LrStruct *lr_ctxt,
+ AV1_COMMON *cm) {
+ FilterFrameCtxt *ctxt = lr_ctxt->ctxt;
+
+ const int num_planes = av1_num_planes(cm);
+ AV1LrMTInfo *lr_job_queue = lr_sync->job_queue;
+ int32_t lr_job_counter[2], num_even_lr_jobs = 0;
+ lr_sync->jobs_enqueued = 0;
+ lr_sync->jobs_dequeued = 0;
+
+ for (int plane = 0; plane < num_planes; plane++) {
+ if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
+ num_even_lr_jobs =
+ num_even_lr_jobs + ((ctxt[plane].rsi->vert_units_per_tile + 1) >> 1);
+ }
+ lr_job_counter[0] = 0;
+ lr_job_counter[1] = num_even_lr_jobs;
+
+ for (int plane = 0; plane < num_planes; plane++) {
+ if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
+ const int is_uv = plane > 0;
+ const int ss_y = is_uv && cm->subsampling_y;
+
+ AV1PixelRect tile_rect = ctxt[plane].tile_rect;
+ const int unit_size = ctxt[plane].rsi->restoration_unit_size;
+
+ const int tile_h = tile_rect.bottom - tile_rect.top;
+ const int ext_size = unit_size * 3 / 2;
+
+ int y0 = 0, i = 0;
+ while (y0 < tile_h) {
+ int remaining_h = tile_h - y0;
+ int h = (remaining_h < ext_size) ? remaining_h : unit_size;
+
+ RestorationTileLimits limits;
+ limits.v_start = tile_rect.top + y0;
+ limits.v_end = tile_rect.top + y0 + h;
+ assert(limits.v_end <= tile_rect.bottom);
+ // Offset the tile upwards to align with the restoration processing stripe
+ const int voffset = RESTORATION_UNIT_OFFSET >> ss_y;
+ limits.v_start = AOMMAX(tile_rect.top, limits.v_start - voffset);
+ if (limits.v_end < tile_rect.bottom) limits.v_end -= voffset;
+
+ assert(lr_job_counter[0] <= num_even_lr_jobs);
+
+ lr_job_queue[lr_job_counter[i & 1]].lr_unit_row = i;
+ lr_job_queue[lr_job_counter[i & 1]].plane = plane;
+ lr_job_queue[lr_job_counter[i & 1]].v_start = limits.v_start;
+ lr_job_queue[lr_job_counter[i & 1]].v_end = limits.v_end;
+ lr_job_queue[lr_job_counter[i & 1]].sync_mode = i & 1;
+ if ((i & 1) == 0) {
+ lr_job_queue[lr_job_counter[i & 1]].v_copy_start =
+ limits.v_start + RESTORATION_BORDER;
+ lr_job_queue[lr_job_counter[i & 1]].v_copy_end =
+ limits.v_end - RESTORATION_BORDER;
+ if (i == 0) {
+ assert(limits.v_start == tile_rect.top);
+ lr_job_queue[lr_job_counter[i & 1]].v_copy_start = tile_rect.top;
+ }
+ if (i == (ctxt[plane].rsi->vert_units_per_tile - 1)) {
+ assert(limits.v_end == tile_rect.bottom);
+ lr_job_queue[lr_job_counter[i & 1]].v_copy_end = tile_rect.bottom;
+ }
+ } else {
+ lr_job_queue[lr_job_counter[i & 1]].v_copy_start =
+ AOMMAX(limits.v_start - RESTORATION_BORDER, tile_rect.top);
+ lr_job_queue[lr_job_counter[i & 1]].v_copy_end =
+ AOMMIN(limits.v_end + RESTORATION_BORDER, tile_rect.bottom);
+ }
+ lr_job_counter[i & 1]++;
+ lr_sync->jobs_enqueued++;
+
+ y0 += h;
+ ++i;
+ }
+ }
+}
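enqueue_lr_jobs() above places even restoration-unit rows at the front of the queue and odd rows behind them (the two lr_job_counter cursors), so the even rows, which only publish their sync state, can run ahead of the odd rows that have to wait on it. The index interleaving in isolation:

#include <stdio.h>

int main(void) {
  const int n_rows = 7; /* restoration unit rows in one plane */
  int order[16];
  int counter[2];
  counter[0] = 0;                 /* even rows fill the front ...        */
  counter[1] = (n_rows + 1) >> 1; /* ... odd rows start after them       */

  for (int i = 0; i < n_rows; i++) order[counter[i & 1]++] = i;

  for (int i = 0; i < n_rows; i++) printf("%d ", order[i]);
  printf("\n"); /* prints: 0 2 4 6 1 3 5 */
  return 0;
}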
+
+AV1LrMTInfo *get_lr_job_info(AV1LrSync *lr_sync) {
+ AV1LrMTInfo *cur_job_info = NULL;
+
+#if CONFIG_MULTITHREAD
+ pthread_mutex_lock(lr_sync->job_mutex);
+
+ if (lr_sync->jobs_dequeued < lr_sync->jobs_enqueued) {
+ cur_job_info = lr_sync->job_queue + lr_sync->jobs_dequeued;
+ lr_sync->jobs_dequeued++;
+ }
+
+ pthread_mutex_unlock(lr_sync->job_mutex);
+#else
+ (void)lr_sync;
+#endif
+
+ return cur_job_info;
+}
+
+// Implement row loop restoration for each thread.
+static int loop_restoration_row_worker(AV1LrSync *const lr_sync,
+ LRWorkerData *lrworkerdata) {
+ AV1LrStruct *lr_ctxt = (AV1LrStruct *)lrworkerdata->lr_ctxt;
+ FilterFrameCtxt *ctxt = lr_ctxt->ctxt;
+ int lr_unit_row;
+ int plane;
+ const int tile_row = LR_TILE_ROW;
+ const int tile_col = LR_TILE_COL;
+ const int tile_cols = LR_TILE_COLS;
+ const int tile_idx = tile_col + tile_row * tile_cols;
+ typedef void (*copy_fun)(const YV12_BUFFER_CONFIG *src_ybc,
+ YV12_BUFFER_CONFIG *dst_ybc, int hstart, int hend,
+ int vstart, int vend);
+ static const copy_fun copy_funs[3] = {
+ aom_yv12_partial_copy_y, aom_yv12_partial_copy_u, aom_yv12_partial_copy_v
+ };
+
+ while (1) {
+ AV1LrMTInfo *cur_job_info = get_lr_job_info(lr_sync);
+ if (cur_job_info != NULL) {
+ RestorationTileLimits limits;
+ sync_read_fn_t on_sync_read;
+ sync_write_fn_t on_sync_write;
+ limits.v_start = cur_job_info->v_start;
+ limits.v_end = cur_job_info->v_end;
+ lr_unit_row = cur_job_info->lr_unit_row;
+ plane = cur_job_info->plane;
+ const int unit_idx0 = tile_idx * ctxt[plane].rsi->units_per_tile;
+
+ // sync_mode == 1 implies only sync read is required in LR Multi-threading
+ // sync_mode == 0 implies only sync write is required.
+ on_sync_read =
+ cur_job_info->sync_mode == 1 ? lr_sync_read : av1_lr_sync_read_dummy;
+ on_sync_write = cur_job_info->sync_mode == 0 ? lr_sync_write
+ : av1_lr_sync_write_dummy;
+
+ av1_foreach_rest_unit_in_row(
+ &limits, &(ctxt[plane].tile_rect), lr_ctxt->on_rest_unit, lr_unit_row,
+ ctxt[plane].rsi->restoration_unit_size, unit_idx0,
+ ctxt[plane].rsi->horz_units_per_tile,
+ ctxt[plane].rsi->vert_units_per_tile, plane, &ctxt[plane],
+ lrworkerdata->rst_tmpbuf, lrworkerdata->rlbs, on_sync_read,
+ on_sync_write, lr_sync);
+
+ copy_funs[plane](lr_ctxt->dst, lr_ctxt->frame, ctxt[plane].tile_rect.left,
+ ctxt[plane].tile_rect.right, cur_job_info->v_copy_start,
+ cur_job_info->v_copy_end);
+ } else {
+ break;
+ }
+ }
+ return 1;
+}
+
+static void foreach_rest_unit_in_planes_mt(AV1LrStruct *lr_ctxt,
+ AVxWorker *workers, int nworkers,
+ AV1LrSync *lr_sync, AV1_COMMON *cm) {
+ FilterFrameCtxt *ctxt = lr_ctxt->ctxt;
+
+ const int num_planes = av1_num_planes(cm);
+
+ const AVxWorkerInterface *const winterface = aom_get_worker_interface();
+ int num_rows_lr = 0;
+
+ for (int plane = 0; plane < num_planes; plane++) {
+ const AV1PixelRect tile_rect = ctxt[plane].tile_rect;
+ const int max_tile_h = tile_rect.bottom - tile_rect.top;
+
+ const int unit_size = cm->seq_params.sb_size == BLOCK_128X128 ? 128 : 64;
+
+ num_rows_lr =
+ AOMMAX(num_rows_lr, av1_lr_count_units_in_tile(unit_size, max_tile_h));
+ }
+
+ const int num_workers = nworkers;
+ int i;
+ assert(MAX_MB_PLANE == 3);
+
+ if (!lr_sync->sync_range || num_rows_lr != lr_sync->rows ||
+ num_workers > lr_sync->num_workers || num_planes != lr_sync->num_planes) {
+ av1_loop_restoration_dealloc(lr_sync, num_workers);
+ loop_restoration_alloc(lr_sync, cm, num_workers, num_rows_lr, num_planes,
+ cm->width);
+ }
+
+ // Initialize cur_sb_col to -1 for all SB rows.
+ for (i = 0; i < num_planes; i++) {
+ memset(lr_sync->cur_sb_col[i], -1,
+ sizeof(*(lr_sync->cur_sb_col[i])) * num_rows_lr);
+ }
+
+ enqueue_lr_jobs(lr_sync, lr_ctxt, cm);
+
+ // Set up looprestoration thread data.
+ for (i = 0; i < num_workers; ++i) {
+ AVxWorker *const worker = &workers[i];
+ lr_sync->lrworkerdata[i].lr_ctxt = (void *)lr_ctxt;
+ worker->hook = (AVxWorkerHook)loop_restoration_row_worker;
+ worker->data1 = lr_sync;
+ worker->data2 = &lr_sync->lrworkerdata[i];
+
+ // Start loopfiltering
+ if (i == num_workers - 1) {
+ winterface->execute(worker);
+ } else {
+ winterface->launch(worker);
+ }
+ }
+
+ // Wait till all rows are finished
+ for (i = 0; i < num_workers; ++i) {
+ winterface->sync(&workers[i]);
}
}
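foreach_rest_unit_in_planes_mt() follows the usual aom worker dispatch shape: launch num_workers - 1 jobs asynchronously, execute the last one on the calling thread, then wait for the rest. A plain POSIX-threads equivalent of that shape; the hook below is a stand-in, not the AVxWorker interface:

#include <pthread.h>
#include <stdio.h>

#define NUM_WORKERS 4

static void *worker_hook(void *arg) {
  (void)arg; /* a real hook would drain the shared job queue here */
  return NULL;
}

int main(void) {
  pthread_t tid[NUM_WORKERS];
  /* Launch all but the last worker asynchronously ("launch"). */
  for (int i = 0; i < NUM_WORKERS - 1; i++)
    pthread_create(&tid[i], NULL, worker_hook, NULL);
  /* The calling thread doubles as the last worker ("execute"). */
  worker_hook(NULL);
  /* Wait until every worker has finished ("sync"). */
  for (int i = 0; i < NUM_WORKERS - 1; i++) pthread_join(tid[i], NULL);
  puts("all workers finished");
  return 0;
}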
-// Accumulate frame counts. FRAME_COUNTS consist solely of 'unsigned int'
-// members, so we treat it as an array, and sum over the whole length.
-void av1_accumulate_frame_counts(FRAME_COUNTS *acc_counts,
- FRAME_COUNTS *counts) {
- unsigned int *const acc = (unsigned int *)acc_counts;
- const unsigned int *const cnt = (unsigned int *)counts;
+void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
+ AV1_COMMON *cm, int optimized_lr,
+ AVxWorker *workers, int num_workers,
+ AV1LrSync *lr_sync, void *lr_ctxt) {
+ assert(!cm->all_lossless);
+
+ const int num_planes = av1_num_planes(cm);
+
+ AV1LrStruct *loop_rest_ctxt = (AV1LrStruct *)lr_ctxt;
- const unsigned int n_counts = sizeof(FRAME_COUNTS) / sizeof(unsigned int);
- unsigned int i;
+ av1_loop_restoration_filter_frame_init(loop_rest_ctxt, frame, cm,
+ optimized_lr, num_planes);
- for (i = 0; i < n_counts; i++) acc[i] += cnt[i];
+ foreach_rest_unit_in_planes_mt(loop_rest_ctxt, workers, num_workers, lr_sync,
+ cm);
}
diff --git a/third_party/aom/av1/common/thread_common.h b/third_party/aom/av1/common/thread_common.h
index 7eddc662c..4b0d5d2b8 100644
--- a/third_party/aom/av1/common/thread_common.h
+++ b/third_party/aom/av1/common/thread_common.h
@@ -11,7 +11,9 @@
#ifndef AV1_COMMON_LOOPFILTER_THREAD_H_
#define AV1_COMMON_LOOPFILTER_THREAD_H_
-#include "./aom_config.h"
+
+#include "config/aom_config.h"
+
#include "av1/common/av1_loopfilter.h"
#include "aom_util/aom_thread.h"
@@ -20,16 +22,21 @@ extern "C" {
#endif
struct AV1Common;
-struct FRAME_COUNTS;
+
+typedef struct AV1LfMTInfo {
+ int mi_row;
+ int plane;
+ int dir;
+} AV1LfMTInfo;
// Loopfilter row synchronization
typedef struct AV1LfSyncData {
#if CONFIG_MULTITHREAD
- pthread_mutex_t *mutex_;
- pthread_cond_t *cond_;
+ pthread_mutex_t *mutex_[MAX_MB_PLANE];
+ pthread_cond_t *cond_[MAX_MB_PLANE];
#endif
// Allocate memory to store the loop-filtered superblock index in each row.
- int *cur_sb_col;
+ int *cur_sb_col[MAX_MB_PLANE];
// The optimal sync_range for different resolution and platform should be
// determined by testing. Currently, it is chosen to be a power-of-2 number.
int sync_range;
@@ -38,27 +45,72 @@ typedef struct AV1LfSyncData {
// Row-based parallel loopfilter data
LFWorkerData *lfdata;
int num_workers;
+
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *job_mutex;
+#endif
+ AV1LfMTInfo *job_queue;
+ int jobs_enqueued;
+ int jobs_dequeued;
} AV1LfSync;
-// Allocate memory for loopfilter row synchronization.
-void av1_loop_filter_alloc(AV1LfSync *lf_sync, struct AV1Common *cm, int rows,
- int width, int num_workers);
+typedef struct AV1LrMTInfo {
+ int v_start;
+ int v_end;
+ int lr_unit_row;
+ int plane;
+ int sync_mode;
+ int v_copy_start;
+ int v_copy_end;
+} AV1LrMTInfo;
+
+typedef struct LoopRestorationWorkerData {
+ int32_t *rst_tmpbuf;
+ void *rlbs;
+ void *lr_ctxt;
+} LRWorkerData;
+
+// Looprestoration row synchronization
+typedef struct AV1LrSyncData {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *mutex_[MAX_MB_PLANE];
+ pthread_cond_t *cond_[MAX_MB_PLANE];
+#endif
+ // Allocate memory to store the loop-restoration block index in each row.
+ int *cur_sb_col[MAX_MB_PLANE];
+ // The optimal sync_range for different resolution and platform should be
+ // determined by testing. Currently, it is chosen to be a power-of-2 number.
+ int sync_range;
+ int rows;
+ int num_planes;
+
+ int num_workers;
+
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *job_mutex;
+#endif
+ // Row-based parallel loopfilter data
+ LRWorkerData *lrworkerdata;
+
+ AV1LrMTInfo *job_queue;
+ int jobs_enqueued;
+ int jobs_dequeued;
+} AV1LrSync;
// Deallocate loopfilter synchronization related mutex and data.
void av1_loop_filter_dealloc(AV1LfSync *lf_sync);
-// Multi-threaded loopfilter that uses the tile threads.
void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
- struct macroblockd_plane *planes,
- int frame_filter_level,
-#if CONFIG_LOOPFILTER_LEVEL
- int frame_filter_level_r,
-#endif
- int y_only, int partial_frame, AVxWorker *workers,
- int num_workers, AV1LfSync *lf_sync);
-
-void av1_accumulate_frame_counts(struct FRAME_COUNTS *acc_counts,
- struct FRAME_COUNTS *counts);
+ struct macroblockd *mbd, int plane_start,
+ int plane_end, int partial_frame,
+ AVxWorker *workers, int num_workers,
+ AV1LfSync *lf_sync);
+void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
+ struct AV1Common *cm,
+ int optimized_lr, AVxWorker *workers,
+ int num_workers, AV1LrSync *lr_sync,
+ void *lr_ctxt);
+void av1_loop_restoration_dealloc(AV1LrSync *lr_sync, int num_workers);
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/aom/av1/common/tile_common.c b/third_party/aom/av1/common/tile_common.c
index 507a01265..9a43ab29a 100644
--- a/third_party/aom/av1/common/tile_common.c
+++ b/third_party/aom/av1/common/tile_common.c
@@ -11,32 +11,14 @@
#include "av1/common/tile_common.h"
#include "av1/common/onyxc_int.h"
+#include "av1/common/resize.h"
#include "aom_dsp/aom_dsp_common.h"
-#if CONFIG_DEPENDENT_HORZTILES
-void av1_tile_set_tg_boundary(TileInfo *tile, const AV1_COMMON *const cm,
- int row, int col) {
- const int tg_start_row = cm->tile_group_start_row[row][col];
- const int tg_start_col = cm->tile_group_start_col[row][col];
- tile->tg_horz_boundary = ((row == tg_start_row && col >= tg_start_col) ||
- (row == tg_start_row + 1 && col < tg_start_col));
-#if CONFIG_MAX_TILE
- if (cm->tile_row_independent[row]) {
- tile->tg_horz_boundary = 1; // this tile row is independent
- }
-#endif
-}
-#endif
void av1_tile_init(TileInfo *tile, const AV1_COMMON *cm, int row, int col) {
av1_tile_set_row(tile, cm, row);
av1_tile_set_col(tile, cm, col);
-#if CONFIG_DEPENDENT_HORZTILES
- av1_tile_set_tg_boundary(tile, cm, row, col);
-#endif
}
-#if CONFIG_MAX_TILE
-
// Find smallest k>=0 such that (blk_size << k) >= target
static int tile_log2(int blk_size, int target) {
int k;
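tile_log2()'s body is elided by the hunk boundary above; per its comment it returns the smallest k >= 0 with (blk_size << k) >= target. A standalone version reconstructed from that description, with two example evaluations:

#include <stdio.h>

/* Smallest k >= 0 such that (blk_size << k) >= target. */
static int tile_log2_sketch(int blk_size, int target) {
  int k = 0;
  while ((blk_size << k) < target) ++k;
  return k;
}

int main(void) {
  printf("%d\n", tile_log2_sketch(1, 64));   /* 6: 1 << 6 == 64          */
  printf("%d\n", tile_log2_sketch(64, 135)); /* 2: 64 << 2 == 256 >= 135 */
  return 0;
}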
@@ -46,25 +28,27 @@ static int tile_log2(int blk_size, int target) {
}
void av1_get_tile_limits(AV1_COMMON *const cm) {
- int mi_cols = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
- int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
- int sb_cols = mi_cols >> MAX_MIB_SIZE_LOG2;
- int sb_rows = mi_rows >> MAX_MIB_SIZE_LOG2;
+ int mi_cols = ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2);
+ int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2);
+ int sb_cols = mi_cols >> cm->seq_params.mib_size_log2;
+ int sb_rows = mi_rows >> cm->seq_params.mib_size_log2;
- cm->min_log2_tile_cols = tile_log2(MAX_TILE_WIDTH_SB, sb_cols);
+ int sb_size_log2 = cm->seq_params.mib_size_log2 + MI_SIZE_LOG2;
+ cm->max_tile_width_sb = MAX_TILE_WIDTH >> sb_size_log2;
+ int max_tile_area_sb = MAX_TILE_AREA >> (2 * sb_size_log2);
+
+ cm->min_log2_tile_cols = tile_log2(cm->max_tile_width_sb, sb_cols);
cm->max_log2_tile_cols = tile_log2(1, AOMMIN(sb_cols, MAX_TILE_COLS));
cm->max_log2_tile_rows = tile_log2(1, AOMMIN(sb_rows, MAX_TILE_ROWS));
- cm->min_log2_tiles = tile_log2(MAX_TILE_AREA_SB, sb_cols * sb_rows);
+ cm->min_log2_tiles = tile_log2(max_tile_area_sb, sb_cols * sb_rows);
cm->min_log2_tiles = AOMMAX(cm->min_log2_tiles, cm->min_log2_tile_cols);
- // TODO(dominic.symes@arm.com):
- // Add in levelMinLog2Tiles as a lower limit when levels are defined
}
void av1_calculate_tile_cols(AV1_COMMON *const cm) {
- int mi_cols = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
- int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
- int sb_cols = mi_cols >> MAX_MIB_SIZE_LOG2;
- int sb_rows = mi_rows >> MAX_MIB_SIZE_LOG2;
+ int mi_cols = ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2);
+ int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2);
+ int sb_cols = mi_cols >> cm->seq_params.mib_size_log2;
+ int sb_rows = mi_rows >> cm->seq_params.mib_size_log2;
int i;
if (cm->uniform_tile_spacing_flag) {
@@ -80,24 +64,27 @@ void av1_calculate_tile_cols(AV1_COMMON *const cm) {
cm->tile_col_start_sb[i] = sb_cols;
cm->min_log2_tile_rows = AOMMAX(cm->min_log2_tiles - cm->log2_tile_cols, 0);
cm->max_tile_height_sb = sb_rows >> cm->min_log2_tile_rows;
+
+ cm->tile_width = size_sb << cm->seq_params.mib_size_log2;
+ cm->tile_width = AOMMIN(cm->tile_width, cm->mi_cols);
} else {
int max_tile_area_sb = (sb_rows * sb_cols);
- int max_tile_width_sb = 0;
+ int widest_tile_sb = 1;
cm->log2_tile_cols = tile_log2(1, cm->tile_cols);
for (i = 0; i < cm->tile_cols; i++) {
int size_sb = cm->tile_col_start_sb[i + 1] - cm->tile_col_start_sb[i];
- max_tile_width_sb = AOMMAX(max_tile_width_sb, size_sb);
+ widest_tile_sb = AOMMAX(widest_tile_sb, size_sb);
}
if (cm->min_log2_tiles) {
max_tile_area_sb >>= (cm->min_log2_tiles + 1);
}
- cm->max_tile_height_sb = AOMMAX(max_tile_area_sb / max_tile_width_sb, 1);
+ cm->max_tile_height_sb = AOMMAX(max_tile_area_sb / widest_tile_sb, 1);
}
}
void av1_calculate_tile_rows(AV1_COMMON *const cm) {
- int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
- int sb_rows = mi_rows >> MAX_MIB_SIZE_LOG2;
+ int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2);
+ int sb_rows = mi_rows >> cm->seq_params.mib_size_log2;
int start_sb, size_sb, i;
if (cm->uniform_tile_spacing_flag) {
@@ -110,106 +97,34 @@ void av1_calculate_tile_rows(AV1_COMMON *const cm) {
}
cm->tile_rows = i;
cm->tile_row_start_sb[i] = sb_rows;
+
+ cm->tile_height = size_sb << cm->seq_params.mib_size_log2;
+ cm->tile_height = AOMMIN(cm->tile_height, cm->mi_rows);
} else {
cm->log2_tile_rows = tile_log2(1, cm->tile_rows);
}
-
-#if CONFIG_DEPENDENT_HORZTILES
- // Record which tile rows must be indpendent for parallelism
- for (i = 0, start_sb = 0; i < cm->tile_rows; i++) {
- cm->tile_row_independent[i] = 0;
- if (cm->tile_row_start_sb[i + 1] - start_sb > cm->max_tile_height_sb) {
- cm->tile_row_independent[i] = 1;
- start_sb = cm->tile_row_start_sb[i];
- }
- }
-#endif
}
void av1_tile_set_row(TileInfo *tile, const AV1_COMMON *cm, int row) {
assert(row < cm->tile_rows);
- int mi_row_start = cm->tile_row_start_sb[row] << MAX_MIB_SIZE_LOG2;
- int mi_row_end = cm->tile_row_start_sb[row + 1] << MAX_MIB_SIZE_LOG2;
+ int mi_row_start = cm->tile_row_start_sb[row] << cm->seq_params.mib_size_log2;
+ int mi_row_end = cm->tile_row_start_sb[row + 1]
+ << cm->seq_params.mib_size_log2;
+ tile->tile_row = row;
tile->mi_row_start = mi_row_start;
tile->mi_row_end = AOMMIN(mi_row_end, cm->mi_rows);
+ assert(tile->mi_row_end > tile->mi_row_start);
}
void av1_tile_set_col(TileInfo *tile, const AV1_COMMON *cm, int col) {
assert(col < cm->tile_cols);
- int mi_col_start = cm->tile_col_start_sb[col] << MAX_MIB_SIZE_LOG2;
- int mi_col_end = cm->tile_col_start_sb[col + 1] << MAX_MIB_SIZE_LOG2;
+ int mi_col_start = cm->tile_col_start_sb[col] << cm->seq_params.mib_size_log2;
+ int mi_col_end = cm->tile_col_start_sb[col + 1]
+ << cm->seq_params.mib_size_log2;
+ tile->tile_col = col;
tile->mi_col_start = mi_col_start;
tile->mi_col_end = AOMMIN(mi_col_end, cm->mi_cols);
-}
-
-#else
-
-void av1_tile_set_row(TileInfo *tile, const AV1_COMMON *cm, int row) {
- tile->mi_row_start = row * cm->tile_height;
- tile->mi_row_end = AOMMIN(tile->mi_row_start + cm->tile_height, cm->mi_rows);
-}
-
-void av1_tile_set_col(TileInfo *tile, const AV1_COMMON *cm, int col) {
- tile->mi_col_start = col * cm->tile_width;
- tile->mi_col_end = AOMMIN(tile->mi_col_start + cm->tile_width, cm->mi_cols);
-}
-
-#if CONFIG_EXT_PARTITION
-#define MIN_TILE_WIDTH_MAX_SB 2
-#define MAX_TILE_WIDTH_MAX_SB 32
-#else
-#define MIN_TILE_WIDTH_MAX_SB 4
-#define MAX_TILE_WIDTH_MAX_SB 64
-#endif // CONFIG_EXT_PARTITION
-
-static int get_min_log2_tile_cols(int max_sb_cols) {
- int min_log2 = 0;
- while ((MAX_TILE_WIDTH_MAX_SB << min_log2) < max_sb_cols) ++min_log2;
- return min_log2;
-}
-
-static int get_max_log2_tile_cols(int max_sb_cols) {
- int max_log2 = 1;
- while ((max_sb_cols >> max_log2) >= MIN_TILE_WIDTH_MAX_SB) ++max_log2;
- return max_log2 - 1;
-}
-
-void av1_get_tile_n_bits(int mi_cols, int *min_log2_tile_cols,
- int *max_log2_tile_cols) {
- const int max_sb_cols =
- ALIGN_POWER_OF_TWO(mi_cols, MAX_MIB_SIZE_LOG2) >> MAX_MIB_SIZE_LOG2;
- *min_log2_tile_cols = get_min_log2_tile_cols(max_sb_cols);
- *max_log2_tile_cols = get_max_log2_tile_cols(max_sb_cols);
- assert(*min_log2_tile_cols <= *max_log2_tile_cols);
-}
-#endif // CONFIG_MAX_TILE
-
-void av1_setup_frame_boundary_info(const AV1_COMMON *const cm) {
- MODE_INFO *mi = cm->mi;
- int col;
- for (col = 0; col < cm->mi_cols; ++col) {
- mi->mbmi.boundary_info |= FRAME_ABOVE_BOUNDARY | TILE_ABOVE_BOUNDARY;
- mi += 1;
- }
-
- mi = cm->mi;
- int row;
- for (row = 0; row < cm->mi_rows; ++row) {
- mi->mbmi.boundary_info |= FRAME_LEFT_BOUNDARY | TILE_LEFT_BOUNDARY;
- mi += cm->mi_stride;
- }
-
- mi = cm->mi + (cm->mi_rows - 1) * cm->mi_stride;
- for (col = 0; col < cm->mi_cols; ++col) {
- mi->mbmi.boundary_info |= FRAME_BOTTOM_BOUNDARY | TILE_BOTTOM_BOUNDARY;
- mi += 1;
- }
-
- mi = cm->mi + cm->mi_cols - 1;
- for (row = 0; row < cm->mi_rows; ++row) {
- mi->mbmi.boundary_info |= FRAME_RIGHT_BOUNDARY | TILE_RIGHT_BOUNDARY;
- mi += cm->mi_stride;
- }
+ assert(tile->mi_col_end > tile->mi_col_start);
}
int get_tile_size(int mi_frame_size, int log2_tile_num, int *ntiles) {
@@ -236,56 +151,41 @@ int get_tile_size(int mi_frame_size, int log2_tile_num, int *ntiles) {
return mi_tile_size;
}
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
-void av1_setup_across_tile_boundary_info(const AV1_COMMON *const cm,
- const TileInfo *const tile_info) {
- if (cm->tile_cols * cm->tile_rows > 1) {
- const int mi_row = tile_info->mi_row_start;
- const int mi_col = tile_info->mi_col_start;
- MODE_INFO *const mi_start = cm->mi + mi_row * cm->mi_stride + mi_col;
- assert(mi_start < cm->mip + cm->mi_alloc_size);
- MODE_INFO *mi = 0;
- const int row_diff = tile_info->mi_row_end - tile_info->mi_row_start;
- const int col_diff = tile_info->mi_col_end - tile_info->mi_col_start;
- int row, col;
-
-#if CONFIG_DEPENDENT_HORZTILES
- if (!cm->dependent_horz_tiles || tile_info->tg_horz_boundary)
-#endif // CONFIG_DEPENDENT_HORZTILES
- {
- mi = mi_start;
- for (col = 0; col < col_diff; ++col) {
- mi->mbmi.boundary_info |= TILE_ABOVE_BOUNDARY;
- mi += 1;
- }
- }
-
- mi = mi_start;
- for (row = 0; row < row_diff; ++row) {
- mi->mbmi.boundary_info |= TILE_LEFT_BOUNDARY;
- mi += cm->mi_stride;
- }
+AV1PixelRect av1_get_tile_rect(const TileInfo *tile_info, const AV1_COMMON *cm,
+ int is_uv) {
+ AV1PixelRect r;
+
+ // Calculate position in the Y plane
+ r.left = tile_info->mi_col_start * MI_SIZE;
+ r.right = tile_info->mi_col_end * MI_SIZE;
+ r.top = tile_info->mi_row_start * MI_SIZE;
+ r.bottom = tile_info->mi_row_end * MI_SIZE;
+
+ // If upscaling is enabled, the tile limits need scaling to match the
+ // upscaled frame where the restoration units live. To do this, scale up the
+ // top-left and bottom-right of the tile.
+ if (av1_superres_scaled(cm)) {
+ av1_calculate_unscaled_superres_size(&r.left, &r.top,
+ cm->superres_scale_denominator);
+ av1_calculate_unscaled_superres_size(&r.right, &r.bottom,
+ cm->superres_scale_denominator);
+ }
- mi = mi_start + (row_diff - 1) * cm->mi_stride;
+ const int frame_w = cm->superres_upscaled_width;
+ const int frame_h = cm->superres_upscaled_height;
- // explicit bounds checking
- assert(mi + col_diff <= cm->mip + cm->mi_alloc_size);
+ // Make sure we don't fall off the bottom-right of the frame.
+ r.right = AOMMIN(r.right, frame_w);
+ r.bottom = AOMMIN(r.bottom, frame_h);
- for (col = 0; col < col_diff; ++col) {
- mi->mbmi.boundary_info |= TILE_BOTTOM_BOUNDARY;
- mi += 1;
- }
+ // Convert to coordinates in the appropriate plane
+ const int ss_x = is_uv && cm->subsampling_x;
+ const int ss_y = is_uv && cm->subsampling_y;
- mi = mi_start + col_diff - 1;
- for (row = 0; row < row_diff; ++row) {
- mi->mbmi.boundary_info |= TILE_RIGHT_BOUNDARY;
- mi += cm->mi_stride;
- }
- }
-}
+ r.left = ROUND_POWER_OF_TWO(r.left, ss_x);
+ r.right = ROUND_POWER_OF_TWO(r.right, ss_x);
+ r.top = ROUND_POWER_OF_TWO(r.top, ss_y);
+ r.bottom = ROUND_POWER_OF_TWO(r.bottom, ss_y);
-int av1_disable_loopfilter_on_tile_boundary(const struct AV1Common *cm) {
- return (!cm->loop_filter_across_tiles_enabled &&
- (cm->tile_cols * cm->tile_rows > 1));
+ return r;
}
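av1_get_tile_rect() above works in luma pixels, optionally rescales for superres, clamps to the frame, and only then shifts the rectangle into chroma coordinates with rounding. The final rounding shift in isolation; ROUND_POWER_OF_TWO is restated here so the sketch is self-contained:

#include <stdio.h>

#define ROUND_POWER_OF_TWO(value, n) (((value) + (((1 << (n)) >> 1))) >> (n))

struct rect { int left, top, right, bottom; };

static struct rect to_plane_coords(struct rect r, int ss_x, int ss_y) {
  r.left = ROUND_POWER_OF_TWO(r.left, ss_x);
  r.right = ROUND_POWER_OF_TWO(r.right, ss_x);
  r.top = ROUND_POWER_OF_TWO(r.top, ss_y);
  r.bottom = ROUND_POWER_OF_TWO(r.bottom, ss_y);
  return r;
}

int main(void) {
  struct rect y_rect = { 0, 0, 1919, 1079 };      /* odd luma extents on purpose */
  struct rect uv = to_plane_coords(y_rect, 1, 1); /* 4:2:0 chroma */
  printf("uv: %d %d %d %d\n", uv.left, uv.top, uv.right, uv.bottom);
  /* prints: uv: 0 0 960 540 */
  return 0;
}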
-#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
diff --git a/third_party/aom/av1/common/tile_common.h b/third_party/aom/av1/common/tile_common.h
index be21e1482..be037fb17 100644
--- a/third_party/aom/av1/common/tile_common.h
+++ b/third_party/aom/av1/common/tile_common.h
@@ -16,7 +16,7 @@
extern "C" {
#endif
-#include "./aom_config.h"
+#include "config/aom_config.h"
struct AV1Common;
@@ -26,6 +26,8 @@ typedef struct TileInfo {
int mi_row_start, mi_row_end;
int mi_col_start, mi_col_end;
int tg_horz_boundary;
+ int tile_row;
+ int tile_col;
} TileInfo;
// initializes 'tile->mi_(row|col)_(start|end)' for (row, col) based on
@@ -35,39 +37,30 @@ void av1_tile_init(TileInfo *tile, const struct AV1Common *cm, int row,
void av1_tile_set_row(TileInfo *tile, const struct AV1Common *cm, int row);
void av1_tile_set_col(TileInfo *tile, const struct AV1Common *cm, int col);
-#if CONFIG_DEPENDENT_HORZTILES
-void av1_tile_set_tg_boundary(TileInfo *tile, const struct AV1Common *const cm,
- int row, int col);
-#endif
void av1_get_tile_n_bits(int mi_cols, int *min_log2_tile_cols,
int *max_log2_tile_cols);
-void av1_setup_frame_boundary_info(const struct AV1Common *const cm);
-
// Calculate the correct tile size (width or height) for (1 << log2_tile_num)
// tiles horizontally or vertically in the frame.
int get_tile_size(int mi_frame_size, int log2_tile_num, int *ntiles);
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
-void av1_setup_across_tile_boundary_info(const struct AV1Common *const cm,
- const TileInfo *const tile_info);
-int av1_disable_loopfilter_on_tile_boundary(const struct AV1Common *cm);
-#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
+typedef struct {
+ int left, top, right, bottom;
+} AV1PixelRect;
-#if CONFIG_MAX_TILE
+// Return the pixel extents of the given tile
+AV1PixelRect av1_get_tile_rect(const TileInfo *tile_info,
+ const struct AV1Common *cm, int is_uv);
// Define tile maximum width and area.
// There is no maximum height, since height is limited by the area and width
// limits. The minimum tile width or height is fixed at one superblock.
-#define MAX_TILE_WIDTH (4096) // Max Tile width in pixels
-#define MAX_TILE_WIDTH_SB (MAX_TILE_WIDTH >> MAX_SB_SIZE_LOG2)
+#define MAX_TILE_WIDTH (4096) // Max Tile width in pixels
#define MAX_TILE_AREA (4096 * 2304) // Maximum tile area in pixels
-#define MAX_TILE_AREA_SB (MAX_TILE_AREA >> (2 * MAX_SB_SIZE_LOG2))
void av1_get_tile_limits(struct AV1Common *const cm);
void av1_calculate_tile_cols(struct AV1Common *const cm);
void av1_calculate_tile_rows(struct AV1Common *const cm);
-#endif
#ifdef __cplusplus
} // extern "C"
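
The MAX_TILE_WIDTH/MAX_TILE_AREA pair above is why the header can drop an explicit height cap: a tile may be as tall as the area budget allows for its width. A small sketch of that relationship (max_tile_height() is a hypothetical helper, not part of this patch):

#include <stdio.h>

#define MAX_TILE_WIDTH (4096)        /* max tile width in pixels */
#define MAX_TILE_AREA (4096 * 2304)  /* max tile area in pixels  */

/* Tallest legal tile for a given width; 0 if the width itself is illegal. */
static int max_tile_height(int tile_width_px) {
  if (tile_width_px <= 0 || tile_width_px > MAX_TILE_WIDTH) return 0;
  return MAX_TILE_AREA / tile_width_px;
}

int main(void) {
  printf("%d\n", max_tile_height(4096)); /* 2304: full-width tiles stay short */
  printf("%d\n", max_tile_height(1024)); /* 9216: narrow tiles may be tall    */
  return 0;
}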
diff --git a/third_party/aom/av1/common/timing.c b/third_party/aom/av1/common/timing.c
new file mode 100644
index 000000000..5ff538ae1
--- /dev/null
+++ b/third_party/aom/av1/common/timing.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "av1/common/timing.h"
+
+/* Tables of AV1 max bitrates for the different levels of main and high tier.
+ * The tables are in kbps, whereas the specification lists them in Mbps.
+ * Note that, depending on the profile, a multiplier is needed.
+ */
+
+/* Max bitrates for levels of Main Tier in kbps. The bitrate in main_kbps[31] */
+/* is a dummy value; the decoder model is not applicable for level 31. */
+static int32_t main_kbps[1 << LEVEL_BITS] = {
+ 1500, 3000, 0, 0, 6000, 10000, 0, 0, 12000, 20000, 0,
+ 0, 30000, 40000, 60000, 60000, 60000, 100000, 160000, 160000, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, (1 << 26)
+};
+
+/* Max bitrates for levels of High Tier in kbps. The bitrate in high_kbps[31] */
+/* is a dummy value; the decoder model is not applicable for level 31. */
+static int32_t high_kbps[1 << LEVEL_BITS] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 30000, 50000, 0, 0, 100000, 160000, 240000, 240000,
+ 240000, 480000, 800000, 800000, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, (1 << 26)
+};
+
+/* BitrateProfileFactor */
+static int bitrate_profile_factor[1 << PROFILE_BITS] = {
+ 1, 2, 3, 0, 0, 0, 0, 0
+};
+
+int64_t max_level_bitrate(BITSTREAM_PROFILE seq_profile, int seq_level_idx,
+ int seq_tier) {
+ int64_t bitrate;
+
+ if (seq_tier) {
+ bitrate = high_kbps[seq_level_idx] * bitrate_profile_factor[seq_profile];
+ } else {
+ bitrate = main_kbps[seq_level_idx] * bitrate_profile_factor[seq_profile];
+ }
+
+ return bitrate * 1000;
+}
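+
+/* Worked example (illustrative): with the usual AV1 level indexing,
+ * seq_level_idx = (major - 2) * 4 + minor, index 9 is level 4.1. Main tier
+ * at Main profile (factor 1) then returns main_kbps[9] * 1 * 1000 =
+ * 20,000,000 bps, while high tier returns high_kbps[9] * 1000 = 50 Mbps. */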
+
+void set_aom_dec_model_info(aom_dec_model_info_t *decoder_model) {
+ decoder_model->encoder_decoder_buffer_delay_length = 16;
+ decoder_model->buffer_removal_delay_length = 10;
+ decoder_model->frame_presentation_delay_length = 10;
+}
+
+void set_dec_model_op_parameters(aom_dec_model_op_parameters_t *op_params) {
+ op_params->decoder_model_param_present_flag = 1;
+ op_params->decoder_buffer_delay = 90000 >> 1; // 0.5 s
+ op_params->encoder_buffer_delay = 90000 >> 1; // 0.5 s
+ op_params->low_delay_mode_flag = 0;
+ op_params->display_model_param_present_flag = 1;
+ op_params->initial_display_delay = 8; // 8 frames delay
+}
+
+void set_resource_availability_parameters(
+ aom_dec_model_op_parameters_t *op_params) {
+ op_params->decoder_model_param_present_flag = 0;
+ op_params->decoder_buffer_delay =
+ 70000; // Resource availability mode default
+ op_params->encoder_buffer_delay =
+ 20000; // Resource availability mode default
+ op_params->low_delay_mode_flag = 0; // Resource availability mode default
+ op_params->display_model_param_present_flag = 1;
+ op_params->initial_display_delay = 8; // 8 frames delay
+}
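
Taken together, the two setters above encode the choice between signalling a full decoder model and falling back to resource-availability defaults. A hypothetical caller (init_op_params() is illustrative, not part of this patch) might combine them with the level cap like so:

#include "av1/common/timing.h"

/* Illustrative only: pick defaults, then bound the bitrate by the level cap. */
static void init_op_params(aom_dec_model_op_parameters_t *op,
                           BITSTREAM_PROFILE profile, int level_idx, int tier,
                           int signal_decoder_model) {
  if (signal_decoder_model)
    set_dec_model_op_parameters(op);          /* 0.5 s buffers, model sent */
  else
    set_resource_availability_parameters(op); /* model omitted */
  op->bitrate = max_level_bitrate(profile, level_idx, tier);
}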
diff --git a/third_party/aom/av1/common/timing.h b/third_party/aom/av1/common/timing.h
new file mode 100644
index 000000000..d31f4b7fc
--- /dev/null
+++ b/third_party/aom/av1/common/timing.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_TIMING_H_
+#define AOM_TIMING_H_
+
+#include "aom/aom_integer.h"
+#include "av1/common/enums.h"
+
+#define MAX_NUM_OP_POINTS 32
+
+typedef struct aom_timing {
+ uint32_t num_units_in_display_tick;
+ uint32_t time_scale;
+ int equal_picture_interval;
+ uint32_t num_ticks_per_picture;
+} aom_timing_info_t;
+
+typedef struct aom_dec_model_info {
+ uint32_t num_units_in_decoding_tick;
+ int encoder_decoder_buffer_delay_length;
+ int buffer_removal_delay_length;
+ int frame_presentation_delay_length;
+} aom_dec_model_info_t;
+
+typedef struct aom_dec_model_op_parameters {
+ int decoder_model_param_present_flag;
+ int64_t bitrate;
+ int64_t buffer_size;
+ int decoder_buffer_delay;
+ int encoder_buffer_delay;
+ int low_delay_mode_flag;
+ int display_model_param_present_flag;
+ int initial_display_delay;
+} aom_dec_model_op_parameters_t;
+
+typedef struct aom_op_timing_info_t {
+ int64_t buffer_removal_delay;
+} aom_op_timing_info_t;
+
+void set_aom_dec_model_info(aom_dec_model_info_t *decoder_model);
+
+void set_dec_model_op_parameters(aom_dec_model_op_parameters_t *op_params);
+
+void set_resource_availability_parameters(
+ aom_dec_model_op_parameters_t *op_params);
+
+int64_t max_level_bitrate(BITSTREAM_PROFILE seq_profile, int seq_level_idx,
+ int seq_tier);
+
+#endif // AOM_TIMING_H_
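
As a concrete reading of aom_timing_info_t: when equal_picture_interval is set, the frame duration works out to num_ticks_per_picture * num_units_in_display_tick / time_scale seconds, so a constant 29.97 fps stream could be described as follows (hypothetical values, shown only to illustrate the field semantics):

#include "av1/common/timing.h"

/* 1001/30000 s per tick, one tick per picture: ~29.97 fps (NTSC-style). */
static const aom_timing_info_t ntsc_timing = {
  1001,  /* num_units_in_display_tick */
  30000, /* time_scale */
  1,     /* equal_picture_interval */
  1,     /* num_ticks_per_picture */
};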
diff --git a/third_party/aom/av1/common/token_cdfs.h b/third_party/aom/av1/common/token_cdfs.h
index c4f0f94c0..9a6b454ac 100644
--- a/third_party/aom/av1/common/token_cdfs.h
+++ b/third_party/aom/av1/common/token_cdfs.h
@@ -9,5245 +9,3542 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
#include "av1/common/entropy.h"
-/* clang-format off */
-static const coeff_cdf_model
-av1_default_coef_head_cdfs_q0[TX_SIZES][PLANE_TYPES] = {
- { // TX 4X4
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(21029), AOM_ICDF(21848), AOM_ICDF(26326), AOM_ICDF(29423),
- AOM_ICDF(30610), AOM_ICDF(32768), },
- {AOM_ICDF(10066), AOM_ICDF(12716), AOM_ICDF(18523), AOM_ICDF(23277),
- AOM_ICDF(24780), AOM_ICDF(32768), },
- {AOM_ICDF(1655), AOM_ICDF(4793), AOM_ICDF(6429), AOM_ICDF(11430),
- AOM_ICDF(12206), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(10364), AOM_ICDF(14773), AOM_ICDF(25084), AOM_ICDF(25599),
- AOM_ICDF(32768), },
- {AOM_ICDF(10060), AOM_ICDF(14834), AOM_ICDF(24695), AOM_ICDF(25188),
- AOM_ICDF(32768), },
- {AOM_ICDF(8279), AOM_ICDF(11106), AOM_ICDF(21159), AOM_ICDF(21671),
- AOM_ICDF(32768), },
- {AOM_ICDF(5914), AOM_ICDF(6961), AOM_ICDF(15824), AOM_ICDF(16314),
- AOM_ICDF(32768), },
- {AOM_ICDF(3542), AOM_ICDF(3935), AOM_ICDF(10073), AOM_ICDF(10456),
- AOM_ICDF(32768), },
- {AOM_ICDF(1492), AOM_ICDF(1808), AOM_ICDF(4428), AOM_ICDF(4747),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(15783), AOM_ICDF(19657), AOM_ICDF(28753), AOM_ICDF(29248),
- AOM_ICDF(32768), },
- {AOM_ICDF(12047), AOM_ICDF(15766), AOM_ICDF(26989), AOM_ICDF(27464),
- AOM_ICDF(32768), },
- {AOM_ICDF(8412), AOM_ICDF(9971), AOM_ICDF(21538), AOM_ICDF(22026),
- AOM_ICDF(32768), },
- {AOM_ICDF(5438), AOM_ICDF(6039), AOM_ICDF(15108), AOM_ICDF(15570),
- AOM_ICDF(32768), },
- {AOM_ICDF(3247), AOM_ICDF(3593), AOM_ICDF(9495), AOM_ICDF(9859),
- AOM_ICDF(32768), },
- {AOM_ICDF(1428), AOM_ICDF(1742), AOM_ICDF(4322), AOM_ICDF(4638),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(18469), AOM_ICDF(21675), AOM_ICDF(30172), AOM_ICDF(30563),
- AOM_ICDF(32768), },
- {AOM_ICDF(12582), AOM_ICDF(16559), AOM_ICDF(27995), AOM_ICDF(28423),
- AOM_ICDF(32768), },
- {AOM_ICDF(8183), AOM_ICDF(9915), AOM_ICDF(21836), AOM_ICDF(22336),
- AOM_ICDF(32768), },
- {AOM_ICDF(5255), AOM_ICDF(5845), AOM_ICDF(15137), AOM_ICDF(15593),
- AOM_ICDF(32768), },
- {AOM_ICDF(3140), AOM_ICDF(3478), AOM_ICDF(9376), AOM_ICDF(9739),
- AOM_ICDF(32768), },
- {AOM_ICDF(1549), AOM_ICDF(1864), AOM_ICDF(4660), AOM_ICDF(4984),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(18319), AOM_ICDF(23757), AOM_ICDF(30989), AOM_ICDF(31399),
- AOM_ICDF(32768), },
- {AOM_ICDF(12864), AOM_ICDF(18051), AOM_ICDF(28729), AOM_ICDF(29218),
- AOM_ICDF(32768), },
- {AOM_ICDF(8090), AOM_ICDF(10047), AOM_ICDF(22011), AOM_ICDF(22680),
- AOM_ICDF(32768), },
- {AOM_ICDF(5061), AOM_ICDF(5688), AOM_ICDF(14783), AOM_ICDF(15379),
- AOM_ICDF(32768), },
- {AOM_ICDF(3425), AOM_ICDF(3784), AOM_ICDF(9565), AOM_ICDF(9998),
- AOM_ICDF(32768), },
- {AOM_ICDF(1564), AOM_ICDF(1884), AOM_ICDF(4703), AOM_ICDF(5054),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(8329), AOM_ICDF(23625), AOM_ICDF(30376), AOM_ICDF(31182),
- AOM_ICDF(32768), },
- {AOM_ICDF(7265), AOM_ICDF(19981), AOM_ICDF(27965), AOM_ICDF(29333),
- AOM_ICDF(32768), },
- {AOM_ICDF(5797), AOM_ICDF(12014), AOM_ICDF(21143), AOM_ICDF(23728),
- AOM_ICDF(32768), },
- {AOM_ICDF(4525), AOM_ICDF(7029), AOM_ICDF(14661), AOM_ICDF(17493),
- AOM_ICDF(32768), },
- {AOM_ICDF(3200), AOM_ICDF(4082), AOM_ICDF(9679), AOM_ICDF(11816),
- AOM_ICDF(32768), },
- {AOM_ICDF(1930), AOM_ICDF(2344), AOM_ICDF(5504), AOM_ICDF(6684),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(12366), AOM_ICDF(20513), AOM_ICDF(22133), AOM_ICDF(29810),
- AOM_ICDF(30422), AOM_ICDF(32768), },
- {AOM_ICDF(7182), AOM_ICDF(16662), AOM_ICDF(18633), AOM_ICDF(27443),
- AOM_ICDF(28120), AOM_ICDF(32768), },
- {AOM_ICDF(1791), AOM_ICDF(10613), AOM_ICDF(11616), AOM_ICDF(21520),
- AOM_ICDF(22191), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(18943), AOM_ICDF(19755), AOM_ICDF(30340), AOM_ICDF(30674),
- AOM_ICDF(32768), },
- {AOM_ICDF(15702), AOM_ICDF(17160), AOM_ICDF(28778), AOM_ICDF(29115),
- AOM_ICDF(32768), },
- {AOM_ICDF(9337), AOM_ICDF(10054), AOM_ICDF(22492), AOM_ICDF(22845),
- AOM_ICDF(32768), },
- {AOM_ICDF(6550), AOM_ICDF(7019), AOM_ICDF(17068), AOM_ICDF(17420),
- AOM_ICDF(32768), },
- {AOM_ICDF(4169), AOM_ICDF(4566), AOM_ICDF(11849), AOM_ICDF(12185),
- AOM_ICDF(32768), },
- {AOM_ICDF(2495), AOM_ICDF(2839), AOM_ICDF(6895), AOM_ICDF(7221),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(20241), AOM_ICDF(21593), AOM_ICDF(31083), AOM_ICDF(31425),
- AOM_ICDF(32768), },
- {AOM_ICDF(15276), AOM_ICDF(16626), AOM_ICDF(28787), AOM_ICDF(29136),
- AOM_ICDF(32768), },
- {AOM_ICDF(7656), AOM_ICDF(8102), AOM_ICDF(20347), AOM_ICDF(20700),
- AOM_ICDF(32768), },
- {AOM_ICDF(4527), AOM_ICDF(4880), AOM_ICDF(13482), AOM_ICDF(13819),
- AOM_ICDF(32768), },
- {AOM_ICDF(2538), AOM_ICDF(2860), AOM_ICDF(7975), AOM_ICDF(8298),
- AOM_ICDF(32768), },
- {AOM_ICDF(1394), AOM_ICDF(1707), AOM_ICDF(3770), AOM_ICDF(4086),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(19968), AOM_ICDF(21872), AOM_ICDF(30812), AOM_ICDF(31172),
- AOM_ICDF(32768), },
- {AOM_ICDF(15081), AOM_ICDF(16805), AOM_ICDF(28957), AOM_ICDF(29326),
- AOM_ICDF(32768), },
- {AOM_ICDF(8196), AOM_ICDF(8748), AOM_ICDF(21434), AOM_ICDF(21825),
- AOM_ICDF(32768), },
- {AOM_ICDF(5297), AOM_ICDF(5675), AOM_ICDF(15007), AOM_ICDF(15385),
- AOM_ICDF(32768), },
- {AOM_ICDF(3102), AOM_ICDF(3429), AOM_ICDF(9255), AOM_ICDF(9607),
- AOM_ICDF(32768), },
- {AOM_ICDF(1502), AOM_ICDF(1815), AOM_ICDF(4662), AOM_ICDF(4983),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(19362), AOM_ICDF(22537), AOM_ICDF(31260), AOM_ICDF(31624),
- AOM_ICDF(32768), },
- {AOM_ICDF(14450), AOM_ICDF(17789), AOM_ICDF(29362), AOM_ICDF(29788),
- AOM_ICDF(32768), },
- {AOM_ICDF(7957), AOM_ICDF(8982), AOM_ICDF(21542), AOM_ICDF(22120),
- AOM_ICDF(32768), },
- {AOM_ICDF(4819), AOM_ICDF(5280), AOM_ICDF(14199), AOM_ICDF(14724),
- AOM_ICDF(32768), },
- {AOM_ICDF(2831), AOM_ICDF(3180), AOM_ICDF(8511), AOM_ICDF(8950),
- AOM_ICDF(32768), },
- {AOM_ICDF(1385), AOM_ICDF(1700), AOM_ICDF(4300), AOM_ICDF(4633),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(14134), AOM_ICDF(22252), AOM_ICDF(31119), AOM_ICDF(31577),
- AOM_ICDF(32768), },
- {AOM_ICDF(11383), AOM_ICDF(19847), AOM_ICDF(29451), AOM_ICDF(30205),
- AOM_ICDF(32768), },
- {AOM_ICDF(7338), AOM_ICDF(11314), AOM_ICDF(22338), AOM_ICDF(24028),
- AOM_ICDF(32768), },
- {AOM_ICDF(5071), AOM_ICDF(6634), AOM_ICDF(15379), AOM_ICDF(17178),
- AOM_ICDF(32768), },
- {AOM_ICDF(2969), AOM_ICDF(3703), AOM_ICDF(9896), AOM_ICDF(11246),
- AOM_ICDF(32768), },
- {AOM_ICDF(1809), AOM_ICDF(2173), AOM_ICDF(5573), AOM_ICDF(6229),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(25213), AOM_ICDF(26007), AOM_ICDF(29751), AOM_ICDF(31199),
- AOM_ICDF(31688), AOM_ICDF(32768), },
- {AOM_ICDF(13781), AOM_ICDF(16489), AOM_ICDF(23298), AOM_ICDF(27505),
- AOM_ICDF(28405), AOM_ICDF(32768), },
- {AOM_ICDF(4621), AOM_ICDF(9194), AOM_ICDF(12764), AOM_ICDF(19842),
- AOM_ICDF(20708), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(12686), AOM_ICDF(19031), AOM_ICDF(28910), AOM_ICDF(29358),
- AOM_ICDF(32768), },
- {AOM_ICDF(12732), AOM_ICDF(18729), AOM_ICDF(28346), AOM_ICDF(28824),
- AOM_ICDF(32768), },
- {AOM_ICDF(9753), AOM_ICDF(12954), AOM_ICDF(24344), AOM_ICDF(24920),
- AOM_ICDF(32768), },
- {AOM_ICDF(6853), AOM_ICDF(7851), AOM_ICDF(18601), AOM_ICDF(19110),
- AOM_ICDF(32768), },
- {AOM_ICDF(3697), AOM_ICDF(4071), AOM_ICDF(11373), AOM_ICDF(11743),
- AOM_ICDF(32768), },
- {AOM_ICDF(1738), AOM_ICDF(2057), AOM_ICDF(5307), AOM_ICDF(5627),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(18299), AOM_ICDF(24455), AOM_ICDF(30930), AOM_ICDF(31398),
- AOM_ICDF(32768), },
- {AOM_ICDF(14316), AOM_ICDF(19083), AOM_ICDF(29266), AOM_ICDF(29766),
- AOM_ICDF(32768), },
- {AOM_ICDF(9584), AOM_ICDF(11344), AOM_ICDF(23898), AOM_ICDF(24407),
- AOM_ICDF(32768), },
- {AOM_ICDF(6076), AOM_ICDF(6645), AOM_ICDF(16805), AOM_ICDF(17237),
- AOM_ICDF(32768), },
- {AOM_ICDF(3535), AOM_ICDF(3885), AOM_ICDF(10393), AOM_ICDF(10746),
- AOM_ICDF(32768), },
- {AOM_ICDF(1909), AOM_ICDF(2222), AOM_ICDF(5010), AOM_ICDF(5328),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(21106), AOM_ICDF(25258), AOM_ICDF(31172), AOM_ICDF(31576),
- AOM_ICDF(32768), },
- {AOM_ICDF(14944), AOM_ICDF(20229), AOM_ICDF(29903), AOM_ICDF(30361),
- AOM_ICDF(32768), },
- {AOM_ICDF(10454), AOM_ICDF(13063), AOM_ICDF(25548), AOM_ICDF(26138),
- AOM_ICDF(32768), },
- {AOM_ICDF(7667), AOM_ICDF(8529), AOM_ICDF(20025), AOM_ICDF(20588),
- AOM_ICDF(32768), },
- {AOM_ICDF(4813), AOM_ICDF(5176), AOM_ICDF(13672), AOM_ICDF(14085),
- AOM_ICDF(32768), },
- {AOM_ICDF(2450), AOM_ICDF(2763), AOM_ICDF(7515), AOM_ICDF(7873),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(18297), AOM_ICDF(25980), AOM_ICDF(31547), AOM_ICDF(31946),
- AOM_ICDF(32768), },
- {AOM_ICDF(13370), AOM_ICDF(21048), AOM_ICDF(30193), AOM_ICDF(30703),
- AOM_ICDF(32768), },
- {AOM_ICDF(9326), AOM_ICDF(13020), AOM_ICDF(25206), AOM_ICDF(26074),
- AOM_ICDF(32768), },
- {AOM_ICDF(6117), AOM_ICDF(7480), AOM_ICDF(18243), AOM_ICDF(19130),
- AOM_ICDF(32768), },
- {AOM_ICDF(6408), AOM_ICDF(6819), AOM_ICDF(13596), AOM_ICDF(14098),
- AOM_ICDF(32768), },
- {AOM_ICDF(2179), AOM_ICDF(2485), AOM_ICDF(7393), AOM_ICDF(7768),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(11255), AOM_ICDF(26931), AOM_ICDF(31505), AOM_ICDF(32033),
- AOM_ICDF(32768), },
- {AOM_ICDF(9120), AOM_ICDF(23148), AOM_ICDF(30070), AOM_ICDF(31091),
- AOM_ICDF(32768), },
- {AOM_ICDF(7927), AOM_ICDF(15909), AOM_ICDF(25162), AOM_ICDF(27329),
- AOM_ICDF(32768), },
- {AOM_ICDF(6416), AOM_ICDF(10706), AOM_ICDF(19959), AOM_ICDF(22732),
- AOM_ICDF(32768), },
- {AOM_ICDF(4232), AOM_ICDF(5545), AOM_ICDF(13107), AOM_ICDF(15118),
- AOM_ICDF(32768), },
- {AOM_ICDF(2626), AOM_ICDF(2941), AOM_ICDF(8665), AOM_ICDF(9872),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(27618), AOM_ICDF(28976), AOM_ICDF(30940), AOM_ICDF(31993),
- AOM_ICDF(32336), AOM_ICDF(32768), },
- {AOM_ICDF(16119), AOM_ICDF(21691), AOM_ICDF(26334), AOM_ICDF(30705),
- AOM_ICDF(31244), AOM_ICDF(32768), },
- {AOM_ICDF(5114), AOM_ICDF(14755), AOM_ICDF(17865), AOM_ICDF(27048),
- AOM_ICDF(27895), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(19468), AOM_ICDF(23767), AOM_ICDF(31339), AOM_ICDF(31674),
- AOM_ICDF(32768), },
- {AOM_ICDF(16878), AOM_ICDF(20966), AOM_ICDF(30654), AOM_ICDF(31007),
- AOM_ICDF(32768), },
- {AOM_ICDF(12213), AOM_ICDF(14415), AOM_ICDF(26909), AOM_ICDF(27338),
- AOM_ICDF(32768), },
- {AOM_ICDF(9404), AOM_ICDF(10670), AOM_ICDF(22239), AOM_ICDF(22719),
- AOM_ICDF(32768), },
- {AOM_ICDF(6856), AOM_ICDF(7784), AOM_ICDF(17127), AOM_ICDF(17609),
- AOM_ICDF(32768), },
- {AOM_ICDF(5034), AOM_ICDF(5529), AOM_ICDF(13229), AOM_ICDF(13634),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(21214), AOM_ICDF(25570), AOM_ICDF(31656), AOM_ICDF(31994),
- AOM_ICDF(32768), },
- {AOM_ICDF(17012), AOM_ICDF(20535), AOM_ICDF(30403), AOM_ICDF(30787),
- AOM_ICDF(32768), },
- {AOM_ICDF(10855), AOM_ICDF(12147), AOM_ICDF(25451), AOM_ICDF(25874),
- AOM_ICDF(32768), },
- {AOM_ICDF(7055), AOM_ICDF(7837), AOM_ICDF(19116), AOM_ICDF(19553),
- AOM_ICDF(32768), },
- {AOM_ICDF(4141), AOM_ICDF(4531), AOM_ICDF(11911), AOM_ICDF(12296),
- AOM_ICDF(32768), },
- {AOM_ICDF(1706), AOM_ICDF(2041), AOM_ICDF(5622), AOM_ICDF(5957),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(22092), AOM_ICDF(26330), AOM_ICDF(31642), AOM_ICDF(32015),
- AOM_ICDF(32768), },
- {AOM_ICDF(16433), AOM_ICDF(20889), AOM_ICDF(30263), AOM_ICDF(30704),
- AOM_ICDF(32768), },
- {AOM_ICDF(11015), AOM_ICDF(13045), AOM_ICDF(26253), AOM_ICDF(26743),
- AOM_ICDF(32768), },
- {AOM_ICDF(9188), AOM_ICDF(9924), AOM_ICDF(21991), AOM_ICDF(22551),
- AOM_ICDF(32768), },
- {AOM_ICDF(5259), AOM_ICDF(5634), AOM_ICDF(14131), AOM_ICDF(14627),
- AOM_ICDF(32768), },
- {AOM_ICDF(1916), AOM_ICDF(2218), AOM_ICDF(6453), AOM_ICDF(6780),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(20331), AOM_ICDF(26854), AOM_ICDF(31896), AOM_ICDF(32255),
- AOM_ICDF(32768), },
- {AOM_ICDF(15738), AOM_ICDF(22741), AOM_ICDF(31108), AOM_ICDF(31557),
- AOM_ICDF(32768), },
- {AOM_ICDF(11693), AOM_ICDF(15508), AOM_ICDF(27435), AOM_ICDF(28103),
- AOM_ICDF(32768), },
- {AOM_ICDF(8066), AOM_ICDF(9281), AOM_ICDF(20855), AOM_ICDF(21631),
- AOM_ICDF(32768), },
- {AOM_ICDF(4427), AOM_ICDF(4860), AOM_ICDF(12951), AOM_ICDF(13543),
- AOM_ICDF(32768), },
- {AOM_ICDF(1928), AOM_ICDF(2372), AOM_ICDF(5634), AOM_ICDF(6672),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(15605), AOM_ICDF(27749), AOM_ICDF(31907), AOM_ICDF(32303),
- AOM_ICDF(32768), },
- {AOM_ICDF(11920), AOM_ICDF(24653), AOM_ICDF(31013), AOM_ICDF(31675),
- AOM_ICDF(32768), },
- {AOM_ICDF(8007), AOM_ICDF(14898), AOM_ICDF(25377), AOM_ICDF(27353),
- AOM_ICDF(32768), },
- {AOM_ICDF(6010), AOM_ICDF(8920), AOM_ICDF(18956), AOM_ICDF(21554),
- AOM_ICDF(32768), },
- {AOM_ICDF(4573), AOM_ICDF(5611), AOM_ICDF(13522), AOM_ICDF(15795),
- AOM_ICDF(32768), },
- {AOM_ICDF(4274), AOM_ICDF(6411), AOM_ICDF(11398), AOM_ICDF(14247),
- AOM_ICDF(32768), },
- },
- },
- },
- },
- { // TX 8X8
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(22195), AOM_ICDF(22830), AOM_ICDF(25684), AOM_ICDF(28569),
- AOM_ICDF(30557), AOM_ICDF(32768), },
- {AOM_ICDF(9973), AOM_ICDF(12001), AOM_ICDF(15354), AOM_ICDF(20353),
- AOM_ICDF(23020), AOM_ICDF(32768), },
- {AOM_ICDF(1514), AOM_ICDF(3998), AOM_ICDF(4873), AOM_ICDF(9182),
- AOM_ICDF(9967), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(11609), AOM_ICDF(14013), AOM_ICDF(24609), AOM_ICDF(25092),
- AOM_ICDF(32768), },
- {AOM_ICDF(10302), AOM_ICDF(15208), AOM_ICDF(24145), AOM_ICDF(24658),
- AOM_ICDF(32768), },
- {AOM_ICDF(7991), AOM_ICDF(10895), AOM_ICDF(20438), AOM_ICDF(21146),
- AOM_ICDF(32768), },
- {AOM_ICDF(5831), AOM_ICDF(7006), AOM_ICDF(15716), AOM_ICDF(16394),
- AOM_ICDF(32768), },
- {AOM_ICDF(3536), AOM_ICDF(3969), AOM_ICDF(10117), AOM_ICDF(10569),
- AOM_ICDF(32768), },
- {AOM_ICDF(1369), AOM_ICDF(1686), AOM_ICDF(4062), AOM_ICDF(4385),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(17334), AOM_ICDF(19416), AOM_ICDF(28420), AOM_ICDF(28798),
- AOM_ICDF(32768), },
- {AOM_ICDF(13512), AOM_ICDF(15917), AOM_ICDF(26736), AOM_ICDF(27122),
- AOM_ICDF(32768), },
- {AOM_ICDF(9322), AOM_ICDF(10491), AOM_ICDF(21892), AOM_ICDF(22281),
- AOM_ICDF(32768), },
- {AOM_ICDF(6187), AOM_ICDF(6682), AOM_ICDF(15992), AOM_ICDF(16351),
- AOM_ICDF(32768), },
- {AOM_ICDF(3733), AOM_ICDF(4073), AOM_ICDF(10406), AOM_ICDF(10735),
- AOM_ICDF(32768), },
- {AOM_ICDF(1606), AOM_ICDF(1920), AOM_ICDF(4715), AOM_ICDF(5028),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(20589), AOM_ICDF(22106), AOM_ICDF(30065), AOM_ICDF(30422),
- AOM_ICDF(32768), },
- {AOM_ICDF(14731), AOM_ICDF(16342), AOM_ICDF(27701), AOM_ICDF(28059),
- AOM_ICDF(32768), },
- {AOM_ICDF(8554), AOM_ICDF(9080), AOM_ICDF(20831), AOM_ICDF(21182),
- AOM_ICDF(32768), },
- {AOM_ICDF(5011), AOM_ICDF(5354), AOM_ICDF(13968), AOM_ICDF(14296),
- AOM_ICDF(32768), },
- {AOM_ICDF(2867), AOM_ICDF(3184), AOM_ICDF(8524), AOM_ICDF(8840),
- AOM_ICDF(32768), },
- {AOM_ICDF(1174), AOM_ICDF(1486), AOM_ICDF(3643), AOM_ICDF(3955),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(23439), AOM_ICDF(24729), AOM_ICDF(31199), AOM_ICDF(31537),
- AOM_ICDF(32768), },
- {AOM_ICDF(15716), AOM_ICDF(17015), AOM_ICDF(28650), AOM_ICDF(28989),
- AOM_ICDF(32768), },
- {AOM_ICDF(8381), AOM_ICDF(8812), AOM_ICDF(21032), AOM_ICDF(21369),
- AOM_ICDF(32768), },
- {AOM_ICDF(4868), AOM_ICDF(5197), AOM_ICDF(13740), AOM_ICDF(14065),
- AOM_ICDF(32768), },
- {AOM_ICDF(2744), AOM_ICDF(3058), AOM_ICDF(8333), AOM_ICDF(8648),
- AOM_ICDF(32768), },
- {AOM_ICDF(1185), AOM_ICDF(1497), AOM_ICDF(3656), AOM_ICDF(3968),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(23980), AOM_ICDF(26041), AOM_ICDF(31566), AOM_ICDF(31904),
- AOM_ICDF(32768), },
- {AOM_ICDF(16060), AOM_ICDF(18243), AOM_ICDF(29508), AOM_ICDF(29868),
- AOM_ICDF(32768), },
- {AOM_ICDF(8844), AOM_ICDF(9414), AOM_ICDF(22119), AOM_ICDF(22496),
- AOM_ICDF(32768), },
- {AOM_ICDF(5265), AOM_ICDF(5612), AOM_ICDF(14599), AOM_ICDF(14944),
- AOM_ICDF(32768), },
- {AOM_ICDF(3058), AOM_ICDF(3375), AOM_ICDF(9028), AOM_ICDF(9351),
- AOM_ICDF(32768), },
- {AOM_ICDF(1414), AOM_ICDF(1726), AOM_ICDF(4249), AOM_ICDF(4563),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(9994), AOM_ICDF(19506), AOM_ICDF(21744), AOM_ICDF(29408),
- AOM_ICDF(30809), AOM_ICDF(32768), },
- {AOM_ICDF(3771), AOM_ICDF(14862), AOM_ICDF(16756), AOM_ICDF(26385),
- AOM_ICDF(27927), AOM_ICDF(32768), },
- {AOM_ICDF(964), AOM_ICDF(10643), AOM_ICDF(11416), AOM_ICDF(21060),
- AOM_ICDF(22316), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(23263), AOM_ICDF(23761), AOM_ICDF(31250), AOM_ICDF(31580),
- AOM_ICDF(32768), },
- {AOM_ICDF(19631), AOM_ICDF(21067), AOM_ICDF(30262), AOM_ICDF(30596),
- AOM_ICDF(32768), },
- {AOM_ICDF(12419), AOM_ICDF(13646), AOM_ICDF(25959), AOM_ICDF(26329),
- AOM_ICDF(32768), },
- {AOM_ICDF(9274), AOM_ICDF(10229), AOM_ICDF(21588), AOM_ICDF(21981),
- AOM_ICDF(32768), },
- {AOM_ICDF(6778), AOM_ICDF(7496), AOM_ICDF(17069), AOM_ICDF(17469),
- AOM_ICDF(32768), },
- {AOM_ICDF(4655), AOM_ICDF(5089), AOM_ICDF(12206), AOM_ICDF(12574),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(24055), AOM_ICDF(24771), AOM_ICDF(31529), AOM_ICDF(31851),
- AOM_ICDF(32768), },
- {AOM_ICDF(18300), AOM_ICDF(19177), AOM_ICDF(29983), AOM_ICDF(30310),
- AOM_ICDF(32768), },
- {AOM_ICDF(9684), AOM_ICDF(10239), AOM_ICDF(23130), AOM_ICDF(23465),
- AOM_ICDF(32768), },
- {AOM_ICDF(6112), AOM_ICDF(6511), AOM_ICDF(16539), AOM_ICDF(16874),
- AOM_ICDF(32768), },
- {AOM_ICDF(3508), AOM_ICDF(3841), AOM_ICDF(10475), AOM_ICDF(10798),
- AOM_ICDF(32768), },
- {AOM_ICDF(1647), AOM_ICDF(1963), AOM_ICDF(5379), AOM_ICDF(5693),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(24875), AOM_ICDF(25551), AOM_ICDF(31757), AOM_ICDF(32078),
- AOM_ICDF(32768), },
- {AOM_ICDF(18585), AOM_ICDF(19328), AOM_ICDF(30217), AOM_ICDF(30542),
- AOM_ICDF(32768), },
- {AOM_ICDF(8948), AOM_ICDF(9350), AOM_ICDF(22251), AOM_ICDF(22577),
- AOM_ICDF(32768), },
- {AOM_ICDF(5148), AOM_ICDF(5481), AOM_ICDF(14806), AOM_ICDF(15127),
- AOM_ICDF(32768), },
- {AOM_ICDF(2852), AOM_ICDF(3169), AOM_ICDF(8930), AOM_ICDF(9249),
- AOM_ICDF(32768), },
- {AOM_ICDF(1298), AOM_ICDF(1609), AOM_ICDF(4289), AOM_ICDF(4600),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(25149), AOM_ICDF(25840), AOM_ICDF(31833), AOM_ICDF(32153),
- AOM_ICDF(32768), },
- {AOM_ICDF(19051), AOM_ICDF(19689), AOM_ICDF(30461), AOM_ICDF(30785),
- AOM_ICDF(32768), },
- {AOM_ICDF(8956), AOM_ICDF(9308), AOM_ICDF(22406), AOM_ICDF(22729),
- AOM_ICDF(32768), },
- {AOM_ICDF(5001), AOM_ICDF(5325), AOM_ICDF(14586), AOM_ICDF(14906),
- AOM_ICDF(32768), },
- {AOM_ICDF(2875), AOM_ICDF(3189), AOM_ICDF(8639), AOM_ICDF(8955),
- AOM_ICDF(32768), },
- {AOM_ICDF(1311), AOM_ICDF(1623), AOM_ICDF(4261), AOM_ICDF(4572),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(25212), AOM_ICDF(26544), AOM_ICDF(31879), AOM_ICDF(32209),
- AOM_ICDF(32768), },
- {AOM_ICDF(18967), AOM_ICDF(20523), AOM_ICDF(30778), AOM_ICDF(31126),
- AOM_ICDF(32768), },
- {AOM_ICDF(9672), AOM_ICDF(10140), AOM_ICDF(23740), AOM_ICDF(24117),
- AOM_ICDF(32768), },
- {AOM_ICDF(5732), AOM_ICDF(6079), AOM_ICDF(16067), AOM_ICDF(16423),
- AOM_ICDF(32768), },
- {AOM_ICDF(3370), AOM_ICDF(3687), AOM_ICDF(10101), AOM_ICDF(10429),
- AOM_ICDF(32768), },
- {AOM_ICDF(1696), AOM_ICDF(2007), AOM_ICDF(5320), AOM_ICDF(5648),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(26296), AOM_ICDF(26903), AOM_ICDF(30027), AOM_ICDF(31098),
- AOM_ICDF(31851), AOM_ICDF(32768), },
- {AOM_ICDF(13982), AOM_ICDF(16223), AOM_ICDF(22840), AOM_ICDF(26540),
- AOM_ICDF(28301), AOM_ICDF(32768), },
- {AOM_ICDF(5643), AOM_ICDF(9834), AOM_ICDF(13670), AOM_ICDF(20220),
- AOM_ICDF(21734), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(14291), AOM_ICDF(20303), AOM_ICDF(29319), AOM_ICDF(29879),
- AOM_ICDF(32768), },
- {AOM_ICDF(13407), AOM_ICDF(20905), AOM_ICDF(29052), AOM_ICDF(29644),
- AOM_ICDF(32768), },
- {AOM_ICDF(10860), AOM_ICDF(15525), AOM_ICDF(25872), AOM_ICDF(26766),
- AOM_ICDF(32768), },
- {AOM_ICDF(7801), AOM_ICDF(9554), AOM_ICDF(20530), AOM_ICDF(21309),
- AOM_ICDF(32768), },
- {AOM_ICDF(4523), AOM_ICDF(4994), AOM_ICDF(12583), AOM_ICDF(13069),
- AOM_ICDF(32768), },
- {AOM_ICDF(1784), AOM_ICDF(2110), AOM_ICDF(5198), AOM_ICDF(5511),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(20153), AOM_ICDF(24114), AOM_ICDF(30802), AOM_ICDF(31195),
- AOM_ICDF(32768), },
- {AOM_ICDF(16079), AOM_ICDF(19936), AOM_ICDF(29580), AOM_ICDF(29992),
- AOM_ICDF(32768), },
- {AOM_ICDF(10977), AOM_ICDF(12993), AOM_ICDF(25245), AOM_ICDF(25687),
- AOM_ICDF(32768), },
- {AOM_ICDF(7386), AOM_ICDF(8212), AOM_ICDF(19223), AOM_ICDF(19683),
- AOM_ICDF(32768), },
- {AOM_ICDF(4797), AOM_ICDF(5164), AOM_ICDF(12928), AOM_ICDF(13288),
- AOM_ICDF(32768), },
- {AOM_ICDF(2188), AOM_ICDF(2498), AOM_ICDF(6396), AOM_ICDF(6706),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(24221), AOM_ICDF(26746), AOM_ICDF(31634), AOM_ICDF(31980),
- AOM_ICDF(32768), },
- {AOM_ICDF(17492), AOM_ICDF(20348), AOM_ICDF(30067), AOM_ICDF(30432),
- AOM_ICDF(32768), },
- {AOM_ICDF(10522), AOM_ICDF(11531), AOM_ICDF(24642), AOM_ICDF(25031),
- AOM_ICDF(32768), },
- {AOM_ICDF(6567), AOM_ICDF(7006), AOM_ICDF(17688), AOM_ICDF(18036),
- AOM_ICDF(32768), },
- {AOM_ICDF(4123), AOM_ICDF(4447), AOM_ICDF(11775), AOM_ICDF(12095),
- AOM_ICDF(32768), },
- {AOM_ICDF(1770), AOM_ICDF(2065), AOM_ICDF(6491), AOM_ICDF(6786),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(25862), AOM_ICDF(27744), AOM_ICDF(31611), AOM_ICDF(31969),
- AOM_ICDF(32768), },
- {AOM_ICDF(17752), AOM_ICDF(20079), AOM_ICDF(30169), AOM_ICDF(30530),
- AOM_ICDF(32768), },
- {AOM_ICDF(10588), AOM_ICDF(11308), AOM_ICDF(24834), AOM_ICDF(25180),
- AOM_ICDF(32768), },
- {AOM_ICDF(7459), AOM_ICDF(7820), AOM_ICDF(17949), AOM_ICDF(18281),
- AOM_ICDF(32768), },
- {AOM_ICDF(3984), AOM_ICDF(4294), AOM_ICDF(11863), AOM_ICDF(12173),
- AOM_ICDF(32768), },
- {AOM_ICDF(2689), AOM_ICDF(2969), AOM_ICDF(11371), AOM_ICDF(11651),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(27703), AOM_ICDF(29662), AOM_ICDF(31910), AOM_ICDF(32262),
- AOM_ICDF(32768), },
- {AOM_ICDF(17904), AOM_ICDF(21878), AOM_ICDF(30510), AOM_ICDF(30969),
- AOM_ICDF(32768), },
- {AOM_ICDF(10660), AOM_ICDF(12299), AOM_ICDF(24907), AOM_ICDF(25524),
- AOM_ICDF(32768), },
- {AOM_ICDF(6972), AOM_ICDF(7545), AOM_ICDF(18660), AOM_ICDF(19251),
- AOM_ICDF(32768), },
- {AOM_ICDF(5359), AOM_ICDF(5768), AOM_ICDF(14022), AOM_ICDF(14397),
- AOM_ICDF(32768), },
- {AOM_ICDF(5030), AOM_ICDF(5487), AOM_ICDF(10364), AOM_ICDF(10973),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(27980), AOM_ICDF(28880), AOM_ICDF(31045), AOM_ICDF(31931),
- AOM_ICDF(32370), AOM_ICDF(32768), },
- {AOM_ICDF(15958), AOM_ICDF(19891), AOM_ICDF(25963), AOM_ICDF(29601),
- AOM_ICDF(30931), AOM_ICDF(32768), },
- {AOM_ICDF(3897), AOM_ICDF(12331), AOM_ICDF(15935), AOM_ICDF(24489),
- AOM_ICDF(26773), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(21443), AOM_ICDF(24237), AOM_ICDF(31473), AOM_ICDF(31808),
- AOM_ICDF(32768), },
- {AOM_ICDF(18617), AOM_ICDF(22378), AOM_ICDF(30958), AOM_ICDF(31301),
- AOM_ICDF(32768), },
- {AOM_ICDF(14626), AOM_ICDF(17725), AOM_ICDF(28852), AOM_ICDF(29246),
- AOM_ICDF(32768), },
- {AOM_ICDF(12155), AOM_ICDF(14598), AOM_ICDF(26000), AOM_ICDF(26506),
- AOM_ICDF(32768), },
- {AOM_ICDF(10111), AOM_ICDF(12149), AOM_ICDF(23415), AOM_ICDF(24002),
- AOM_ICDF(32768), },
- {AOM_ICDF(11352), AOM_ICDF(12864), AOM_ICDF(22589), AOM_ICDF(23010),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(22855), AOM_ICDF(25401), AOM_ICDF(31675), AOM_ICDF(31999),
- AOM_ICDF(32768), },
- {AOM_ICDF(19086), AOM_ICDF(21008), AOM_ICDF(30886), AOM_ICDF(31214),
- AOM_ICDF(32768), },
- {AOM_ICDF(13477), AOM_ICDF(14473), AOM_ICDF(28104), AOM_ICDF(28450),
- AOM_ICDF(32768), },
- {AOM_ICDF(9553), AOM_ICDF(10401), AOM_ICDF(23815), AOM_ICDF(24225),
- AOM_ICDF(32768), },
- {AOM_ICDF(5795), AOM_ICDF(6172), AOM_ICDF(18068), AOM_ICDF(18445),
- AOM_ICDF(32768), },
- {AOM_ICDF(4297), AOM_ICDF(5909), AOM_ICDF(10206), AOM_ICDF(11818),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(24424), AOM_ICDF(26344), AOM_ICDF(31912), AOM_ICDF(32231),
- AOM_ICDF(32768), },
- {AOM_ICDF(20229), AOM_ICDF(21775), AOM_ICDF(31283), AOM_ICDF(31610),
- AOM_ICDF(32768), },
- {AOM_ICDF(14224), AOM_ICDF(14882), AOM_ICDF(28673), AOM_ICDF(29012),
- AOM_ICDF(32768), },
- {AOM_ICDF(10881), AOM_ICDF(11494), AOM_ICDF(23829), AOM_ICDF(24238),
- AOM_ICDF(32768), },
- {AOM_ICDF(6367), AOM_ICDF(6988), AOM_ICDF(15685), AOM_ICDF(16306),
- AOM_ICDF(32768), },
- {AOM_ICDF(7447), AOM_ICDF(11916), AOM_ICDF(17873), AOM_ICDF(22342),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(25536), AOM_ICDF(27216), AOM_ICDF(31570), AOM_ICDF(31916),
- AOM_ICDF(32768), },
- {AOM_ICDF(19600), AOM_ICDF(21062), AOM_ICDF(30095), AOM_ICDF(30444),
- AOM_ICDF(32768), },
- {AOM_ICDF(11491), AOM_ICDF(12044), AOM_ICDF(26170), AOM_ICDF(26497),
- AOM_ICDF(32768), },
- {AOM_ICDF(9629), AOM_ICDF(9963), AOM_ICDF(23790), AOM_ICDF(24112),
- AOM_ICDF(32768), },
- {AOM_ICDF(8073), AOM_ICDF(8359), AOM_ICDF(22212), AOM_ICDF(22498),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(27425), AOM_ICDF(29611), AOM_ICDF(32005), AOM_ICDF(32347),
- AOM_ICDF(32768), },
- {AOM_ICDF(20590), AOM_ICDF(24265), AOM_ICDF(31252), AOM_ICDF(31658),
- AOM_ICDF(32768), },
- {AOM_ICDF(14072), AOM_ICDF(15705), AOM_ICDF(28945), AOM_ICDF(29389),
- AOM_ICDF(32768), },
- {AOM_ICDF(11295), AOM_ICDF(11926), AOM_ICDF(26485), AOM_ICDF(26872),
- AOM_ICDF(32768), },
- {AOM_ICDF(10627), AOM_ICDF(11292), AOM_ICDF(22141), AOM_ICDF(22805),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- },
- { // TX 16X16
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(9850), AOM_ICDF(11321), AOM_ICDF(13211), AOM_ICDF(18246),
- AOM_ICDF(21613), AOM_ICDF(32768), },
- {AOM_ICDF(4128), AOM_ICDF(6155), AOM_ICDF(7367), AOM_ICDF(11928),
- AOM_ICDF(14060), AOM_ICDF(32768), },
- {AOM_ICDF(932), AOM_ICDF(2794), AOM_ICDF(3234), AOM_ICDF(6647),
- AOM_ICDF(7340), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(9101), AOM_ICDF(10823), AOM_ICDF(21291), AOM_ICDF(22109),
- AOM_ICDF(32768), },
- {AOM_ICDF(8086), AOM_ICDF(13032), AOM_ICDF(21855), AOM_ICDF(22748),
- AOM_ICDF(32768), },
- {AOM_ICDF(6563), AOM_ICDF(10137), AOM_ICDF(18484), AOM_ICDF(20069),
- AOM_ICDF(32768), },
- {AOM_ICDF(4987), AOM_ICDF(6567), AOM_ICDF(14425), AOM_ICDF(15700),
- AOM_ICDF(32768), },
- {AOM_ICDF(3399), AOM_ICDF(3947), AOM_ICDF(9950), AOM_ICDF(10738),
- AOM_ICDF(32768), },
- {AOM_ICDF(1474), AOM_ICDF(1793), AOM_ICDF(4347), AOM_ICDF(4690),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(17035), AOM_ICDF(18650), AOM_ICDF(27401), AOM_ICDF(27793),
- AOM_ICDF(32768), },
- {AOM_ICDF(13213), AOM_ICDF(16039), AOM_ICDF(26044), AOM_ICDF(26448),
- AOM_ICDF(32768), },
- {AOM_ICDF(9916), AOM_ICDF(11812), AOM_ICDF(22497), AOM_ICDF(22945),
- AOM_ICDF(32768), },
- {AOM_ICDF(7227), AOM_ICDF(8059), AOM_ICDF(17399), AOM_ICDF(17817),
- AOM_ICDF(32768), },
- {AOM_ICDF(5144), AOM_ICDF(5572), AOM_ICDF(12546), AOM_ICDF(12892),
- AOM_ICDF(32768), },
- {AOM_ICDF(2364), AOM_ICDF(2678), AOM_ICDF(6057), AOM_ICDF(6372),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(19805), AOM_ICDF(21667), AOM_ICDF(29302), AOM_ICDF(29680),
- AOM_ICDF(32768), },
- {AOM_ICDF(14232), AOM_ICDF(16314), AOM_ICDF(27120), AOM_ICDF(27515),
- AOM_ICDF(32768), },
- {AOM_ICDF(8796), AOM_ICDF(9578), AOM_ICDF(21112), AOM_ICDF(21479),
- AOM_ICDF(32768), },
- {AOM_ICDF(5203), AOM_ICDF(5552), AOM_ICDF(14231), AOM_ICDF(14563),
- AOM_ICDF(32768), },
- {AOM_ICDF(2943), AOM_ICDF(3257), AOM_ICDF(8676), AOM_ICDF(8994),
- AOM_ICDF(32768), },
- {AOM_ICDF(1363), AOM_ICDF(1675), AOM_ICDF(4064), AOM_ICDF(4376),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(24214), AOM_ICDF(25083), AOM_ICDF(30916), AOM_ICDF(31249),
- AOM_ICDF(32768), },
- {AOM_ICDF(15904), AOM_ICDF(17001), AOM_ICDF(28199), AOM_ICDF(28532),
- AOM_ICDF(32768), },
- {AOM_ICDF(8324), AOM_ICDF(8717), AOM_ICDF(20480), AOM_ICDF(20808),
- AOM_ICDF(32768), },
- {AOM_ICDF(4752), AOM_ICDF(5070), AOM_ICDF(13245), AOM_ICDF(13565),
- AOM_ICDF(32768), },
- {AOM_ICDF(2729), AOM_ICDF(3042), AOM_ICDF(8218), AOM_ICDF(8530),
- AOM_ICDF(32768), },
- {AOM_ICDF(1385), AOM_ICDF(1697), AOM_ICDF(4196), AOM_ICDF(4508),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(26956), AOM_ICDF(27719), AOM_ICDF(31679), AOM_ICDF(32005),
- AOM_ICDF(32768), },
- {AOM_ICDF(16913), AOM_ICDF(17759), AOM_ICDF(29092), AOM_ICDF(29422),
- AOM_ICDF(32768), },
- {AOM_ICDF(8166), AOM_ICDF(8510), AOM_ICDF(20577), AOM_ICDF(20901),
- AOM_ICDF(32768), },
- {AOM_ICDF(4804), AOM_ICDF(5119), AOM_ICDF(13537), AOM_ICDF(13853),
- AOM_ICDF(32768), },
- {AOM_ICDF(2951), AOM_ICDF(3263), AOM_ICDF(8766), AOM_ICDF(9079),
- AOM_ICDF(32768), },
- {AOM_ICDF(1498), AOM_ICDF(1810), AOM_ICDF(4515), AOM_ICDF(4827),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(7335), AOM_ICDF(13463), AOM_ICDF(14286), AOM_ICDF(24588),
- AOM_ICDF(29117), AOM_ICDF(32768), },
- {AOM_ICDF(3212), AOM_ICDF(9869), AOM_ICDF(10336), AOM_ICDF(20172),
- AOM_ICDF(25029), AOM_ICDF(32768), },
- {AOM_ICDF(917), AOM_ICDF(6904), AOM_ICDF(7251), AOM_ICDF(15225),
- AOM_ICDF(18595), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(23988), AOM_ICDF(24467), AOM_ICDF(31033), AOM_ICDF(31407),
- AOM_ICDF(32768), },
- {AOM_ICDF(20390), AOM_ICDF(23805), AOM_ICDF(30556), AOM_ICDF(30920),
- AOM_ICDF(32768), },
- {AOM_ICDF(13566), AOM_ICDF(16666), AOM_ICDF(27478), AOM_ICDF(27995),
- AOM_ICDF(32768), },
- {AOM_ICDF(10353), AOM_ICDF(12637), AOM_ICDF(23789), AOM_ICDF(24437),
- AOM_ICDF(32768), },
- {AOM_ICDF(7956), AOM_ICDF(9364), AOM_ICDF(19994), AOM_ICDF(20621),
- AOM_ICDF(32768), },
- {AOM_ICDF(6036), AOM_ICDF(6495), AOM_ICDF(15543), AOM_ICDF(16033),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(25643), AOM_ICDF(26692), AOM_ICDF(31634), AOM_ICDF(31957),
- AOM_ICDF(32768), },
- {AOM_ICDF(18721), AOM_ICDF(20381), AOM_ICDF(30130), AOM_ICDF(30466),
- AOM_ICDF(32768), },
- {AOM_ICDF(10914), AOM_ICDF(12337), AOM_ICDF(24817), AOM_ICDF(25177),
- AOM_ICDF(32768), },
- {AOM_ICDF(7843), AOM_ICDF(8667), AOM_ICDF(19826), AOM_ICDF(20212),
- AOM_ICDF(32768), },
- {AOM_ICDF(5080), AOM_ICDF(5484), AOM_ICDF(14225), AOM_ICDF(14587),
- AOM_ICDF(32768), },
- {AOM_ICDF(2880), AOM_ICDF(3192), AOM_ICDF(7916), AOM_ICDF(8236),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(26447), AOM_ICDF(27233), AOM_ICDF(31779), AOM_ICDF(32097),
- AOM_ICDF(32768), },
- {AOM_ICDF(19042), AOM_ICDF(20153), AOM_ICDF(30217), AOM_ICDF(30540),
- AOM_ICDF(32768), },
- {AOM_ICDF(9858), AOM_ICDF(10440), AOM_ICDF(23424), AOM_ICDF(23753),
- AOM_ICDF(32768), },
- {AOM_ICDF(6276), AOM_ICDF(6657), AOM_ICDF(17158), AOM_ICDF(17489),
- AOM_ICDF(32768), },
- {AOM_ICDF(3725), AOM_ICDF(4039), AOM_ICDF(10981), AOM_ICDF(11303),
- AOM_ICDF(32768), },
- {AOM_ICDF(2041), AOM_ICDF(2345), AOM_ICDF(6069), AOM_ICDF(6373),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(27189), AOM_ICDF(27737), AOM_ICDF(31897), AOM_ICDF(32213),
- AOM_ICDF(32768), },
- {AOM_ICDF(19763), AOM_ICDF(20443), AOM_ICDF(30288), AOM_ICDF(30607),
- AOM_ICDF(32768), },
- {AOM_ICDF(9033), AOM_ICDF(9393), AOM_ICDF(22097), AOM_ICDF(22415),
- AOM_ICDF(32768), },
- {AOM_ICDF(5417), AOM_ICDF(5747), AOM_ICDF(15230), AOM_ICDF(15545),
- AOM_ICDF(32768), },
- {AOM_ICDF(3397), AOM_ICDF(3709), AOM_ICDF(10342), AOM_ICDF(10655),
- AOM_ICDF(32768), },
- {AOM_ICDF(2805), AOM_ICDF(3108), AOM_ICDF(6119), AOM_ICDF(6422),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(27874), AOM_ICDF(28490), AOM_ICDF(31981), AOM_ICDF(32301),
- AOM_ICDF(32768), },
- {AOM_ICDF(20112), AOM_ICDF(20724), AOM_ICDF(30607), AOM_ICDF(30935),
- AOM_ICDF(32768), },
- {AOM_ICDF(9188), AOM_ICDF(9549), AOM_ICDF(22544), AOM_ICDF(22875),
- AOM_ICDF(32768), },
- {AOM_ICDF(5590), AOM_ICDF(5918), AOM_ICDF(15550), AOM_ICDF(15878),
- AOM_ICDF(32768), },
- {AOM_ICDF(3567), AOM_ICDF(4015), AOM_ICDF(10658), AOM_ICDF(10988),
- AOM_ICDF(32768), },
- {AOM_ICDF(1950), AOM_ICDF(2388), AOM_ICDF(6246), AOM_ICDF(6681),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(25724), AOM_ICDF(26337), AOM_ICDF(28579), AOM_ICDF(29957),
- AOM_ICDF(30927), AOM_ICDF(32768), },
- {AOM_ICDF(9657), AOM_ICDF(12074), AOM_ICDF(16790), AOM_ICDF(21738),
- AOM_ICDF(23899), AOM_ICDF(32768), },
- {AOM_ICDF(4158), AOM_ICDF(7646), AOM_ICDF(10690), AOM_ICDF(16969),
- AOM_ICDF(18800), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(14330), AOM_ICDF(19826), AOM_ICDF(28364), AOM_ICDF(29154),
- AOM_ICDF(32768), },
- {AOM_ICDF(13503), AOM_ICDF(21352), AOM_ICDF(28714), AOM_ICDF(29534),
- AOM_ICDF(32768), },
- {AOM_ICDF(11754), AOM_ICDF(16853), AOM_ICDF(25931), AOM_ICDF(27325),
- AOM_ICDF(32768), },
- {AOM_ICDF(8311), AOM_ICDF(10581), AOM_ICDF(21258), AOM_ICDF(22633),
- AOM_ICDF(32768), },
- {AOM_ICDF(5297), AOM_ICDF(5819), AOM_ICDF(14162), AOM_ICDF(14892),
- AOM_ICDF(32768), },
- {AOM_ICDF(2887), AOM_ICDF(3208), AOM_ICDF(7455), AOM_ICDF(7768),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(22005), AOM_ICDF(24480), AOM_ICDF(30925), AOM_ICDF(31309),
- AOM_ICDF(32768), },
- {AOM_ICDF(17332), AOM_ICDF(20557), AOM_ICDF(29696), AOM_ICDF(30096),
- AOM_ICDF(32768), },
- {AOM_ICDF(11930), AOM_ICDF(14337), AOM_ICDF(25931), AOM_ICDF(26358),
- AOM_ICDF(32768), },
- {AOM_ICDF(8888), AOM_ICDF(10020), AOM_ICDF(20964), AOM_ICDF(21352),
- AOM_ICDF(32768), },
- {AOM_ICDF(5694), AOM_ICDF(6135), AOM_ICDF(14997), AOM_ICDF(15376),
- AOM_ICDF(32768), },
- {AOM_ICDF(2521), AOM_ICDF(2842), AOM_ICDF(7765), AOM_ICDF(8069),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(23993), AOM_ICDF(25546), AOM_ICDF(31427), AOM_ICDF(31762),
- AOM_ICDF(32768), },
- {AOM_ICDF(18134), AOM_ICDF(20327), AOM_ICDF(29992), AOM_ICDF(30386),
- AOM_ICDF(32768), },
- {AOM_ICDF(10997), AOM_ICDF(12057), AOM_ICDF(24719), AOM_ICDF(25141),
- AOM_ICDF(32768), },
- {AOM_ICDF(5719), AOM_ICDF(6153), AOM_ICDF(16654), AOM_ICDF(17032),
- AOM_ICDF(32768), },
- {AOM_ICDF(3637), AOM_ICDF(3953), AOM_ICDF(11392), AOM_ICDF(11696),
- AOM_ICDF(32768), },
- {AOM_ICDF(1837), AOM_ICDF(2127), AOM_ICDF(5703), AOM_ICDF(5993),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(26095), AOM_ICDF(26989), AOM_ICDF(31766), AOM_ICDF(32091),
- AOM_ICDF(32768), },
- {AOM_ICDF(19524), AOM_ICDF(20820), AOM_ICDF(30413), AOM_ICDF(30738),
- AOM_ICDF(32768), },
- {AOM_ICDF(9962), AOM_ICDF(10551), AOM_ICDF(22667), AOM_ICDF(23010),
- AOM_ICDF(32768), },
- {AOM_ICDF(5773), AOM_ICDF(6093), AOM_ICDF(15402), AOM_ICDF(15748),
- AOM_ICDF(32768), },
- {AOM_ICDF(3546), AOM_ICDF(3850), AOM_ICDF(9983), AOM_ICDF(10287),
- AOM_ICDF(32768), },
- {AOM_ICDF(2387), AOM_ICDF(2668), AOM_ICDF(5711), AOM_ICDF(5992),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(29071), AOM_ICDF(29675), AOM_ICDF(31761), AOM_ICDF(32087),
- AOM_ICDF(32768), },
- {AOM_ICDF(18709), AOM_ICDF(19761), AOM_ICDF(29374), AOM_ICDF(29730),
- AOM_ICDF(32768), },
- {AOM_ICDF(9336), AOM_ICDF(10048), AOM_ICDF(22625), AOM_ICDF(22988),
- AOM_ICDF(32768), },
- {AOM_ICDF(6446), AOM_ICDF(6793), AOM_ICDF(16834), AOM_ICDF(17172),
- AOM_ICDF(32768), },
- {AOM_ICDF(4227), AOM_ICDF(4539), AOM_ICDF(11587), AOM_ICDF(11909),
- AOM_ICDF(32768), },
- {AOM_ICDF(2624), AOM_ICDF(2929), AOM_ICDF(7139), AOM_ICDF(7444),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(25114), AOM_ICDF(25872), AOM_ICDF(29577), AOM_ICDF(31173),
- AOM_ICDF(32008), AOM_ICDF(32768), },
- {AOM_ICDF(11286), AOM_ICDF(14376), AOM_ICDF(22156), AOM_ICDF(26266),
- AOM_ICDF(29278), AOM_ICDF(32768), },
- {AOM_ICDF(2680), AOM_ICDF(11055), AOM_ICDF(14683), AOM_ICDF(23068),
- AOM_ICDF(26651), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(22838), AOM_ICDF(24926), AOM_ICDF(31689), AOM_ICDF(32019),
- AOM_ICDF(32768), },
- {AOM_ICDF(19245), AOM_ICDF(24299), AOM_ICDF(31481), AOM_ICDF(31852),
- AOM_ICDF(32768), },
- {AOM_ICDF(15429), AOM_ICDF(21159), AOM_ICDF(30176), AOM_ICDF(30732),
- AOM_ICDF(32768), },
- {AOM_ICDF(12373), AOM_ICDF(17092), AOM_ICDF(26912), AOM_ICDF(27758),
- AOM_ICDF(32768), },
- {AOM_ICDF(10899), AOM_ICDF(13395), AOM_ICDF(23604), AOM_ICDF(24329),
- AOM_ICDF(32768), },
- {AOM_ICDF(12767), AOM_ICDF(13096), AOM_ICDF(21644), AOM_ICDF(22083),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(24527), AOM_ICDF(26101), AOM_ICDF(31912), AOM_ICDF(32226),
- AOM_ICDF(32768), },
- {AOM_ICDF(20576), AOM_ICDF(22265), AOM_ICDF(31439), AOM_ICDF(31762),
- AOM_ICDF(32768), },
- {AOM_ICDF(13792), AOM_ICDF(15369), AOM_ICDF(28531), AOM_ICDF(28942),
- AOM_ICDF(32768), },
- {AOM_ICDF(9392), AOM_ICDF(11153), AOM_ICDF(23790), AOM_ICDF(24274),
- AOM_ICDF(32768), },
- {AOM_ICDF(5587), AOM_ICDF(6191), AOM_ICDF(19027), AOM_ICDF(19480),
- AOM_ICDF(32768), },
- {AOM_ICDF(5174), AOM_ICDF(10348), AOM_ICDF(17246), AOM_ICDF(22420),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(24737), AOM_ICDF(25605), AOM_ICDF(31953), AOM_ICDF(32268),
- AOM_ICDF(32768), },
- {AOM_ICDF(20933), AOM_ICDF(21817), AOM_ICDF(31546), AOM_ICDF(31861),
- AOM_ICDF(32768), },
- {AOM_ICDF(13887), AOM_ICDF(14656), AOM_ICDF(28490), AOM_ICDF(28817),
- AOM_ICDF(32768), },
- {AOM_ICDF(10018), AOM_ICDF(11047), AOM_ICDF(23593), AOM_ICDF(23967),
- AOM_ICDF(32768), },
- {AOM_ICDF(3855), AOM_ICDF(6746), AOM_ICDF(15420), AOM_ICDF(18312),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(25861), AOM_ICDF(26475), AOM_ICDF(32028), AOM_ICDF(32343),
- AOM_ICDF(32768), },
- {AOM_ICDF(22221), AOM_ICDF(22755), AOM_ICDF(31735), AOM_ICDF(32050),
- AOM_ICDF(32768), },
- {AOM_ICDF(15517), AOM_ICDF(15928), AOM_ICDF(29558), AOM_ICDF(29870),
- AOM_ICDF(32768), },
- {AOM_ICDF(7719), AOM_ICDF(8507), AOM_ICDF(20165), AOM_ICDF(20638),
- AOM_ICDF(32768), },
- {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(19275), AOM_ICDF(25058),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(28675), AOM_ICDF(29326), AOM_ICDF(31767), AOM_ICDF(32092),
- AOM_ICDF(32768), },
- {AOM_ICDF(21491), AOM_ICDF(22422), AOM_ICDF(29827), AOM_ICDF(30197),
- AOM_ICDF(32768), },
- {AOM_ICDF(10080), AOM_ICDF(11350), AOM_ICDF(23883), AOM_ICDF(24321),
- AOM_ICDF(32768), },
- {AOM_ICDF(8383), AOM_ICDF(8793), AOM_ICDF(21382), AOM_ICDF(21739),
- AOM_ICDF(32768), },
- {AOM_ICDF(6835), AOM_ICDF(7137), AOM_ICDF(20646), AOM_ICDF(20947),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- },
- { // TX 32X32
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(15501), AOM_ICDF(16574), AOM_ICDF(17941), AOM_ICDF(20080),
- AOM_ICDF(21984), AOM_ICDF(32768), },
- {AOM_ICDF(1676), AOM_ICDF(3221), AOM_ICDF(3952), AOM_ICDF(6916),
- AOM_ICDF(7628), AOM_ICDF(32768), },
- {AOM_ICDF(468), AOM_ICDF(1825), AOM_ICDF(2211), AOM_ICDF(4504),
- AOM_ICDF(4877), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(5597), AOM_ICDF(9461), AOM_ICDF(16777), AOM_ICDF(17896),
- AOM_ICDF(32768), },
- {AOM_ICDF(5231), AOM_ICDF(9185), AOM_ICDF(16569), AOM_ICDF(17688),
- AOM_ICDF(32768), },
- {AOM_ICDF(4128), AOM_ICDF(6983), AOM_ICDF(13860), AOM_ICDF(15674),
- AOM_ICDF(32768), },
- {AOM_ICDF(2908), AOM_ICDF(4209), AOM_ICDF(9762), AOM_ICDF(11321),
- AOM_ICDF(32768), },
- {AOM_ICDF(2269), AOM_ICDF(2797), AOM_ICDF(7063), AOM_ICDF(7999),
- AOM_ICDF(32768), },
- {AOM_ICDF(1270), AOM_ICDF(1588), AOM_ICDF(3710), AOM_ICDF(4051),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(14862), AOM_ICDF(16903), AOM_ICDF(25712), AOM_ICDF(26189),
- AOM_ICDF(32768), },
- {AOM_ICDF(12778), AOM_ICDF(15420), AOM_ICDF(25395), AOM_ICDF(25836),
- AOM_ICDF(32768), },
- {AOM_ICDF(10402), AOM_ICDF(12279), AOM_ICDF(22858), AOM_ICDF(23302),
- AOM_ICDF(32768), },
- {AOM_ICDF(8026), AOM_ICDF(8897), AOM_ICDF(18866), AOM_ICDF(19290),
- AOM_ICDF(32768), },
- {AOM_ICDF(6610), AOM_ICDF(7121), AOM_ICDF(15967), AOM_ICDF(16322),
- AOM_ICDF(32768), },
- {AOM_ICDF(3980), AOM_ICDF(4296), AOM_ICDF(10443), AOM_ICDF(10757),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(19177), AOM_ICDF(21516), AOM_ICDF(28474), AOM_ICDF(28892),
- AOM_ICDF(32768), },
- {AOM_ICDF(14901), AOM_ICDF(17006), AOM_ICDF(27100), AOM_ICDF(27500),
- AOM_ICDF(32768), },
- {AOM_ICDF(10655), AOM_ICDF(11487), AOM_ICDF(23288), AOM_ICDF(23664),
- AOM_ICDF(32768), },
- {AOM_ICDF(6980), AOM_ICDF(7408), AOM_ICDF(17955), AOM_ICDF(18288),
- AOM_ICDF(32768), },
- {AOM_ICDF(3891), AOM_ICDF(4206), AOM_ICDF(11255), AOM_ICDF(11570),
- AOM_ICDF(32768), },
- {AOM_ICDF(1532), AOM_ICDF(1844), AOM_ICDF(4593), AOM_ICDF(4905),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(24338), AOM_ICDF(25864), AOM_ICDF(30962), AOM_ICDF(31346),
- AOM_ICDF(32768), },
- {AOM_ICDF(16430), AOM_ICDF(18166), AOM_ICDF(28700), AOM_ICDF(29068),
- AOM_ICDF(32768), },
- {AOM_ICDF(9726), AOM_ICDF(10244), AOM_ICDF(22575), AOM_ICDF(22934),
- AOM_ICDF(32768), },
- {AOM_ICDF(5539), AOM_ICDF(5868), AOM_ICDF(15030), AOM_ICDF(15363),
- AOM_ICDF(32768), },
- {AOM_ICDF(3305), AOM_ICDF(3620), AOM_ICDF(9405), AOM_ICDF(9720),
- AOM_ICDF(32768), },
- {AOM_ICDF(1482), AOM_ICDF(1794), AOM_ICDF(4429), AOM_ICDF(4741),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(29843), AOM_ICDF(30312), AOM_ICDF(31922), AOM_ICDF(32242),
- AOM_ICDF(32768), },
- {AOM_ICDF(17390), AOM_ICDF(18061), AOM_ICDF(28932), AOM_ICDF(29258),
- AOM_ICDF(32768), },
- {AOM_ICDF(7968), AOM_ICDF(8308), AOM_ICDF(20128), AOM_ICDF(20447),
- AOM_ICDF(32768), },
- {AOM_ICDF(4523), AOM_ICDF(4838), AOM_ICDF(12959), AOM_ICDF(13274),
- AOM_ICDF(32768), },
- {AOM_ICDF(2765), AOM_ICDF(3077), AOM_ICDF(8284), AOM_ICDF(8596),
- AOM_ICDF(32768), },
- {AOM_ICDF(1422), AOM_ICDF(1733), AOM_ICDF(4244), AOM_ICDF(4556),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(19066), AOM_ICDF(20217), AOM_ICDF(21504), AOM_ICDF(24559),
- AOM_ICDF(26831), AOM_ICDF(32768), },
- {AOM_ICDF(5708), AOM_ICDF(7393), AOM_ICDF(8108), AOM_ICDF(11986),
- AOM_ICDF(17424), AOM_ICDF(32768), },
- {AOM_ICDF(1144), AOM_ICDF(2709), AOM_ICDF(3111), AOM_ICDF(6009),
- AOM_ICDF(10882), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(17586), AOM_ICDF(17895), AOM_ICDF(27561), AOM_ICDF(28179),
- AOM_ICDF(32768), },
- {AOM_ICDF(16442), AOM_ICDF(19113), AOM_ICDF(27944), AOM_ICDF(28764),
- AOM_ICDF(32768), },
- {AOM_ICDF(12438), AOM_ICDF(17724), AOM_ICDF(26435), AOM_ICDF(27714),
- AOM_ICDF(32768), },
- {AOM_ICDF(9439), AOM_ICDF(12708), AOM_ICDF(22594), AOM_ICDF(24060),
- AOM_ICDF(32768), },
- {AOM_ICDF(7762), AOM_ICDF(9639), AOM_ICDF(19669), AOM_ICDF(20614),
- AOM_ICDF(32768), },
- {AOM_ICDF(5324), AOM_ICDF(5894), AOM_ICDF(14504), AOM_ICDF(15100),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(23414), AOM_ICDF(25239), AOM_ICDF(31300), AOM_ICDF(31670),
- AOM_ICDF(32768), },
- {AOM_ICDF(18089), AOM_ICDF(22136), AOM_ICDF(30318), AOM_ICDF(30720),
- AOM_ICDF(32768), },
- {AOM_ICDF(12081), AOM_ICDF(15216), AOM_ICDF(27074), AOM_ICDF(27531),
- AOM_ICDF(32768), },
- {AOM_ICDF(9327), AOM_ICDF(10783), AOM_ICDF(22927), AOM_ICDF(23384),
- AOM_ICDF(32768), },
- {AOM_ICDF(6381), AOM_ICDF(6914), AOM_ICDF(17070), AOM_ICDF(17506),
- AOM_ICDF(32768), },
- {AOM_ICDF(3854), AOM_ICDF(4164), AOM_ICDF(10355), AOM_ICDF(10665),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(24366), AOM_ICDF(25993), AOM_ICDF(31678), AOM_ICDF(32001),
- AOM_ICDF(32768), },
- {AOM_ICDF(18041), AOM_ICDF(21047), AOM_ICDF(30693), AOM_ICDF(31031),
- AOM_ICDF(32768), },
- {AOM_ICDF(11271), AOM_ICDF(12970), AOM_ICDF(26794), AOM_ICDF(27180),
- AOM_ICDF(32768), },
- {AOM_ICDF(8173), AOM_ICDF(8758), AOM_ICDF(21941), AOM_ICDF(22340),
- AOM_ICDF(32768), },
- {AOM_ICDF(5248), AOM_ICDF(5568), AOM_ICDF(15646), AOM_ICDF(15994),
- AOM_ICDF(32768), },
- {AOM_ICDF(2689), AOM_ICDF(3193), AOM_ICDF(6722), AOM_ICDF(7226),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(27565), AOM_ICDF(28694), AOM_ICDF(31993), AOM_ICDF(32314),
- AOM_ICDF(32768), },
- {AOM_ICDF(20591), AOM_ICDF(22532), AOM_ICDF(31143), AOM_ICDF(31473),
- AOM_ICDF(32768), },
- {AOM_ICDF(11268), AOM_ICDF(12113), AOM_ICDF(25966), AOM_ICDF(26331),
- AOM_ICDF(32768), },
- {AOM_ICDF(7268), AOM_ICDF(7674), AOM_ICDF(19409), AOM_ICDF(19747),
- AOM_ICDF(32768), },
- {AOM_ICDF(4404), AOM_ICDF(4686), AOM_ICDF(13213), AOM_ICDF(13495),
- AOM_ICDF(32768), },
- {AOM_ICDF(2637), AOM_ICDF(3766), AOM_ICDF(7533), AOM_ICDF(8663),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(29847), AOM_ICDF(30306), AOM_ICDF(32081), AOM_ICDF(32397),
- AOM_ICDF(32768), },
- {AOM_ICDF(22752), AOM_ICDF(23329), AOM_ICDF(31334), AOM_ICDF(31658),
- AOM_ICDF(32768), },
- {AOM_ICDF(10305), AOM_ICDF(10672), AOM_ICDF(24328), AOM_ICDF(24657),
- AOM_ICDF(32768), },
- {AOM_ICDF(5712), AOM_ICDF(6031), AOM_ICDF(16694), AOM_ICDF(17018),
- AOM_ICDF(32768), },
- {AOM_ICDF(3979), AOM_ICDF(4278), AOM_ICDF(10985), AOM_ICDF(11284),
- AOM_ICDF(32768), },
- {AOM_ICDF(2465), AOM_ICDF(2900), AOM_ICDF(6815), AOM_ICDF(7250),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(21609), AOM_ICDF(22111), AOM_ICDF(24624), AOM_ICDF(26045),
- AOM_ICDF(27916), AOM_ICDF(32768), },
- {AOM_ICDF(5498), AOM_ICDF(7300), AOM_ICDF(12100), AOM_ICDF(15851),
- AOM_ICDF(18370), AOM_ICDF(32768), },
- {AOM_ICDF(1268), AOM_ICDF(3284), AOM_ICDF(6295), AOM_ICDF(10711),
- AOM_ICDF(12999), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(9621), AOM_ICDF(16733), AOM_ICDF(26354), AOM_ICDF(27609),
- AOM_ICDF(32768), },
- {AOM_ICDF(9619), AOM_ICDF(18339), AOM_ICDF(27578), AOM_ICDF(28547),
- AOM_ICDF(32768), },
- {AOM_ICDF(9575), AOM_ICDF(18177), AOM_ICDF(24044), AOM_ICDF(25625),
- AOM_ICDF(32768), },
- {AOM_ICDF(5999), AOM_ICDF(11578), AOM_ICDF(20125), AOM_ICDF(22544),
- AOM_ICDF(32768), },
- {AOM_ICDF(4842), AOM_ICDF(6220), AOM_ICDF(12898), AOM_ICDF(14944),
- AOM_ICDF(32768), },
- {AOM_ICDF(948), AOM_ICDF(1247), AOM_ICDF(3292), AOM_ICDF(3791),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(21002), AOM_ICDF(25135), AOM_ICDF(31208), AOM_ICDF(31629),
- AOM_ICDF(32768), },
- {AOM_ICDF(18106), AOM_ICDF(22116), AOM_ICDF(29422), AOM_ICDF(30013),
- AOM_ICDF(32768), },
- {AOM_ICDF(14580), AOM_ICDF(15855), AOM_ICDF(26171), AOM_ICDF(26535),
- AOM_ICDF(32768), },
- {AOM_ICDF(9965), AOM_ICDF(10971), AOM_ICDF(23043), AOM_ICDF(23378),
- AOM_ICDF(32768), },
- {AOM_ICDF(7123), AOM_ICDF(7395), AOM_ICDF(16893), AOM_ICDF(17232),
- AOM_ICDF(32768), },
- {AOM_ICDF(3187), AOM_ICDF(3432), AOM_ICDF(7600), AOM_ICDF(7845),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(26393), AOM_ICDF(27823), AOM_ICDF(31691), AOM_ICDF(32028),
- AOM_ICDF(32768), },
- {AOM_ICDF(18449), AOM_ICDF(20915), AOM_ICDF(30092), AOM_ICDF(30531),
- AOM_ICDF(32768), },
- {AOM_ICDF(11710), AOM_ICDF(12263), AOM_ICDF(26838), AOM_ICDF(27139),
- AOM_ICDF(32768), },
- {AOM_ICDF(7737), AOM_ICDF(8192), AOM_ICDF(21299), AOM_ICDF(21572),
- AOM_ICDF(32768), },
- {AOM_ICDF(3572), AOM_ICDF(4038), AOM_ICDF(13822), AOM_ICDF(14287),
- AOM_ICDF(32768), },
- {AOM_ICDF(1689), AOM_ICDF(2703), AOM_ICDF(3716), AOM_ICDF(4729),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(28371), AOM_ICDF(29507), AOM_ICDF(31986), AOM_ICDF(32314),
- AOM_ICDF(32768), },
- {AOM_ICDF(19411), AOM_ICDF(21758), AOM_ICDF(30225), AOM_ICDF(30579),
- AOM_ICDF(32768), },
- {AOM_ICDF(11995), AOM_ICDF(12434), AOM_ICDF(26661), AOM_ICDF(27026),
- AOM_ICDF(32768), },
- {AOM_ICDF(9175), AOM_ICDF(9721), AOM_ICDF(22173), AOM_ICDF(22501),
- AOM_ICDF(32768), },
- {AOM_ICDF(9078), AOM_ICDF(9742), AOM_ICDF(13063), AOM_ICDF(13727),
- AOM_ICDF(32768), },
- {AOM_ICDF(3192), AOM_ICDF(3830), AOM_ICDF(6809), AOM_ICDF(7447),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(31351), AOM_ICDF(31682), AOM_ICDF(32124), AOM_ICDF(32438),
- AOM_ICDF(32768), },
- {AOM_ICDF(20883), AOM_ICDF(22618), AOM_ICDF(30828), AOM_ICDF(31173),
- AOM_ICDF(32768), },
- {AOM_ICDF(11388), AOM_ICDF(12381), AOM_ICDF(24266), AOM_ICDF(24700),
- AOM_ICDF(32768), },
- {AOM_ICDF(6987), AOM_ICDF(7380), AOM_ICDF(18401), AOM_ICDF(18795),
- AOM_ICDF(32768), },
- {AOM_ICDF(2016), AOM_ICDF(2773), AOM_ICDF(7814), AOM_ICDF(8570),
- AOM_ICDF(32768), },
- {AOM_ICDF(2849), AOM_ICDF(4986), AOM_ICDF(8548), AOM_ICDF(10685),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(19461), AOM_ICDF(21728), AOM_ICDF(26601), AOM_ICDF(29082),
- AOM_ICDF(30105), AOM_ICDF(32768), },
- {AOM_ICDF(2845), AOM_ICDF(10798), AOM_ICDF(14316), AOM_ICDF(23118),
- AOM_ICDF(24609), AOM_ICDF(32768), },
- {AOM_ICDF(705), AOM_ICDF(10138), AOM_ICDF(12123), AOM_ICDF(21473),
- AOM_ICDF(23327), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(24780), AOM_ICDF(25836), AOM_ICDF(31623), AOM_ICDF(31938),
- AOM_ICDF(32768), },
- {AOM_ICDF(22703), AOM_ICDF(24390), AOM_ICDF(31353), AOM_ICDF(31797),
- AOM_ICDF(32768), },
- {AOM_ICDF(18218), AOM_ICDF(20834), AOM_ICDF(29429), AOM_ICDF(30327),
- AOM_ICDF(32768), },
- {AOM_ICDF(12517), AOM_ICDF(15626), AOM_ICDF(26000), AOM_ICDF(27281),
- AOM_ICDF(32768), },
- {AOM_ICDF(9988), AOM_ICDF(12791), AOM_ICDF(24073), AOM_ICDF(25295),
- AOM_ICDF(32768), },
- {AOM_ICDF(8529), AOM_ICDF(9202), AOM_ICDF(18853), AOM_ICDF(19751),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(26497), AOM_ICDF(27282), AOM_ICDF(32016), AOM_ICDF(32333),
- AOM_ICDF(32768), },
- {AOM_ICDF(22767), AOM_ICDF(24548), AOM_ICDF(31680), AOM_ICDF(32007),
- AOM_ICDF(32768), },
- {AOM_ICDF(10455), AOM_ICDF(13458), AOM_ICDF(26448), AOM_ICDF(26995),
- AOM_ICDF(32768), },
- {AOM_ICDF(3684), AOM_ICDF(4847), AOM_ICDF(20940), AOM_ICDF(21522),
- AOM_ICDF(32768), },
- {AOM_ICDF(9063), AOM_ICDF(11155), AOM_ICDF(17430), AOM_ICDF(19521),
- AOM_ICDF(32768), },
- {AOM_ICDF(6554), AOM_ICDF(11469), AOM_ICDF(16384), AOM_ICDF(21299),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(26212), AOM_ICDF(26755), AOM_ICDF(32090), AOM_ICDF(32400),
- AOM_ICDF(32768), },
- {AOM_ICDF(22239), AOM_ICDF(23123), AOM_ICDF(31406), AOM_ICDF(31725),
- AOM_ICDF(32768), },
- {AOM_ICDF(7220), AOM_ICDF(7609), AOM_ICDF(22715), AOM_ICDF(22993),
- AOM_ICDF(32768), },
- {AOM_ICDF(5554), AOM_ICDF(6387), AOM_ICDF(11941), AOM_ICDF(12774),
- AOM_ICDF(32768), },
- {AOM_ICDF(4915), AOM_ICDF(9830), AOM_ICDF(19661), AOM_ICDF(24576),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(28796), AOM_ICDF(29237), AOM_ICDF(32134), AOM_ICDF(32446),
- AOM_ICDF(32768), },
- {AOM_ICDF(25912), AOM_ICDF(26456), AOM_ICDF(32010), AOM_ICDF(32321),
- AOM_ICDF(32768), },
- {AOM_ICDF(14399), AOM_ICDF(14668), AOM_ICDF(26039), AOM_ICDF(26309),
- AOM_ICDF(32768), },
- {AOM_ICDF(2341), AOM_ICDF(4096), AOM_ICDF(11703), AOM_ICDF(13458),
- AOM_ICDF(32768), },
- {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(20480), AOM_ICDF(26624),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(30253), AOM_ICDF(30635), AOM_ICDF(32016), AOM_ICDF(32330),
- AOM_ICDF(32768), },
- {AOM_ICDF(23066), AOM_ICDF(23485), AOM_ICDF(30571), AOM_ICDF(30897),
- AOM_ICDF(32768), },
- {AOM_ICDF(11664), AOM_ICDF(12092), AOM_ICDF(22146), AOM_ICDF(22496),
- AOM_ICDF(32768), },
- {AOM_ICDF(5932), AOM_ICDF(6387), AOM_ICDF(17131), AOM_ICDF(17470),
- AOM_ICDF(32768), },
- {AOM_ICDF(5501), AOM_ICDF(5846), AOM_ICDF(15763), AOM_ICDF(16097),
- AOM_ICDF(32768), },
- {AOM_ICDF(4946), AOM_ICDF(6801), AOM_ICDF(14838), AOM_ICDF(16693),
- AOM_ICDF(32768), },
- },
- },
- },
- },
-};
+static const aom_cdf_prob
+ av1_default_dc_sign_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][DC_SIGN_CONTEXTS]
+ [CDF_SIZE(2)] = {
+ { {
+ { AOM_CDF2(128 * 125) },
+ { AOM_CDF2(128 * 102) },
+ { AOM_CDF2(128 * 147) },
+ },
+ {
+ { AOM_CDF2(128 * 119) },
+ { AOM_CDF2(128 * 101) },
+ { AOM_CDF2(128 * 135) },
+ } },
+ { {
+ { AOM_CDF2(128 * 125) },
+ { AOM_CDF2(128 * 102) },
+ { AOM_CDF2(128 * 147) },
+ },
+ {
+ { AOM_CDF2(128 * 119) },
+ { AOM_CDF2(128 * 101) },
+ { AOM_CDF2(128 * 135) },
+ } },
+ { {
+ { AOM_CDF2(128 * 125) },
+ { AOM_CDF2(128 * 102) },
+ { AOM_CDF2(128 * 147) },
+ },
+ {
+ { AOM_CDF2(128 * 119) },
+ { AOM_CDF2(128 * 101) },
+ { AOM_CDF2(128 * 135) },
+ } },
+ { {
+ { AOM_CDF2(128 * 125) },
+ { AOM_CDF2(128 * 102) },
+ { AOM_CDF2(128 * 147) },
+ },
+ {
+ { AOM_CDF2(128 * 119) },
+ { AOM_CDF2(128 * 101) },
+ { AOM_CDF2(128 * 135) },
+ } },
+ };
+
+static const aom_cdf_prob
+ av1_default_txb_skip_cdfs[TOKEN_CDF_Q_CTXS][TX_SIZES][TXB_SKIP_CONTEXTS]
+ [CDF_SIZE(2)] = { { { { AOM_CDF2(31849) },
+ { AOM_CDF2(5892) },
+ { AOM_CDF2(12112) },
+ { AOM_CDF2(21935) },
+ { AOM_CDF2(20289) },
+ { AOM_CDF2(27473) },
+ { AOM_CDF2(32487) },
+ { AOM_CDF2(7654) },
+ { AOM_CDF2(19473) },
+ { AOM_CDF2(29984) },
+ { AOM_CDF2(9961) },
+ { AOM_CDF2(30242) },
+ { AOM_CDF2(32117) } },
+ { { AOM_CDF2(31548) },
+ { AOM_CDF2(1549) },
+ { AOM_CDF2(10130) },
+ { AOM_CDF2(16656) },
+ { AOM_CDF2(18591) },
+ { AOM_CDF2(26308) },
+ { AOM_CDF2(32537) },
+ { AOM_CDF2(5403) },
+ { AOM_CDF2(18096) },
+ { AOM_CDF2(30003) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } },
+ { { AOM_CDF2(29957) },
+ { AOM_CDF2(5391) },
+ { AOM_CDF2(18039) },
+ { AOM_CDF2(23566) },
+ { AOM_CDF2(22431) },
+ { AOM_CDF2(25822) },
+ { AOM_CDF2(32197) },
+ { AOM_CDF2(3778) },
+ { AOM_CDF2(15336) },
+ { AOM_CDF2(28981) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } },
+ { { AOM_CDF2(17920) },
+ { AOM_CDF2(1818) },
+ { AOM_CDF2(7282) },
+ { AOM_CDF2(25273) },
+ { AOM_CDF2(10923) },
+ { AOM_CDF2(31554) },
+ { AOM_CDF2(32624) },
+ { AOM_CDF2(1366) },
+ { AOM_CDF2(15628) },
+ { AOM_CDF2(30462) },
+ { AOM_CDF2(146) },
+ { AOM_CDF2(5132) },
+ { AOM_CDF2(31657) } },
+ { { AOM_CDF2(6308) },
+ { AOM_CDF2(117) },
+ { AOM_CDF2(1638) },
+ { AOM_CDF2(2161) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(10923) },
+ { AOM_CDF2(30247) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } } },
+ { { { AOM_CDF2(30371) },
+ { AOM_CDF2(7570) },
+ { AOM_CDF2(13155) },
+ { AOM_CDF2(20751) },
+ { AOM_CDF2(20969) },
+ { AOM_CDF2(27067) },
+ { AOM_CDF2(32013) },
+ { AOM_CDF2(5495) },
+ { AOM_CDF2(17942) },
+ { AOM_CDF2(28280) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } },
+ { { AOM_CDF2(31782) },
+ { AOM_CDF2(1836) },
+ { AOM_CDF2(10689) },
+ { AOM_CDF2(17604) },
+ { AOM_CDF2(21622) },
+ { AOM_CDF2(27518) },
+ { AOM_CDF2(32399) },
+ { AOM_CDF2(4419) },
+ { AOM_CDF2(16294) },
+ { AOM_CDF2(28345) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } },
+ { { AOM_CDF2(31901) },
+ { AOM_CDF2(10311) },
+ { AOM_CDF2(18047) },
+ { AOM_CDF2(24806) },
+ { AOM_CDF2(23288) },
+ { AOM_CDF2(27914) },
+ { AOM_CDF2(32296) },
+ { AOM_CDF2(4215) },
+ { AOM_CDF2(15756) },
+ { AOM_CDF2(28341) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } },
+ { { AOM_CDF2(26726) },
+ { AOM_CDF2(1045) },
+ { AOM_CDF2(11703) },
+ { AOM_CDF2(20590) },
+ { AOM_CDF2(18554) },
+ { AOM_CDF2(25970) },
+ { AOM_CDF2(31938) },
+ { AOM_CDF2(5583) },
+ { AOM_CDF2(21313) },
+ { AOM_CDF2(29390) },
+ { AOM_CDF2(641) },
+ { AOM_CDF2(22265) },
+ { AOM_CDF2(31452) } },
+ { { AOM_CDF2(26584) },
+ { AOM_CDF2(188) },
+ { AOM_CDF2(8847) },
+ { AOM_CDF2(24519) },
+ { AOM_CDF2(22938) },
+ { AOM_CDF2(30583) },
+ { AOM_CDF2(32608) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } } },
+ { { { AOM_CDF2(29614) },
+ { AOM_CDF2(9068) },
+ { AOM_CDF2(12924) },
+ { AOM_CDF2(19538) },
+ { AOM_CDF2(17737) },
+ { AOM_CDF2(24619) },
+ { AOM_CDF2(30642) },
+ { AOM_CDF2(4119) },
+ { AOM_CDF2(16026) },
+ { AOM_CDF2(25657) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } },
+ { { AOM_CDF2(31957) },
+ { AOM_CDF2(3230) },
+ { AOM_CDF2(11153) },
+ { AOM_CDF2(18123) },
+ { AOM_CDF2(20143) },
+ { AOM_CDF2(26536) },
+ { AOM_CDF2(31986) },
+ { AOM_CDF2(3050) },
+ { AOM_CDF2(14603) },
+ { AOM_CDF2(25155) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } },
+ { { AOM_CDF2(32363) },
+ { AOM_CDF2(10692) },
+ { AOM_CDF2(19090) },
+ { AOM_CDF2(24357) },
+ { AOM_CDF2(24442) },
+ { AOM_CDF2(28312) },
+ { AOM_CDF2(32169) },
+ { AOM_CDF2(3648) },
+ { AOM_CDF2(15690) },
+ { AOM_CDF2(26815) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } },
+ { { AOM_CDF2(30669) },
+ { AOM_CDF2(3832) },
+ { AOM_CDF2(11663) },
+ { AOM_CDF2(18889) },
+ { AOM_CDF2(19782) },
+ { AOM_CDF2(23313) },
+ { AOM_CDF2(31330) },
+ { AOM_CDF2(5124) },
+ { AOM_CDF2(18719) },
+ { AOM_CDF2(28468) },
+ { AOM_CDF2(3082) },
+ { AOM_CDF2(20982) },
+ { AOM_CDF2(29443) } },
+ { { AOM_CDF2(28573) },
+ { AOM_CDF2(3183) },
+ { AOM_CDF2(17802) },
+ { AOM_CDF2(25977) },
+ { AOM_CDF2(26677) },
+ { AOM_CDF2(27832) },
+ { AOM_CDF2(32387) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } } },
+ { { { AOM_CDF2(26887) },
+ { AOM_CDF2(6729) },
+ { AOM_CDF2(10361) },
+ { AOM_CDF2(17442) },
+ { AOM_CDF2(15045) },
+ { AOM_CDF2(22478) },
+ { AOM_CDF2(29072) },
+ { AOM_CDF2(2713) },
+ { AOM_CDF2(11861) },
+ { AOM_CDF2(20773) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } },
+ { { AOM_CDF2(31903) },
+ { AOM_CDF2(2044) },
+ { AOM_CDF2(7528) },
+ { AOM_CDF2(14618) },
+ { AOM_CDF2(16182) },
+ { AOM_CDF2(24168) },
+ { AOM_CDF2(31037) },
+ { AOM_CDF2(2786) },
+ { AOM_CDF2(11194) },
+ { AOM_CDF2(20155) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } },
+ { { AOM_CDF2(32510) },
+ { AOM_CDF2(8430) },
+ { AOM_CDF2(17318) },
+ { AOM_CDF2(24154) },
+ { AOM_CDF2(23674) },
+ { AOM_CDF2(28789) },
+ { AOM_CDF2(32139) },
+ { AOM_CDF2(3440) },
+ { AOM_CDF2(13117) },
+ { AOM_CDF2(22702) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } },
+ { { AOM_CDF2(31671) },
+ { AOM_CDF2(2056) },
+ { AOM_CDF2(11746) },
+ { AOM_CDF2(16852) },
+ { AOM_CDF2(18635) },
+ { AOM_CDF2(24715) },
+ { AOM_CDF2(31484) },
+ { AOM_CDF2(4656) },
+ { AOM_CDF2(16074) },
+ { AOM_CDF2(24704) },
+ { AOM_CDF2(1806) },
+ { AOM_CDF2(14645) },
+ { AOM_CDF2(25336) } },
+ { { AOM_CDF2(31539) },
+ { AOM_CDF2(8433) },
+ { AOM_CDF2(20576) },
+ { AOM_CDF2(27904) },
+ { AOM_CDF2(27852) },
+ { AOM_CDF2(30026) },
+ { AOM_CDF2(32441) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) } } } };
+
+static const aom_cdf_prob
+ av1_default_eob_extra_cdfs[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES]
+ [EOB_COEF_CONTEXTS][CDF_SIZE(2)] = {
+ { { {
+ { AOM_CDF2(16961) },
+ { AOM_CDF2(17223) },
+ { AOM_CDF2(7621) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ },
+ {
+ { AOM_CDF2(19069) },
+ { AOM_CDF2(22525) },
+ { AOM_CDF2(13377) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } },
+ { {
+ { AOM_CDF2(20401) },
+ { AOM_CDF2(17025) },
+ { AOM_CDF2(12845) },
+ { AOM_CDF2(12873) },
+ { AOM_CDF2(14094) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ },
+ {
+ { AOM_CDF2(20681) },
+ { AOM_CDF2(20701) },
+ { AOM_CDF2(15250) },
+ { AOM_CDF2(15017) },
+ { AOM_CDF2(14928) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } },
+ { {
+ { AOM_CDF2(23905) },
+ { AOM_CDF2(17194) },
+ { AOM_CDF2(16170) },
+ { AOM_CDF2(17695) },
+ { AOM_CDF2(13826) },
+ { AOM_CDF2(15810) },
+ { AOM_CDF2(12036) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ },
+ {
+ { AOM_CDF2(23959) },
+ { AOM_CDF2(20799) },
+ { AOM_CDF2(19021) },
+ { AOM_CDF2(16203) },
+ { AOM_CDF2(17886) },
+ { AOM_CDF2(14144) },
+ { AOM_CDF2(12010) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } },
+ { {
+ { AOM_CDF2(27399) },
+ { AOM_CDF2(16327) },
+ { AOM_CDF2(18071) },
+ { AOM_CDF2(19584) },
+ { AOM_CDF2(20721) },
+ { AOM_CDF2(18432) },
+ { AOM_CDF2(19560) },
+ { AOM_CDF2(10150) },
+ { AOM_CDF2(8805) },
+ },
+ {
+ { AOM_CDF2(24932) },
+ { AOM_CDF2(20833) },
+ { AOM_CDF2(12027) },
+ { AOM_CDF2(16670) },
+ { AOM_CDF2(19914) },
+ { AOM_CDF2(15106) },
+ { AOM_CDF2(17662) },
+ { AOM_CDF2(13783) },
+ { AOM_CDF2(28756) },
+ } },
+ { {
+ { AOM_CDF2(23406) },
+ { AOM_CDF2(21845) },
+ { AOM_CDF2(18432) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(17096) },
+ { AOM_CDF2(12561) },
+ { AOM_CDF2(17320) },
+ { AOM_CDF2(22395) },
+ { AOM_CDF2(21370) },
+ },
+ {
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } } },
+ { { {
+ { AOM_CDF2(17471) },
+ { AOM_CDF2(20223) },
+ { AOM_CDF2(11357) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ },
+ {
+ { AOM_CDF2(20335) },
+ { AOM_CDF2(21667) },
+ { AOM_CDF2(14818) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } },
+ { {
+ { AOM_CDF2(20430) },
+ { AOM_CDF2(20662) },
+ { AOM_CDF2(15367) },
+ { AOM_CDF2(16970) },
+ { AOM_CDF2(14657) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ },
+ {
+ { AOM_CDF2(22117) },
+ { AOM_CDF2(22028) },
+ { AOM_CDF2(18650) },
+ { AOM_CDF2(16042) },
+ { AOM_CDF2(15885) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } },
+ { {
+ { AOM_CDF2(22409) },
+ { AOM_CDF2(21012) },
+ { AOM_CDF2(15650) },
+ { AOM_CDF2(17395) },
+ { AOM_CDF2(15469) },
+ { AOM_CDF2(20205) },
+ { AOM_CDF2(19511) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ },
+ {
+ { AOM_CDF2(24220) },
+ { AOM_CDF2(22480) },
+ { AOM_CDF2(17737) },
+ { AOM_CDF2(18916) },
+ { AOM_CDF2(19268) },
+ { AOM_CDF2(18412) },
+ { AOM_CDF2(18844) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } },
+ { {
+ { AOM_CDF2(25991) },
+ { AOM_CDF2(20314) },
+ { AOM_CDF2(17731) },
+ { AOM_CDF2(19678) },
+ { AOM_CDF2(18649) },
+ { AOM_CDF2(17307) },
+ { AOM_CDF2(21798) },
+ { AOM_CDF2(17549) },
+ { AOM_CDF2(15630) },
+ },
+ {
+ { AOM_CDF2(26585) },
+ { AOM_CDF2(21469) },
+ { AOM_CDF2(20432) },
+ { AOM_CDF2(17735) },
+ { AOM_CDF2(19280) },
+ { AOM_CDF2(15235) },
+ { AOM_CDF2(20297) },
+ { AOM_CDF2(22471) },
+ { AOM_CDF2(28997) },
+ } },
+ { {
+ { AOM_CDF2(26605) },
+ { AOM_CDF2(11304) },
+ { AOM_CDF2(16726) },
+ { AOM_CDF2(16560) },
+ { AOM_CDF2(20866) },
+ { AOM_CDF2(23524) },
+ { AOM_CDF2(19878) },
+ { AOM_CDF2(13469) },
+ { AOM_CDF2(23084) },
+ },
+ {
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } } },
+ { { {
+ { AOM_CDF2(18983) },
+ { AOM_CDF2(20512) },
+ { AOM_CDF2(14885) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ },
+ {
+ { AOM_CDF2(20090) },
+ { AOM_CDF2(19444) },
+ { AOM_CDF2(17286) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } },
+ { {
+ { AOM_CDF2(19139) },
+ { AOM_CDF2(21487) },
+ { AOM_CDF2(18959) },
+ { AOM_CDF2(20910) },
+ { AOM_CDF2(19089) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ },
+ {
+ { AOM_CDF2(20536) },
+ { AOM_CDF2(20664) },
+ { AOM_CDF2(20625) },
+ { AOM_CDF2(19123) },
+ { AOM_CDF2(14862) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } },
+ { {
+ { AOM_CDF2(19833) },
+ { AOM_CDF2(21502) },
+ { AOM_CDF2(17485) },
+ { AOM_CDF2(20267) },
+ { AOM_CDF2(18353) },
+ { AOM_CDF2(23329) },
+ { AOM_CDF2(21478) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ },
+ {
+ { AOM_CDF2(22041) },
+ { AOM_CDF2(23434) },
+ { AOM_CDF2(20001) },
+ { AOM_CDF2(20554) },
+ { AOM_CDF2(20951) },
+ { AOM_CDF2(20145) },
+ { AOM_CDF2(15562) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } },
+ { {
+ { AOM_CDF2(23312) },
+ { AOM_CDF2(21607) },
+ { AOM_CDF2(16526) },
+ { AOM_CDF2(18957) },
+ { AOM_CDF2(18034) },
+ { AOM_CDF2(18934) },
+ { AOM_CDF2(24247) },
+ { AOM_CDF2(16921) },
+ { AOM_CDF2(17080) },
+ },
+ {
+ { AOM_CDF2(26579) },
+ { AOM_CDF2(24910) },
+ { AOM_CDF2(18637) },
+ { AOM_CDF2(19800) },
+ { AOM_CDF2(20388) },
+ { AOM_CDF2(9887) },
+ { AOM_CDF2(15642) },
+ { AOM_CDF2(30198) },
+ { AOM_CDF2(24721) },
+ } },
+ { {
+ { AOM_CDF2(26998) },
+ { AOM_CDF2(16737) },
+ { AOM_CDF2(17838) },
+ { AOM_CDF2(18922) },
+ { AOM_CDF2(19515) },
+ { AOM_CDF2(18636) },
+ { AOM_CDF2(17333) },
+ { AOM_CDF2(15776) },
+ { AOM_CDF2(22658) },
+ },
+ {
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } } },
+ { { {
+ { AOM_CDF2(20177) },
+ { AOM_CDF2(20789) },
+ { AOM_CDF2(20262) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ },
+ {
+ { AOM_CDF2(21416) },
+ { AOM_CDF2(20855) },
+ { AOM_CDF2(23410) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } },
+ { {
+ { AOM_CDF2(20238) },
+ { AOM_CDF2(21057) },
+ { AOM_CDF2(19159) },
+ { AOM_CDF2(22337) },
+ { AOM_CDF2(20159) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ },
+ {
+ { AOM_CDF2(20125) },
+ { AOM_CDF2(20559) },
+ { AOM_CDF2(21707) },
+ { AOM_CDF2(22296) },
+ { AOM_CDF2(17333) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } },
+ { {
+ { AOM_CDF2(19941) },
+ { AOM_CDF2(20527) },
+ { AOM_CDF2(21470) },
+ { AOM_CDF2(22487) },
+ { AOM_CDF2(19558) },
+ { AOM_CDF2(22354) },
+ { AOM_CDF2(20331) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ },
+ {
+ { AOM_CDF2(22752) },
+ { AOM_CDF2(25006) },
+ { AOM_CDF2(22075) },
+ { AOM_CDF2(21576) },
+ { AOM_CDF2(17740) },
+ { AOM_CDF2(21690) },
+ { AOM_CDF2(19211) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } },
+ { {
+ { AOM_CDF2(21442) },
+ { AOM_CDF2(22358) },
+ { AOM_CDF2(18503) },
+ { AOM_CDF2(20291) },
+ { AOM_CDF2(19945) },
+ { AOM_CDF2(21294) },
+ { AOM_CDF2(21178) },
+ { AOM_CDF2(19400) },
+ { AOM_CDF2(10556) },
+ },
+ {
+ { AOM_CDF2(24648) },
+ { AOM_CDF2(24949) },
+ { AOM_CDF2(20708) },
+ { AOM_CDF2(23905) },
+ { AOM_CDF2(20501) },
+ { AOM_CDF2(9558) },
+ { AOM_CDF2(9423) },
+ { AOM_CDF2(30365) },
+ { AOM_CDF2(19253) },
+ } },
+ { {
+ { AOM_CDF2(26064) },
+ { AOM_CDF2(22098) },
+ { AOM_CDF2(19613) },
+ { AOM_CDF2(20525) },
+ { AOM_CDF2(17595) },
+ { AOM_CDF2(16618) },
+ { AOM_CDF2(20497) },
+ { AOM_CDF2(18989) },
+ { AOM_CDF2(15513) },
+ },
+ {
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ { AOM_CDF2(16384) },
+ } } }
+ };
+
+static const aom_cdf_prob
+ av1_default_eob_multi16_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(
+ 5)] = { { { { AOM_CDF5(840, 1039, 1980, 4895) },
+ { AOM_CDF5(370, 671, 1883, 4471) } },
+ { { AOM_CDF5(3247, 4950, 9688, 14563) },
+ { AOM_CDF5(1904, 3354, 7763, 14647) } } },
+ { { { AOM_CDF5(2125, 2551, 5165, 8946) },
+ { AOM_CDF5(513, 765, 1859, 6339) } },
+ { { AOM_CDF5(7637, 9498, 14259, 19108) },
+ { AOM_CDF5(2497, 4096, 8866, 16993) } } },
+ { { { AOM_CDF5(4016, 4897, 8881, 14968) },
+ { AOM_CDF5(716, 1105, 2646, 10056) } },
+ { { AOM_CDF5(11139, 13270, 18241, 23566) },
+ { AOM_CDF5(3192, 5032, 10297, 19755) } } },
+ { { { AOM_CDF5(6708, 8958, 14746, 22133) },
+ { AOM_CDF5(1222, 2074, 4783, 15410) } },
+ { { AOM_CDF5(19575, 21766, 26044, 29709) },
+ { AOM_CDF5(7297, 10767, 19273, 28194) } } } };
+
+static const aom_cdf_prob
+ av1_default_eob_multi32_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(
+ 6)] = { { { { AOM_CDF6(400, 520, 977, 2102, 6542) },
+ { AOM_CDF6(210, 405, 1315, 3326, 7537) } },
+ { { AOM_CDF6(2636, 4273, 7588, 11794, 20401) },
+ { AOM_CDF6(1786, 3179, 6902, 11357, 19054) } } },
+ { { { AOM_CDF6(989, 1249, 2019, 4151, 10785) },
+ { AOM_CDF6(313, 441, 1099, 2917, 8562) } },
+ { { AOM_CDF6(8394, 10352, 13932, 18855, 26014) },
+ { AOM_CDF6(2578, 4124, 8181, 13670, 24234) } } },
+ { { { AOM_CDF6(2515, 3003, 4452, 8162, 16041) },
+ { AOM_CDF6(574, 821, 1836, 5089, 13128) } },
+ { { AOM_CDF6(13468, 16303, 20361, 25105, 29281) },
+ { AOM_CDF6(3542, 5502, 10415, 16760, 25644) } } },
+ { { { AOM_CDF6(4617, 5709, 8446, 13584, 23135) },
+ { AOM_CDF6(1156, 1702, 3675, 9274, 20539) } },
+ { { AOM_CDF6(22086, 24282, 27010, 29770, 31743) },
+ { AOM_CDF6(7699, 10897, 20891, 26926, 31628) } } } };
+
+static const aom_cdf_prob
+ av1_default_eob_multi64_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(
+ 7)] = { { { { AOM_CDF7(329, 498, 1101, 1784, 3265, 7758) },
+ { AOM_CDF7(335, 730, 1459, 5494, 8755, 12997) } },
+ { { AOM_CDF7(3505, 5304, 10086, 13814, 17684, 23370) },
+ { AOM_CDF7(1563, 2700, 4876, 10911, 14706, 22480) } } },
+ { { { AOM_CDF7(1260, 1446, 2253, 3712, 6652, 13369) },
+ { AOM_CDF7(401, 605, 1029, 2563, 5845, 12626) } },
+ { { AOM_CDF7(8609, 10612, 14624, 18714, 22614, 29024) },
+ { AOM_CDF7(1923, 3127, 5867, 9703, 14277, 27100) } } },
+ { { { AOM_CDF7(2374, 2772, 4583, 7276, 12288, 19706) },
+ { AOM_CDF7(497, 810, 1315, 3000, 7004, 15641) } },
+ { { AOM_CDF7(15050, 17126, 21410, 24886, 28156, 30726) },
+ { AOM_CDF7(4034, 6290, 10235, 14982, 21214, 28491) } } },
+ { { { AOM_CDF7(6307, 7541, 12060, 16358, 22553, 27865) },
+ { AOM_CDF7(1289, 2320, 3971, 7926, 14153, 24291) } },
+ { { AOM_CDF7(24212, 25708, 28268, 30035, 31307, 32049) },
+ { AOM_CDF7(8726, 12378, 19409, 26450, 30038, 32462) } } } };
+
+static const aom_cdf_prob
+ av1_default_eob_multi128_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(
+ 8)] = {
+ { { { AOM_CDF8(219, 482, 1140, 2091, 3680, 6028, 12586) },
+ { AOM_CDF8(371, 699, 1254, 4830, 9479, 12562, 17497) } },
+ { { AOM_CDF8(5245, 7456, 12880, 15852, 20033, 23932, 27608) },
+ { AOM_CDF8(2054, 3472, 5869, 14232, 18242, 20590, 26752) } } },
+ { { { AOM_CDF8(685, 933, 1488, 2714, 4766, 8562, 19254) },
+ { AOM_CDF8(217, 352, 618, 2303, 5261, 9969, 17472) } },
+ { { AOM_CDF8(8045, 11200, 15497, 19595, 23948, 27408, 30938) },
+ { AOM_CDF8(2310, 4160, 7471, 14997, 17931, 20768, 30240) } } },
+ { { { AOM_CDF8(1366, 1738, 2527, 5016, 9355, 15797, 24643) },
+ { AOM_CDF8(354, 558, 944, 2760, 7287, 14037, 21779) } },
+ { { AOM_CDF8(13627, 16246, 20173, 24429, 27948, 30415, 31863) },
+ { AOM_CDF8(6275, 9889, 14769, 23164, 27988, 30493, 32272) } } },
+ { { { AOM_CDF8(3472, 4885, 7489, 12481, 18517, 24536, 29635) },
+ { AOM_CDF8(886, 1731, 3271, 8469, 15569, 22126, 28383) } },
+ { { AOM_CDF8(24313, 26062, 28385, 30107, 31217, 31898, 32345) },
+ { AOM_CDF8(9165, 13282, 21150, 30286, 31894, 32571, 32712) } } }
+ };
+
+static const aom_cdf_prob
+ av1_default_eob_multi256_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(
+ 9)] = {
+ { { { AOM_CDF9(310, 584, 1887, 3589, 6168, 8611, 11352, 15652) },
+ { AOM_CDF9(998, 1850, 2998, 5604, 17341, 19888, 22899, 25583) } },
+ { { AOM_CDF9(2520, 3240, 5952, 8870, 12577, 17558, 19954, 24168) },
+ { AOM_CDF9(2203, 4130, 7435, 10739, 20652, 23681, 25609, 27261) } } },
+ { { { AOM_CDF9(1448, 2109, 4151, 6263, 9329, 13260, 17944, 23300) },
+ { AOM_CDF9(399, 1019, 1749, 3038, 10444, 15546, 22739, 27294) } },
+ { { AOM_CDF9(6402, 8148, 12623, 15072, 18728, 22847, 26447, 29377) },
+ { AOM_CDF9(1674, 3252, 5734, 10159, 22397, 23802, 24821, 30940) } } },
+ { { { AOM_CDF9(3089, 3920, 6038, 9460, 14266, 19881, 25766, 29176) },
+ { AOM_CDF9(1084, 2358, 3488, 5122, 11483, 18103, 26023, 29799) } },
+ { { AOM_CDF9(11514, 13794, 17480, 20754, 24361, 27378, 29492, 31277) },
+ { AOM_CDF9(6571, 9610, 15516, 21826, 29092, 30829, 31842,
+ 32708) } } },
+ { { { AOM_CDF9(5348, 7113, 11820, 15924, 22106, 26777, 30334, 31757) },
+ { AOM_CDF9(2453, 4474, 6307, 8777, 16474, 22975, 29000, 31547) } },
+ { { AOM_CDF9(23110, 24597, 27140, 28894, 30167, 30927, 31392, 32094) },
+ { AOM_CDF9(9998, 17661, 25178, 28097, 31308, 32038, 32403,
+ 32695) } } }
+ };
+
+static const aom_cdf_prob
+ av1_default_eob_multi512_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(
+ 10)] = { { { { AOM_CDF10(641, 983, 3707, 5430, 10234, 14958, 18788,
+ 23412, 26061) },
+ { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+ 26214, 29491) } },
+ { { AOM_CDF10(5095, 6446, 9996, 13354, 16017, 17986, 20919,
+ 26129, 29140) },
+ { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+ 26214, 29491) } } },
+ { { { AOM_CDF10(1230, 2278, 5035, 7776, 11871, 15346, 19590,
+ 24584, 28749) },
+ { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+ 26214, 29491) } },
+ { { AOM_CDF10(7265, 9979, 15819, 19250, 21780, 23846, 26478,
+ 28396, 31811) },
+ { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+ 26214, 29491) } } },
+ { { { AOM_CDF10(2624, 3936, 6480, 9686, 13979, 17726, 23267,
+ 28410, 31078) },
+ { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+ 26214, 29491) } },
+ { { AOM_CDF10(12015, 14769, 19588, 22052, 24222, 25812,
+ 27300, 29219, 32114) },
+ { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+ 26214, 29491) } } },
+ { { { AOM_CDF10(5927, 7809, 10923, 14597, 19439, 24135, 28456,
+ 31142, 32060) },
+ { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+ 26214, 29491) } },
+ { { AOM_CDF10(21093, 23043, 25742, 27658, 29097, 29716,
+ 30073, 30820, 31956) },
+ { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+ 26214, 29491) } } } };
-static const coeff_cdf_model
-av1_default_coef_head_cdfs_q1[TX_SIZES][PLANE_TYPES] = {
- { // TX 4X4
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(21480), AOM_ICDF(22344), AOM_ICDF(27339), AOM_ICDF(29181),
- AOM_ICDF(29765), AOM_ICDF(32768), },
- {AOM_ICDF(9705), AOM_ICDF(12374), AOM_ICDF(20269), AOM_ICDF(24109),
- AOM_ICDF(25071), AOM_ICDF(32768), },
- {AOM_ICDF(2883), AOM_ICDF(6716), AOM_ICDF(10461), AOM_ICDF(16169),
- AOM_ICDF(17355), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(8632), AOM_ICDF(15472), AOM_ICDF(26027), AOM_ICDF(26596),
- AOM_ICDF(32768), },
- {AOM_ICDF(8543), AOM_ICDF(14383), AOM_ICDF(25665), AOM_ICDF(26207),
- AOM_ICDF(32768), },
- {AOM_ICDF(8561), AOM_ICDF(12583), AOM_ICDF(22962), AOM_ICDF(23529),
- AOM_ICDF(32768), },
- {AOM_ICDF(6538), AOM_ICDF(8023), AOM_ICDF(18106), AOM_ICDF(18672),
- AOM_ICDF(32768), },
- {AOM_ICDF(4363), AOM_ICDF(4797), AOM_ICDF(12512), AOM_ICDF(12937),
- AOM_ICDF(32768), },
- {AOM_ICDF(2471), AOM_ICDF(2791), AOM_ICDF(7274), AOM_ICDF(7605),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(14783), AOM_ICDF(18891), AOM_ICDF(29122), AOM_ICDF(29700),
- AOM_ICDF(32768), },
- {AOM_ICDF(11829), AOM_ICDF(16696), AOM_ICDF(28114), AOM_ICDF(28591),
- AOM_ICDF(32768), },
- {AOM_ICDF(8965), AOM_ICDF(11076), AOM_ICDF(23514), AOM_ICDF(24031),
- AOM_ICDF(32768), },
- {AOM_ICDF(6257), AOM_ICDF(7011), AOM_ICDF(17779), AOM_ICDF(18315),
- AOM_ICDF(32768), },
- {AOM_ICDF(4329), AOM_ICDF(4704), AOM_ICDF(12448), AOM_ICDF(12839),
- AOM_ICDF(32768), },
- {AOM_ICDF(2542), AOM_ICDF(2860), AOM_ICDF(7886), AOM_ICDF(8207),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(19181), AOM_ICDF(22038), AOM_ICDF(30697), AOM_ICDF(31106),
- AOM_ICDF(32768), },
- {AOM_ICDF(12174), AOM_ICDF(17208), AOM_ICDF(28897), AOM_ICDF(29328),
- AOM_ICDF(32768), },
- {AOM_ICDF(8420), AOM_ICDF(10706), AOM_ICDF(23788), AOM_ICDF(24321),
- AOM_ICDF(32768), },
- {AOM_ICDF(6153), AOM_ICDF(6850), AOM_ICDF(17983), AOM_ICDF(18530),
- AOM_ICDF(32768), },
- {AOM_ICDF(4168), AOM_ICDF(4524), AOM_ICDF(12547), AOM_ICDF(12983),
- AOM_ICDF(32768), },
- {AOM_ICDF(3136), AOM_ICDF(3480), AOM_ICDF(9221), AOM_ICDF(9659),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(18701), AOM_ICDF(23907), AOM_ICDF(31282), AOM_ICDF(31695),
- AOM_ICDF(32768), },
- {AOM_ICDF(12655), AOM_ICDF(19258), AOM_ICDF(29824), AOM_ICDF(30279),
- AOM_ICDF(32768), },
- {AOM_ICDF(8699), AOM_ICDF(11467), AOM_ICDF(24763), AOM_ICDF(25450),
- AOM_ICDF(32768), },
- {AOM_ICDF(6268), AOM_ICDF(7027), AOM_ICDF(18397), AOM_ICDF(19102),
- AOM_ICDF(32768), },
- {AOM_ICDF(5613), AOM_ICDF(6020), AOM_ICDF(14084), AOM_ICDF(14637),
- AOM_ICDF(32768), },
- {AOM_ICDF(2443), AOM_ICDF(2919), AOM_ICDF(8222), AOM_ICDF(8639),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(6156), AOM_ICDF(23586), AOM_ICDF(30739), AOM_ICDF(31476),
- AOM_ICDF(32768), },
- {AOM_ICDF(6056), AOM_ICDF(21852), AOM_ICDF(29323), AOM_ICDF(30442),
- AOM_ICDF(32768), },
- {AOM_ICDF(6113), AOM_ICDF(14408), AOM_ICDF(24331), AOM_ICDF(26899),
- AOM_ICDF(32768), },
- {AOM_ICDF(5825), AOM_ICDF(9328), AOM_ICDF(18946), AOM_ICDF(22143),
- AOM_ICDF(32768), },
- {AOM_ICDF(5023), AOM_ICDF(6340), AOM_ICDF(14812), AOM_ICDF(17429),
- AOM_ICDF(32768), },
- {AOM_ICDF(5140), AOM_ICDF(6104), AOM_ICDF(11565), AOM_ICDF(14135),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(12606), AOM_ICDF(20577), AOM_ICDF(21354), AOM_ICDF(29249),
- AOM_ICDF(29714), AOM_ICDF(32768), },
- {AOM_ICDF(8630), AOM_ICDF(17728), AOM_ICDF(19353), AOM_ICDF(27722),
- AOM_ICDF(28219), AOM_ICDF(32768), },
- {AOM_ICDF(3040), AOM_ICDF(12616), AOM_ICDF(14286), AOM_ICDF(23918),
- AOM_ICDF(24539), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(20824), AOM_ICDF(21610), AOM_ICDF(31110), AOM_ICDF(31445),
- AOM_ICDF(32768), },
- {AOM_ICDF(15597), AOM_ICDF(17692), AOM_ICDF(29670), AOM_ICDF(30015),
- AOM_ICDF(32768), },
- {AOM_ICDF(8954), AOM_ICDF(10007), AOM_ICDF(23515), AOM_ICDF(23902),
- AOM_ICDF(32768), },
- {AOM_ICDF(6693), AOM_ICDF(7282), AOM_ICDF(18144), AOM_ICDF(18537),
- AOM_ICDF(32768), },
- {AOM_ICDF(4048), AOM_ICDF(4451), AOM_ICDF(12255), AOM_ICDF(12626),
- AOM_ICDF(32768), },
- {AOM_ICDF(2619), AOM_ICDF(2960), AOM_ICDF(7084), AOM_ICDF(7429),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(21628), AOM_ICDF(22786), AOM_ICDF(31520), AOM_ICDF(31865),
- AOM_ICDF(32768), },
- {AOM_ICDF(15854), AOM_ICDF(17925), AOM_ICDF(29872), AOM_ICDF(30228),
- AOM_ICDF(32768), },
- {AOM_ICDF(8120), AOM_ICDF(8815), AOM_ICDF(22575), AOM_ICDF(22964),
- AOM_ICDF(32768), },
- {AOM_ICDF(5006), AOM_ICDF(5427), AOM_ICDF(15724), AOM_ICDF(16101),
- AOM_ICDF(32768), },
- {AOM_ICDF(2967), AOM_ICDF(3311), AOM_ICDF(9553), AOM_ICDF(9913),
- AOM_ICDF(32768), },
- {AOM_ICDF(2878), AOM_ICDF(3188), AOM_ICDF(5418), AOM_ICDF(5825),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(21594), AOM_ICDF(23721), AOM_ICDF(31496), AOM_ICDF(31872),
- AOM_ICDF(32768), },
- {AOM_ICDF(15704), AOM_ICDF(18452), AOM_ICDF(30207), AOM_ICDF(30585),
- AOM_ICDF(32768), },
- {AOM_ICDF(8637), AOM_ICDF(9546), AOM_ICDF(23803), AOM_ICDF(24254),
- AOM_ICDF(32768), },
- {AOM_ICDF(5991), AOM_ICDF(6479), AOM_ICDF(17619), AOM_ICDF(18099),
- AOM_ICDF(32768), },
- {AOM_ICDF(3856), AOM_ICDF(4220), AOM_ICDF(11623), AOM_ICDF(12111),
- AOM_ICDF(32768), },
- {AOM_ICDF(3501), AOM_ICDF(3825), AOM_ICDF(6760), AOM_ICDF(7246),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(19929), AOM_ICDF(23849), AOM_ICDF(31581), AOM_ICDF(31956),
- AOM_ICDF(32768), },
- {AOM_ICDF(14239), AOM_ICDF(19461), AOM_ICDF(30323), AOM_ICDF(30761),
- AOM_ICDF(32768), },
- {AOM_ICDF(8094), AOM_ICDF(9844), AOM_ICDF(23595), AOM_ICDF(24338),
- AOM_ICDF(32768), },
- {AOM_ICDF(5204), AOM_ICDF(5848), AOM_ICDF(16396), AOM_ICDF(17121),
- AOM_ICDF(32768), },
- {AOM_ICDF(3568), AOM_ICDF(3961), AOM_ICDF(10658), AOM_ICDF(11301),
- AOM_ICDF(32768), },
- {AOM_ICDF(1594), AOM_ICDF(1913), AOM_ICDF(5552), AOM_ICDF(6040),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(13512), AOM_ICDF(24112), AOM_ICDF(31648), AOM_ICDF(32057),
- AOM_ICDF(32768), },
- {AOM_ICDF(10595), AOM_ICDF(22378), AOM_ICDF(30592), AOM_ICDF(31236),
- AOM_ICDF(32768), },
- {AOM_ICDF(7571), AOM_ICDF(13305), AOM_ICDF(24936), AOM_ICDF(26656),
- AOM_ICDF(32768), },
- {AOM_ICDF(6163), AOM_ICDF(8207), AOM_ICDF(18688), AOM_ICDF(20500),
- AOM_ICDF(32768), },
- {AOM_ICDF(3185), AOM_ICDF(4449), AOM_ICDF(13298), AOM_ICDF(14707),
- AOM_ICDF(32768), },
- {AOM_ICDF(1890), AOM_ICDF(2731), AOM_ICDF(7562), AOM_ICDF(8192),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(26689), AOM_ICDF(27259), AOM_ICDF(30590), AOM_ICDF(31538),
- AOM_ICDF(31930), AOM_ICDF(32768), },
- {AOM_ICDF(17843), AOM_ICDF(19709), AOM_ICDF(27299), AOM_ICDF(29813),
- AOM_ICDF(30435), AOM_ICDF(32768), },
- {AOM_ICDF(9138), AOM_ICDF(13232), AOM_ICDF(20487), AOM_ICDF(25798),
- AOM_ICDF(26794), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(13264), AOM_ICDF(22970), AOM_ICDF(30914), AOM_ICDF(31354),
- AOM_ICDF(32768), },
- {AOM_ICDF(11647), AOM_ICDF(20651), AOM_ICDF(30191), AOM_ICDF(30692),
- AOM_ICDF(32768), },
- {AOM_ICDF(10449), AOM_ICDF(15871), AOM_ICDF(27240), AOM_ICDF(27909),
- AOM_ICDF(32768), },
- {AOM_ICDF(7759), AOM_ICDF(9400), AOM_ICDF(22161), AOM_ICDF(22812),
- AOM_ICDF(32768), },
- {AOM_ICDF(4095), AOM_ICDF(4544), AOM_ICDF(13856), AOM_ICDF(14309),
- AOM_ICDF(32768), },
- {AOM_ICDF(3199), AOM_ICDF(3509), AOM_ICDF(8639), AOM_ICDF(8964),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(18180), AOM_ICDF(25717), AOM_ICDF(31446), AOM_ICDF(31899),
- AOM_ICDF(32768), },
- {AOM_ICDF(14593), AOM_ICDF(22211), AOM_ICDF(30845), AOM_ICDF(31282),
- AOM_ICDF(32768), },
- {AOM_ICDF(10443), AOM_ICDF(13816), AOM_ICDF(27239), AOM_ICDF(27789),
- AOM_ICDF(32768), },
- {AOM_ICDF(6760), AOM_ICDF(7698), AOM_ICDF(19648), AOM_ICDF(20234),
- AOM_ICDF(32768), },
- {AOM_ICDF(3896), AOM_ICDF(4253), AOM_ICDF(12678), AOM_ICDF(13056),
- AOM_ICDF(32768), },
- {AOM_ICDF(5461), AOM_ICDF(6722), AOM_ICDF(13443), AOM_ICDF(14704),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(22145), AOM_ICDF(27566), AOM_ICDF(31813), AOM_ICDF(32212),
- AOM_ICDF(32768), },
- {AOM_ICDF(15241), AOM_ICDF(23215), AOM_ICDF(31215), AOM_ICDF(31658),
- AOM_ICDF(32768), },
- {AOM_ICDF(11148), AOM_ICDF(15527), AOM_ICDF(28336), AOM_ICDF(28891),
- AOM_ICDF(32768), },
- {AOM_ICDF(8864), AOM_ICDF(10402), AOM_ICDF(24069), AOM_ICDF(24811),
- AOM_ICDF(32768), },
- {AOM_ICDF(6919), AOM_ICDF(7527), AOM_ICDF(19607), AOM_ICDF(20260),
- AOM_ICDF(32768), },
- {AOM_ICDF(5174), AOM_ICDF(10348), AOM_ICDF(18971), AOM_ICDF(25869),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(18795), AOM_ICDF(27901), AOM_ICDF(31907), AOM_ICDF(32272),
- AOM_ICDF(32768), },
- {AOM_ICDF(13177), AOM_ICDF(24166), AOM_ICDF(31395), AOM_ICDF(31820),
- AOM_ICDF(32768), },
- {AOM_ICDF(9217), AOM_ICDF(15410), AOM_ICDF(28101), AOM_ICDF(28868),
- AOM_ICDF(32768), },
- {AOM_ICDF(6328), AOM_ICDF(8749), AOM_ICDF(21695), AOM_ICDF(22954),
- AOM_ICDF(32768), },
- {AOM_ICDF(15672), AOM_ICDF(17809), AOM_ICDF(22795), AOM_ICDF(24932),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(9431), AOM_ICDF(28094), AOM_ICDF(31965), AOM_ICDF(32338),
- AOM_ICDF(32768), },
- {AOM_ICDF(8107), AOM_ICDF(26038), AOM_ICDF(31393), AOM_ICDF(32024),
- AOM_ICDF(32768), },
- {AOM_ICDF(9347), AOM_ICDF(19880), AOM_ICDF(28342), AOM_ICDF(29759),
- AOM_ICDF(32768), },
- {AOM_ICDF(7092), AOM_ICDF(13694), AOM_ICDF(25432), AOM_ICDF(28366),
- AOM_ICDF(32768), },
- {AOM_ICDF(7802), AOM_ICDF(12483), AOM_ICDF(21845), AOM_ICDF(26526),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(29212), AOM_ICDF(29998), AOM_ICDF(31256), AOM_ICDF(32035),
- AOM_ICDF(32360), AOM_ICDF(32768), },
- {AOM_ICDF(19150), AOM_ICDF(23189), AOM_ICDF(28117), AOM_ICDF(31168),
- AOM_ICDF(31611), AOM_ICDF(32768), },
- {AOM_ICDF(9324), AOM_ICDF(18178), AOM_ICDF(23556), AOM_ICDF(29422),
- AOM_ICDF(30204), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(20406), AOM_ICDF(26462), AOM_ICDF(31971), AOM_ICDF(32298),
- AOM_ICDF(32768), },
- {AOM_ICDF(15834), AOM_ICDF(22647), AOM_ICDF(31547), AOM_ICDF(31902),
- AOM_ICDF(32768), },
- {AOM_ICDF(11047), AOM_ICDF(15431), AOM_ICDF(27825), AOM_ICDF(28393),
- AOM_ICDF(32768), },
- {AOM_ICDF(8665), AOM_ICDF(11083), AOM_ICDF(22493), AOM_ICDF(23423),
- AOM_ICDF(32768), },
- {AOM_ICDF(6191), AOM_ICDF(7733), AOM_ICDF(16624), AOM_ICDF(17708),
- AOM_ICDF(32768), },
- {AOM_ICDF(3210), AOM_ICDF(3875), AOM_ICDF(10937), AOM_ICDF(11867),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(21520), AOM_ICDF(27152), AOM_ICDF(31994), AOM_ICDF(32324),
- AOM_ICDF(32768), },
- {AOM_ICDF(17519), AOM_ICDF(23609), AOM_ICDF(31670), AOM_ICDF(32022),
- AOM_ICDF(32768), },
- {AOM_ICDF(10647), AOM_ICDF(14610), AOM_ICDF(28389), AOM_ICDF(28873),
- AOM_ICDF(32768), },
- {AOM_ICDF(7660), AOM_ICDF(10704), AOM_ICDF(22849), AOM_ICDF(23680),
- AOM_ICDF(32768), },
- {AOM_ICDF(5535), AOM_ICDF(6454), AOM_ICDF(17275), AOM_ICDF(17753),
- AOM_ICDF(32768), },
- {AOM_ICDF(4096), AOM_ICDF(6144), AOM_ICDF(13653), AOM_ICDF(15701),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(22487), AOM_ICDF(27996), AOM_ICDF(32020), AOM_ICDF(32381),
- AOM_ICDF(32768), },
- {AOM_ICDF(17371), AOM_ICDF(24453), AOM_ICDF(31777), AOM_ICDF(32152),
- AOM_ICDF(32768), },
- {AOM_ICDF(11366), AOM_ICDF(16072), AOM_ICDF(29193), AOM_ICDF(29761),
- AOM_ICDF(32768), },
- {AOM_ICDF(12545), AOM_ICDF(13869), AOM_ICDF(24642), AOM_ICDF(25603),
- AOM_ICDF(32768), },
- {AOM_ICDF(4119), AOM_ICDF(5056), AOM_ICDF(16103), AOM_ICDF(17601),
- AOM_ICDF(32768), },
- {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(18432), AOM_ICDF(24576),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(19350), AOM_ICDF(28517), AOM_ICDF(32050), AOM_ICDF(32401),
- AOM_ICDF(32768), },
- {AOM_ICDF(14752), AOM_ICDF(25831), AOM_ICDF(31897), AOM_ICDF(32261),
- AOM_ICDF(32768), },
- {AOM_ICDF(11157), AOM_ICDF(20816), AOM_ICDF(29821), AOM_ICDF(30635),
- AOM_ICDF(32768), },
- {AOM_ICDF(8157), AOM_ICDF(9691), AOM_ICDF(22868), AOM_ICDF(23705),
- AOM_ICDF(32768), },
- {AOM_ICDF(8192), AOM_ICDF(10650), AOM_ICDF(17203), AOM_ICDF(19661),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(15557), AOM_ICDF(29043), AOM_ICDF(32047), AOM_ICDF(32424),
- AOM_ICDF(32768), },
- {AOM_ICDF(10253), AOM_ICDF(27948), AOM_ICDF(31922), AOM_ICDF(32329),
- AOM_ICDF(32768), },
- {AOM_ICDF(7797), AOM_ICDF(18860), AOM_ICDF(28870), AOM_ICDF(30661),
- AOM_ICDF(32768), },
- {AOM_ICDF(5617), AOM_ICDF(11235), AOM_ICDF(27151), AOM_ICDF(29959),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- },
- { // TX 8X8
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(20585), AOM_ICDF(21554), AOM_ICDF(27179), AOM_ICDF(28995),
- AOM_ICDF(30170), AOM_ICDF(32768), },
- {AOM_ICDF(6316), AOM_ICDF(8987), AOM_ICDF(15571), AOM_ICDF(19766),
- AOM_ICDF(21417), AOM_ICDF(32768), },
- {AOM_ICDF(1426), AOM_ICDF(4693), AOM_ICDF(6721), AOM_ICDF(11940),
- AOM_ICDF(12874), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(10177), AOM_ICDF(14297), AOM_ICDF(24926), AOM_ICDF(25396),
- AOM_ICDF(32768), },
- {AOM_ICDF(8812), AOM_ICDF(13381), AOM_ICDF(24128), AOM_ICDF(24649),
- AOM_ICDF(32768), },
- {AOM_ICDF(8090), AOM_ICDF(11314), AOM_ICDF(21329), AOM_ICDF(21906),
- AOM_ICDF(32768), },
- {AOM_ICDF(6324), AOM_ICDF(7511), AOM_ICDF(17212), AOM_ICDF(17717),
- AOM_ICDF(32768), },
- {AOM_ICDF(4272), AOM_ICDF(4718), AOM_ICDF(12016), AOM_ICDF(12415),
- AOM_ICDF(32768), },
- {AOM_ICDF(2129), AOM_ICDF(2445), AOM_ICDF(6433), AOM_ICDF(6755),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(15709), AOM_ICDF(18339), AOM_ICDF(28174), AOM_ICDF(28566),
- AOM_ICDF(32768), },
- {AOM_ICDF(12592), AOM_ICDF(15866), AOM_ICDF(27071), AOM_ICDF(27475),
- AOM_ICDF(32768), },
- {AOM_ICDF(9361), AOM_ICDF(10768), AOM_ICDF(22752), AOM_ICDF(23166),
- AOM_ICDF(32768), },
- {AOM_ICDF(6525), AOM_ICDF(7048), AOM_ICDF(17478), AOM_ICDF(17863),
- AOM_ICDF(32768), },
- {AOM_ICDF(4314), AOM_ICDF(4656), AOM_ICDF(12242), AOM_ICDF(12579),
- AOM_ICDF(32768), },
- {AOM_ICDF(2419), AOM_ICDF(2735), AOM_ICDF(7387), AOM_ICDF(7707),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(20453), AOM_ICDF(22253), AOM_ICDF(29963), AOM_ICDF(30329),
- AOM_ICDF(32768), },
- {AOM_ICDF(14090), AOM_ICDF(16483), AOM_ICDF(27992), AOM_ICDF(28355),
- AOM_ICDF(32768), },
- {AOM_ICDF(8737), AOM_ICDF(9396), AOM_ICDF(22134), AOM_ICDF(22499),
- AOM_ICDF(32768), },
- {AOM_ICDF(5543), AOM_ICDF(5904), AOM_ICDF(15783), AOM_ICDF(16122),
- AOM_ICDF(32768), },
- {AOM_ICDF(3358), AOM_ICDF(3677), AOM_ICDF(10362), AOM_ICDF(10680),
- AOM_ICDF(32768), },
- {AOM_ICDF(1875), AOM_ICDF(2187), AOM_ICDF(5982), AOM_ICDF(6294),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(23693), AOM_ICDF(25306), AOM_ICDF(31174), AOM_ICDF(31516),
- AOM_ICDF(32768), },
- {AOM_ICDF(14804), AOM_ICDF(16843), AOM_ICDF(28713), AOM_ICDF(29058),
- AOM_ICDF(32768), },
- {AOM_ICDF(8442), AOM_ICDF(8976), AOM_ICDF(22003), AOM_ICDF(22353),
- AOM_ICDF(32768), },
- {AOM_ICDF(5397), AOM_ICDF(5741), AOM_ICDF(15529), AOM_ICDF(15867),
- AOM_ICDF(32768), },
- {AOM_ICDF(3322), AOM_ICDF(3639), AOM_ICDF(10248), AOM_ICDF(10570),
- AOM_ICDF(32768), },
- {AOM_ICDF(1852), AOM_ICDF(2161), AOM_ICDF(5980), AOM_ICDF(6290),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(24219), AOM_ICDF(26214), AOM_ICDF(31501), AOM_ICDF(31844),
- AOM_ICDF(32768), },
- {AOM_ICDF(15202), AOM_ICDF(17709), AOM_ICDF(29450), AOM_ICDF(29807),
- AOM_ICDF(32768), },
- {AOM_ICDF(9044), AOM_ICDF(9603), AOM_ICDF(23134), AOM_ICDF(23506),
- AOM_ICDF(32768), },
- {AOM_ICDF(5849), AOM_ICDF(6187), AOM_ICDF(16695), AOM_ICDF(17032),
- AOM_ICDF(32768), },
- {AOM_ICDF(3734), AOM_ICDF(4050), AOM_ICDF(11408), AOM_ICDF(11727),
- AOM_ICDF(32768), },
- {AOM_ICDF(1898), AOM_ICDF(2201), AOM_ICDF(6126), AOM_ICDF(6430),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(10195), AOM_ICDF(21186), AOM_ICDF(23530), AOM_ICDF(29551),
- AOM_ICDF(30281), AOM_ICDF(32768), },
- {AOM_ICDF(3950), AOM_ICDF(15607), AOM_ICDF(18726), AOM_ICDF(26764),
- AOM_ICDF(27758), AOM_ICDF(32768), },
- {AOM_ICDF(942), AOM_ICDF(11209), AOM_ICDF(12954), AOM_ICDF(22126),
- AOM_ICDF(23296), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(24110), AOM_ICDF(24717), AOM_ICDF(31199), AOM_ICDF(31532),
- AOM_ICDF(32768), },
- {AOM_ICDF(16869), AOM_ICDF(18762), AOM_ICDF(29600), AOM_ICDF(29951),
- AOM_ICDF(32768), },
- {AOM_ICDF(10702), AOM_ICDF(12122), AOM_ICDF(25122), AOM_ICDF(25503),
- AOM_ICDF(32768), },
- {AOM_ICDF(8221), AOM_ICDF(9053), AOM_ICDF(20816), AOM_ICDF(21206),
- AOM_ICDF(32768), },
- {AOM_ICDF(5635), AOM_ICDF(6244), AOM_ICDF(15801), AOM_ICDF(16186),
- AOM_ICDF(32768), },
- {AOM_ICDF(3776), AOM_ICDF(4210), AOM_ICDF(10380), AOM_ICDF(10766),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(24719), AOM_ICDF(25439), AOM_ICDF(31522), AOM_ICDF(31849),
- AOM_ICDF(32768), },
- {AOM_ICDF(16693), AOM_ICDF(18162), AOM_ICDF(29698), AOM_ICDF(30036),
- AOM_ICDF(32768), },
- {AOM_ICDF(9340), AOM_ICDF(10024), AOM_ICDF(23513), AOM_ICDF(23867),
- AOM_ICDF(32768), },
- {AOM_ICDF(6269), AOM_ICDF(6709), AOM_ICDF(17711), AOM_ICDF(18060),
- AOM_ICDF(32768), },
- {AOM_ICDF(3841), AOM_ICDF(4185), AOM_ICDF(11892), AOM_ICDF(12230),
- AOM_ICDF(32768), },
- {AOM_ICDF(1944), AOM_ICDF(2259), AOM_ICDF(6437), AOM_ICDF(6776),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(25795), AOM_ICDF(26524), AOM_ICDF(31784), AOM_ICDF(32108),
- AOM_ICDF(32768), },
- {AOM_ICDF(17514), AOM_ICDF(18812), AOM_ICDF(30221), AOM_ICDF(30557),
- AOM_ICDF(32768), },
- {AOM_ICDF(9099), AOM_ICDF(9576), AOM_ICDF(23502), AOM_ICDF(23843),
- AOM_ICDF(32768), },
- {AOM_ICDF(5738), AOM_ICDF(6097), AOM_ICDF(16847), AOM_ICDF(17182),
- AOM_ICDF(32768), },
- {AOM_ICDF(3411), AOM_ICDF(3730), AOM_ICDF(10729), AOM_ICDF(11057),
- AOM_ICDF(32768), },
- {AOM_ICDF(1282), AOM_ICDF(1591), AOM_ICDF(4705), AOM_ICDF(5013),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(26360), AOM_ICDF(27205), AOM_ICDF(31918), AOM_ICDF(32240),
- AOM_ICDF(32768), },
- {AOM_ICDF(18465), AOM_ICDF(19729), AOM_ICDF(30758), AOM_ICDF(31089),
- AOM_ICDF(32768), },
- {AOM_ICDF(9488), AOM_ICDF(9915), AOM_ICDF(24339), AOM_ICDF(24678),
- AOM_ICDF(32768), },
- {AOM_ICDF(5812), AOM_ICDF(6156), AOM_ICDF(17325), AOM_ICDF(17661),
- AOM_ICDF(32768), },
- {AOM_ICDF(3739), AOM_ICDF(4065), AOM_ICDF(10932), AOM_ICDF(11265),
- AOM_ICDF(32768), },
- {AOM_ICDF(1391), AOM_ICDF(1700), AOM_ICDF(4764), AOM_ICDF(5073),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(27036), AOM_ICDF(28212), AOM_ICDF(31970), AOM_ICDF(32305),
- AOM_ICDF(32768), },
- {AOM_ICDF(18634), AOM_ICDF(21073), AOM_ICDF(31116), AOM_ICDF(31477),
- AOM_ICDF(32768), },
- {AOM_ICDF(9822), AOM_ICDF(10441), AOM_ICDF(24990), AOM_ICDF(25437),
- AOM_ICDF(32768), },
- {AOM_ICDF(6130), AOM_ICDF(6530), AOM_ICDF(17790), AOM_ICDF(18269),
- AOM_ICDF(32768), },
- {AOM_ICDF(3725), AOM_ICDF(4044), AOM_ICDF(11127), AOM_ICDF(11602),
- AOM_ICDF(32768), },
- {AOM_ICDF(1298), AOM_ICDF(1573), AOM_ICDF(4642), AOM_ICDF(5075),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(23042), AOM_ICDF(23702), AOM_ICDF(30487), AOM_ICDF(31370),
- AOM_ICDF(31898), AOM_ICDF(32768), },
- {AOM_ICDF(15512), AOM_ICDF(17357), AOM_ICDF(27018), AOM_ICDF(29404),
- AOM_ICDF(30377), AOM_ICDF(32768), },
- {AOM_ICDF(8935), AOM_ICDF(12713), AOM_ICDF(20545), AOM_ICDF(25580),
- AOM_ICDF(26931), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(15021), AOM_ICDF(24086), AOM_ICDF(30796), AOM_ICDF(31272),
- AOM_ICDF(32768), },
- {AOM_ICDF(13040), AOM_ICDF(21866), AOM_ICDF(30054), AOM_ICDF(30686),
- AOM_ICDF(32768), },
- {AOM_ICDF(10915), AOM_ICDF(16852), AOM_ICDF(27467), AOM_ICDF(28235),
- AOM_ICDF(32768), },
- {AOM_ICDF(8096), AOM_ICDF(10403), AOM_ICDF(22531), AOM_ICDF(23355),
- AOM_ICDF(32768), },
- {AOM_ICDF(4485), AOM_ICDF(5020), AOM_ICDF(13360), AOM_ICDF(13816),
- AOM_ICDF(32768), },
- {AOM_ICDF(1728), AOM_ICDF(2067), AOM_ICDF(5998), AOM_ICDF(6337),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(20845), AOM_ICDF(25929), AOM_ICDF(31278), AOM_ICDF(31670),
- AOM_ICDF(32768), },
- {AOM_ICDF(15553), AOM_ICDF(21602), AOM_ICDF(30338), AOM_ICDF(30745),
- AOM_ICDF(32768), },
- {AOM_ICDF(10953), AOM_ICDF(13829), AOM_ICDF(26398), AOM_ICDF(26854),
- AOM_ICDF(32768), },
- {AOM_ICDF(7900), AOM_ICDF(8858), AOM_ICDF(20869), AOM_ICDF(21378),
- AOM_ICDF(32768), },
- {AOM_ICDF(5225), AOM_ICDF(5579), AOM_ICDF(13764), AOM_ICDF(14087),
- AOM_ICDF(32768), },
- {AOM_ICDF(1881), AOM_ICDF(2352), AOM_ICDF(6742), AOM_ICDF(7212),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(25402), AOM_ICDF(28169), AOM_ICDF(31825), AOM_ICDF(32169),
- AOM_ICDF(32768), },
- {AOM_ICDF(17086), AOM_ICDF(21375), AOM_ICDF(30582), AOM_ICDF(30951),
- AOM_ICDF(32768), },
- {AOM_ICDF(11057), AOM_ICDF(12358), AOM_ICDF(25930), AOM_ICDF(26346),
- AOM_ICDF(32768), },
- {AOM_ICDF(6989), AOM_ICDF(7448), AOM_ICDF(18814), AOM_ICDF(19143),
- AOM_ICDF(32768), },
- {AOM_ICDF(4476), AOM_ICDF(4752), AOM_ICDF(16025), AOM_ICDF(16301),
- AOM_ICDF(32768), },
- {AOM_ICDF(2185), AOM_ICDF(4369), AOM_ICDF(12379), AOM_ICDF(14564),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(26444), AOM_ICDF(28656), AOM_ICDF(31864), AOM_ICDF(32231),
- AOM_ICDF(32768), },
- {AOM_ICDF(17642), AOM_ICDF(20848), AOM_ICDF(30615), AOM_ICDF(30967),
- AOM_ICDF(32768), },
- {AOM_ICDF(10973), AOM_ICDF(11732), AOM_ICDF(25256), AOM_ICDF(25612),
- AOM_ICDF(32768), },
- {AOM_ICDF(8325), AOM_ICDF(8726), AOM_ICDF(19826), AOM_ICDF(20146),
- AOM_ICDF(32768), },
- {AOM_ICDF(5294), AOM_ICDF(5568), AOM_ICDF(14056), AOM_ICDF(14330),
- AOM_ICDF(32768), },
- {AOM_ICDF(5461), AOM_ICDF(10923), AOM_ICDF(18204), AOM_ICDF(23666),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(27760), AOM_ICDF(29748), AOM_ICDF(31934), AOM_ICDF(32299),
- AOM_ICDF(32768), },
- {AOM_ICDF(17133), AOM_ICDF(21599), AOM_ICDF(30800), AOM_ICDF(31243),
- AOM_ICDF(32768), },
- {AOM_ICDF(12224), AOM_ICDF(13907), AOM_ICDF(26992), AOM_ICDF(27546),
- AOM_ICDF(32768), },
- {AOM_ICDF(9221), AOM_ICDF(9617), AOM_ICDF(21845), AOM_ICDF(22162),
- AOM_ICDF(32768), },
- {AOM_ICDF(5401), AOM_ICDF(6482), AOM_ICDF(18004), AOM_ICDF(19085),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(29286), AOM_ICDF(29932), AOM_ICDF(31576), AOM_ICDF(32075),
- AOM_ICDF(32408), AOM_ICDF(32768), },
- {AOM_ICDF(17969), AOM_ICDF(21693), AOM_ICDF(28937), AOM_ICDF(30945),
- AOM_ICDF(31682), AOM_ICDF(32768), },
- {AOM_ICDF(6607), AOM_ICDF(16160), AOM_ICDF(23280), AOM_ICDF(27595),
- AOM_ICDF(30027), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(24724), AOM_ICDF(28333), AOM_ICDF(32022), AOM_ICDF(32346),
- AOM_ICDF(32768), },
- {AOM_ICDF(18803), AOM_ICDF(24728), AOM_ICDF(31661), AOM_ICDF(32022),
- AOM_ICDF(32768), },
- {AOM_ICDF(14179), AOM_ICDF(20757), AOM_ICDF(30098), AOM_ICDF(30633),
- AOM_ICDF(32768), },
- {AOM_ICDF(12564), AOM_ICDF(17179), AOM_ICDF(27133), AOM_ICDF(28080),
- AOM_ICDF(32768), },
- {AOM_ICDF(10543), AOM_ICDF(13479), AOM_ICDF(23725), AOM_ICDF(25031),
- AOM_ICDF(32768), },
- {AOM_ICDF(11377), AOM_ICDF(12741), AOM_ICDF(21923), AOM_ICDF(22888),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(26071), AOM_ICDF(28609), AOM_ICDF(32053), AOM_ICDF(32374),
- AOM_ICDF(32768), },
- {AOM_ICDF(20389), AOM_ICDF(24820), AOM_ICDF(31690), AOM_ICDF(32027),
- AOM_ICDF(32768), },
- {AOM_ICDF(12977), AOM_ICDF(16892), AOM_ICDF(29053), AOM_ICDF(29445),
- AOM_ICDF(32768), },
- {AOM_ICDF(8745), AOM_ICDF(12303), AOM_ICDF(24164), AOM_ICDF(25209),
- AOM_ICDF(32768), },
- {AOM_ICDF(4042), AOM_ICDF(5052), AOM_ICDF(18333), AOM_ICDF(18910),
- AOM_ICDF(32768), },
- {AOM_ICDF(5461), AOM_ICDF(9557), AOM_ICDF(13653), AOM_ICDF(17749),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(27936), AOM_ICDF(29582), AOM_ICDF(32107), AOM_ICDF(32422),
- AOM_ICDF(32768), },
- {AOM_ICDF(22472), AOM_ICDF(25761), AOM_ICDF(31858), AOM_ICDF(32177),
- AOM_ICDF(32768), },
- {AOM_ICDF(14107), AOM_ICDF(16587), AOM_ICDF(29250), AOM_ICDF(29692),
- AOM_ICDF(32768), },
- {AOM_ICDF(10726), AOM_ICDF(11739), AOM_ICDF(23985), AOM_ICDF(24576),
- AOM_ICDF(32768), },
- {AOM_ICDF(5825), AOM_ICDF(8010), AOM_ICDF(18204), AOM_ICDF(20389),
- AOM_ICDF(32768), },
- {AOM_ICDF(8192), AOM_ICDF(14336), AOM_ICDF(20480), AOM_ICDF(26624),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(27066), AOM_ICDF(29025), AOM_ICDF(31972), AOM_ICDF(32338),
- AOM_ICDF(32768), },
- {AOM_ICDF(20639), AOM_ICDF(23330), AOM_ICDF(31616), AOM_ICDF(31985),
- AOM_ICDF(32768), },
- {AOM_ICDF(13468), AOM_ICDF(15091), AOM_ICDF(29902), AOM_ICDF(30243),
- AOM_ICDF(32768), },
- {AOM_ICDF(14473), AOM_ICDF(15019), AOM_ICDF(24030), AOM_ICDF(24439),
- AOM_ICDF(32768), },
- {AOM_ICDF(7864), AOM_ICDF(11796), AOM_ICDF(19661), AOM_ICDF(23593),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(28741), AOM_ICDF(30503), AOM_ICDF(32039), AOM_ICDF(32388),
- AOM_ICDF(32768), },
- {AOM_ICDF(19712), AOM_ICDF(25328), AOM_ICDF(31621), AOM_ICDF(32049),
- AOM_ICDF(32768), },
- {AOM_ICDF(13461), AOM_ICDF(17167), AOM_ICDF(29712), AOM_ICDF(30308),
- AOM_ICDF(32768), },
- {AOM_ICDF(10285), AOM_ICDF(11242), AOM_ICDF(27267), AOM_ICDF(28224),
- AOM_ICDF(32768), },
- {AOM_ICDF(5174), AOM_ICDF(10348), AOM_ICDF(17246), AOM_ICDF(22420),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- },
- { // TX 16X16
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(4353), AOM_ICDF(7056), AOM_ICDF(15884), AOM_ICDF(20594),
- AOM_ICDF(24026), AOM_ICDF(32768), },
- {AOM_ICDF(2397), AOM_ICDF(5417), AOM_ICDF(9610), AOM_ICDF(14451),
- AOM_ICDF(16689), AOM_ICDF(32768), },
- {AOM_ICDF(841), AOM_ICDF(3543), AOM_ICDF(4598), AOM_ICDF(9149),
- AOM_ICDF(9950), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(8763), AOM_ICDF(11845), AOM_ICDF(22684), AOM_ICDF(23211),
- AOM_ICDF(32768), },
- {AOM_ICDF(8074), AOM_ICDF(12129), AOM_ICDF(22232), AOM_ICDF(22924),
- AOM_ICDF(32768), },
- {AOM_ICDF(7453), AOM_ICDF(10017), AOM_ICDF(19822), AOM_ICDF(20662),
- AOM_ICDF(32768), },
- {AOM_ICDF(5825), AOM_ICDF(6998), AOM_ICDF(16346), AOM_ICDF(16952),
- AOM_ICDF(32768), },
- {AOM_ICDF(4059), AOM_ICDF(4481), AOM_ICDF(11444), AOM_ICDF(11852),
- AOM_ICDF(32768), },
- {AOM_ICDF(1973), AOM_ICDF(2289), AOM_ICDF(5827), AOM_ICDF(6149),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(15272), AOM_ICDF(17017), AOM_ICDF(26959), AOM_ICDF(27346),
- AOM_ICDF(32768), },
- {AOM_ICDF(12476), AOM_ICDF(14916), AOM_ICDF(26163), AOM_ICDF(26575),
- AOM_ICDF(32768), },
- {AOM_ICDF(9485), AOM_ICDF(10720), AOM_ICDF(22557), AOM_ICDF(22973),
- AOM_ICDF(32768), },
- {AOM_ICDF(6821), AOM_ICDF(7342), AOM_ICDF(17484), AOM_ICDF(17858),
- AOM_ICDF(32768), },
- {AOM_ICDF(4370), AOM_ICDF(4714), AOM_ICDF(12030), AOM_ICDF(12366),
- AOM_ICDF(32768), },
- {AOM_ICDF(2375), AOM_ICDF(2688), AOM_ICDF(6850), AOM_ICDF(7162),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(19929), AOM_ICDF(21244), AOM_ICDF(29489), AOM_ICDF(29829),
- AOM_ICDF(32768), },
- {AOM_ICDF(14005), AOM_ICDF(16066), AOM_ICDF(27595), AOM_ICDF(27947),
- AOM_ICDF(32768), },
- {AOM_ICDF(8918), AOM_ICDF(9550), AOM_ICDF(22126), AOM_ICDF(22488),
- AOM_ICDF(32768), },
- {AOM_ICDF(5741), AOM_ICDF(6095), AOM_ICDF(16004), AOM_ICDF(16340),
- AOM_ICDF(32768), },
- {AOM_ICDF(3558), AOM_ICDF(3873), AOM_ICDF(10340), AOM_ICDF(10657),
- AOM_ICDF(32768), },
- {AOM_ICDF(1822), AOM_ICDF(2134), AOM_ICDF(5530), AOM_ICDF(5843),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(23568), AOM_ICDF(24663), AOM_ICDF(30915), AOM_ICDF(31245),
- AOM_ICDF(32768), },
- {AOM_ICDF(15139), AOM_ICDF(16577), AOM_ICDF(28661), AOM_ICDF(28997),
- AOM_ICDF(32768), },
- {AOM_ICDF(8850), AOM_ICDF(9259), AOM_ICDF(22366), AOM_ICDF(22700),
- AOM_ICDF(32768), },
- {AOM_ICDF(5454), AOM_ICDF(5781), AOM_ICDF(15617), AOM_ICDF(15937),
- AOM_ICDF(32768), },
- {AOM_ICDF(3315), AOM_ICDF(3629), AOM_ICDF(10044), AOM_ICDF(10359),
- AOM_ICDF(32768), },
- {AOM_ICDF(1736), AOM_ICDF(2047), AOM_ICDF(5698), AOM_ICDF(6009),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(27011), AOM_ICDF(27875), AOM_ICDF(31721), AOM_ICDF(32046),
- AOM_ICDF(32768), },
- {AOM_ICDF(16855), AOM_ICDF(18018), AOM_ICDF(29676), AOM_ICDF(30005),
- AOM_ICDF(32768), },
- {AOM_ICDF(8916), AOM_ICDF(9282), AOM_ICDF(22431), AOM_ICDF(22760),
- AOM_ICDF(32768), },
- {AOM_ICDF(5391), AOM_ICDF(5710), AOM_ICDF(15343), AOM_ICDF(15662),
- AOM_ICDF(32768), },
- {AOM_ICDF(3316), AOM_ICDF(3629), AOM_ICDF(10223), AOM_ICDF(10537),
- AOM_ICDF(32768), },
- {AOM_ICDF(1891), AOM_ICDF(2202), AOM_ICDF(6076), AOM_ICDF(6387),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(5744), AOM_ICDF(15508), AOM_ICDF(23294), AOM_ICDF(28653),
- AOM_ICDF(30781), AOM_ICDF(32768), },
- {AOM_ICDF(2130), AOM_ICDF(11786), AOM_ICDF(17337), AOM_ICDF(24444),
- AOM_ICDF(27499), AOM_ICDF(32768), },
- {AOM_ICDF(615), AOM_ICDF(8230), AOM_ICDF(10191), AOM_ICDF(18291),
- AOM_ICDF(21029), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(25149), AOM_ICDF(25880), AOM_ICDF(31110), AOM_ICDF(31453),
- AOM_ICDF(32768), },
- {AOM_ICDF(17454), AOM_ICDF(20460), AOM_ICDF(29560), AOM_ICDF(29929),
- AOM_ICDF(32768), },
- {AOM_ICDF(11724), AOM_ICDF(14294), AOM_ICDF(25947), AOM_ICDF(26377),
- AOM_ICDF(32768), },
- {AOM_ICDF(9198), AOM_ICDF(10981), AOM_ICDF(22357), AOM_ICDF(22857),
- AOM_ICDF(32768), },
- {AOM_ICDF(7164), AOM_ICDF(8069), AOM_ICDF(18345), AOM_ICDF(18857),
- AOM_ICDF(32768), },
- {AOM_ICDF(5833), AOM_ICDF(6316), AOM_ICDF(14661), AOM_ICDF(15073),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(26117), AOM_ICDF(26928), AOM_ICDF(31526), AOM_ICDF(31850),
- AOM_ICDF(32768), },
- {AOM_ICDF(16540), AOM_ICDF(18394), AOM_ICDF(29402), AOM_ICDF(29740),
- AOM_ICDF(32768), },
- {AOM_ICDF(9908), AOM_ICDF(10886), AOM_ICDF(23865), AOM_ICDF(24223),
- AOM_ICDF(32768), },
- {AOM_ICDF(6805), AOM_ICDF(7383), AOM_ICDF(18402), AOM_ICDF(18777),
- AOM_ICDF(32768), },
- {AOM_ICDF(4259), AOM_ICDF(4638), AOM_ICDF(12791), AOM_ICDF(13136),
- AOM_ICDF(32768), },
- {AOM_ICDF(2274), AOM_ICDF(2584), AOM_ICDF(7391), AOM_ICDF(7713),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(27129), AOM_ICDF(27797), AOM_ICDF(31745), AOM_ICDF(32063),
- AOM_ICDF(32768), },
- {AOM_ICDF(17255), AOM_ICDF(18663), AOM_ICDF(29815), AOM_ICDF(30145),
- AOM_ICDF(32768), },
- {AOM_ICDF(9538), AOM_ICDF(10091), AOM_ICDF(23590), AOM_ICDF(23931),
- AOM_ICDF(32768), },
- {AOM_ICDF(6366), AOM_ICDF(6732), AOM_ICDF(17467), AOM_ICDF(17800),
- AOM_ICDF(32768), },
- {AOM_ICDF(3701), AOM_ICDF(4018), AOM_ICDF(11326), AOM_ICDF(11652),
- AOM_ICDF(32768), },
- {AOM_ICDF(1976), AOM_ICDF(2284), AOM_ICDF(6325), AOM_ICDF(6633),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(27944), AOM_ICDF(28479), AOM_ICDF(31894), AOM_ICDF(32211),
- AOM_ICDF(32768), },
- {AOM_ICDF(18032), AOM_ICDF(18997), AOM_ICDF(30130), AOM_ICDF(30452),
- AOM_ICDF(32768), },
- {AOM_ICDF(9467), AOM_ICDF(9842), AOM_ICDF(23729), AOM_ICDF(24051),
- AOM_ICDF(32768), },
- {AOM_ICDF(5900), AOM_ICDF(6226), AOM_ICDF(16797), AOM_ICDF(17116),
- AOM_ICDF(32768), },
- {AOM_ICDF(3282), AOM_ICDF(3595), AOM_ICDF(10418), AOM_ICDF(10730),
- AOM_ICDF(32768), },
- {AOM_ICDF(2289), AOM_ICDF(2601), AOM_ICDF(6048), AOM_ICDF(6360),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(29278), AOM_ICDF(29837), AOM_ICDF(32038), AOM_ICDF(32360),
- AOM_ICDF(32768), },
- {AOM_ICDF(19805), AOM_ICDF(20846), AOM_ICDF(31007), AOM_ICDF(31343),
- AOM_ICDF(32768), },
- {AOM_ICDF(9976), AOM_ICDF(10433), AOM_ICDF(24483), AOM_ICDF(24848),
- AOM_ICDF(32768), },
- {AOM_ICDF(5971), AOM_ICDF(6354), AOM_ICDF(17184), AOM_ICDF(17539),
- AOM_ICDF(32768), },
- {AOM_ICDF(3497), AOM_ICDF(4693), AOM_ICDF(11940), AOM_ICDF(12291),
- AOM_ICDF(32768), },
- {AOM_ICDF(1776), AOM_ICDF(2357), AOM_ICDF(6260), AOM_ICDF(6918),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(23166), AOM_ICDF(23821), AOM_ICDF(30269), AOM_ICDF(31075),
- AOM_ICDF(31847), AOM_ICDF(32768), },
- {AOM_ICDF(14510), AOM_ICDF(16494), AOM_ICDF(25635), AOM_ICDF(28335),
- AOM_ICDF(29759), AOM_ICDF(32768), },
- {AOM_ICDF(7730), AOM_ICDF(12354), AOM_ICDF(18089), AOM_ICDF(24005),
- AOM_ICDF(25442), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(17908), AOM_ICDF(24824), AOM_ICDF(30533), AOM_ICDF(31042),
- AOM_ICDF(32768), },
- {AOM_ICDF(13950), AOM_ICDF(22899), AOM_ICDF(29969), AOM_ICDF(30646),
- AOM_ICDF(32768), },
- {AOM_ICDF(11728), AOM_ICDF(17834), AOM_ICDF(27214), AOM_ICDF(28218),
- AOM_ICDF(32768), },
- {AOM_ICDF(9581), AOM_ICDF(12074), AOM_ICDF(23689), AOM_ICDF(24616),
- AOM_ICDF(32768), },
- {AOM_ICDF(6193), AOM_ICDF(6855), AOM_ICDF(16430), AOM_ICDF(16955),
- AOM_ICDF(32768), },
- {AOM_ICDF(3393), AOM_ICDF(3712), AOM_ICDF(8802), AOM_ICDF(9226),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(23368), AOM_ICDF(26826), AOM_ICDF(31183), AOM_ICDF(31579),
- AOM_ICDF(32768), },
- {AOM_ICDF(16523), AOM_ICDF(21603), AOM_ICDF(30044), AOM_ICDF(30503),
- AOM_ICDF(32768), },
- {AOM_ICDF(11171), AOM_ICDF(14152), AOM_ICDF(27009), AOM_ICDF(27644),
- AOM_ICDF(32768), },
- {AOM_ICDF(8523), AOM_ICDF(9348), AOM_ICDF(21021), AOM_ICDF(21595),
- AOM_ICDF(32768), },
- {AOM_ICDF(4780), AOM_ICDF(5196), AOM_ICDF(13440), AOM_ICDF(13786),
- AOM_ICDF(32768), },
- {AOM_ICDF(4328), AOM_ICDF(5255), AOM_ICDF(10820), AOM_ICDF(11747),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(27020), AOM_ICDF(28644), AOM_ICDF(31643), AOM_ICDF(31990),
- AOM_ICDF(32768), },
- {AOM_ICDF(18016), AOM_ICDF(21678), AOM_ICDF(30346), AOM_ICDF(30712),
- AOM_ICDF(32768), },
- {AOM_ICDF(10497), AOM_ICDF(11555), AOM_ICDF(24827), AOM_ICDF(25156),
- AOM_ICDF(32768), },
- {AOM_ICDF(6370), AOM_ICDF(6703), AOM_ICDF(18612), AOM_ICDF(18903),
- AOM_ICDF(32768), },
- {AOM_ICDF(5355), AOM_ICDF(5738), AOM_ICDF(14790), AOM_ICDF(15173),
- AOM_ICDF(32768), },
- {AOM_ICDF(3486), AOM_ICDF(5578), AOM_ICDF(11155), AOM_ICDF(13247),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(28933), AOM_ICDF(29746), AOM_ICDF(31882), AOM_ICDF(32203),
- AOM_ICDF(32768), },
- {AOM_ICDF(18171), AOM_ICDF(20286), AOM_ICDF(29713), AOM_ICDF(30052),
- AOM_ICDF(32768), },
- {AOM_ICDF(9732), AOM_ICDF(10163), AOM_ICDF(23952), AOM_ICDF(24275),
- AOM_ICDF(32768), },
- {AOM_ICDF(6084), AOM_ICDF(6480), AOM_ICDF(17459), AOM_ICDF(17771),
- AOM_ICDF(32768), },
- {AOM_ICDF(3250), AOM_ICDF(3656), AOM_ICDF(10291), AOM_ICDF(10697),
- AOM_ICDF(32768), },
- {AOM_ICDF(4681), AOM_ICDF(8192), AOM_ICDF(15214), AOM_ICDF(18725),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(29940), AOM_ICDF(30510), AOM_ICDF(31933), AOM_ICDF(32260),
- AOM_ICDF(32768), },
- {AOM_ICDF(17688), AOM_ICDF(19258), AOM_ICDF(29757), AOM_ICDF(30125),
- AOM_ICDF(32768), },
- {AOM_ICDF(9668), AOM_ICDF(10798), AOM_ICDF(24231), AOM_ICDF(24605),
- AOM_ICDF(32768), },
- {AOM_ICDF(7580), AOM_ICDF(7942), AOM_ICDF(19364), AOM_ICDF(19692),
- AOM_ICDF(32768), },
- {AOM_ICDF(6043), AOM_ICDF(6446), AOM_ICDF(15578), AOM_ICDF(15981),
- AOM_ICDF(32768), },
- {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(21203), AOM_ICDF(26985),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(28553), AOM_ICDF(29151), AOM_ICDF(31521), AOM_ICDF(32038),
- AOM_ICDF(32413), AOM_ICDF(32768), },
- {AOM_ICDF(15138), AOM_ICDF(19554), AOM_ICDF(27559), AOM_ICDF(29750),
- AOM_ICDF(31321), AOM_ICDF(32768), },
- {AOM_ICDF(3406), AOM_ICDF(18680), AOM_ICDF(23310), AOM_ICDF(27259),
- AOM_ICDF(30430), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(29000), AOM_ICDF(30219), AOM_ICDF(32098), AOM_ICDF(32414),
- AOM_ICDF(32768), },
- {AOM_ICDF(21324), AOM_ICDF(25278), AOM_ICDF(31789), AOM_ICDF(32126),
- AOM_ICDF(32768), },
- {AOM_ICDF(14011), AOM_ICDF(21190), AOM_ICDF(30288), AOM_ICDF(30900),
- AOM_ICDF(32768), },
- {AOM_ICDF(12762), AOM_ICDF(18476), AOM_ICDF(27140), AOM_ICDF(28461),
- AOM_ICDF(32768), },
- {AOM_ICDF(11498), AOM_ICDF(14867), AOM_ICDF(24806), AOM_ICDF(25613),
- AOM_ICDF(32768), },
- {AOM_ICDF(15872), AOM_ICDF(16512), AOM_ICDF(24192), AOM_ICDF(25088),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(29308), AOM_ICDF(30286), AOM_ICDF(32095), AOM_ICDF(32410),
- AOM_ICDF(32768), },
- {AOM_ICDF(21819), AOM_ICDF(24215), AOM_ICDF(31771), AOM_ICDF(32103),
- AOM_ICDF(32768), },
- {AOM_ICDF(14853), AOM_ICDF(18028), AOM_ICDF(29729), AOM_ICDF(30160),
- AOM_ICDF(32768), },
- {AOM_ICDF(10598), AOM_ICDF(13400), AOM_ICDF(26555), AOM_ICDF(27043),
- AOM_ICDF(32768), },
- {AOM_ICDF(10426), AOM_ICDF(12660), AOM_ICDF(21597), AOM_ICDF(23831),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(29866), AOM_ICDF(30588), AOM_ICDF(32131), AOM_ICDF(32445),
- AOM_ICDF(32768), },
- {AOM_ICDF(23473), AOM_ICDF(25323), AOM_ICDF(31960), AOM_ICDF(32280),
- AOM_ICDF(32768), },
- {AOM_ICDF(17529), AOM_ICDF(19173), AOM_ICDF(30278), AOM_ICDF(30577),
- AOM_ICDF(32768), },
- {AOM_ICDF(9830), AOM_ICDF(11469), AOM_ICDF(23484), AOM_ICDF(25122),
- AOM_ICDF(32768), },
- {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(20480), AOM_ICDF(26624),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(30405), AOM_ICDF(31032), AOM_ICDF(32139), AOM_ICDF(32451),
- AOM_ICDF(32768), },
- {AOM_ICDF(25453), AOM_ICDF(27199), AOM_ICDF(32040), AOM_ICDF(32361),
- AOM_ICDF(32768), },
- {AOM_ICDF(15663), AOM_ICDF(16432), AOM_ICDF(30654), AOM_ICDF(31038),
- AOM_ICDF(32768), },
- {AOM_ICDF(6780), AOM_ICDF(10169), AOM_ICDF(18079), AOM_ICDF(21469),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(29785), AOM_ICDF(30368), AOM_ICDF(31904), AOM_ICDF(32245),
- AOM_ICDF(32768), },
- {AOM_ICDF(18173), AOM_ICDF(21111), AOM_ICDF(30105), AOM_ICDF(30575),
- AOM_ICDF(32768), },
- {AOM_ICDF(8476), AOM_ICDF(13666), AOM_ICDF(28420), AOM_ICDF(28896),
- AOM_ICDF(32768), },
- {AOM_ICDF(11427), AOM_ICDF(12066), AOM_ICDF(26197), AOM_ICDF(26691),
- AOM_ICDF(32768), },
- {AOM_ICDF(6827), AOM_ICDF(10923), AOM_ICDF(21845), AOM_ICDF(25941),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- },
- { // TX 32X32
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(7848), AOM_ICDF(9841), AOM_ICDF(13623), AOM_ICDF(19351),
- AOM_ICDF(23196), AOM_ICDF(32768), },
- {AOM_ICDF(3229), AOM_ICDF(5641), AOM_ICDF(7103), AOM_ICDF(13195),
- AOM_ICDF(15046), AOM_ICDF(32768), },
- {AOM_ICDF(810), AOM_ICDF(3129), AOM_ICDF(3687), AOM_ICDF(8373),
- AOM_ICDF(8971), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(8165), AOM_ICDF(12626), AOM_ICDF(22213), AOM_ICDF(23403),
- AOM_ICDF(32768), },
- {AOM_ICDF(7602), AOM_ICDF(15378), AOM_ICDF(23248), AOM_ICDF(24331),
- AOM_ICDF(32768), },
- {AOM_ICDF(5607), AOM_ICDF(10197), AOM_ICDF(18657), AOM_ICDF(20616),
- AOM_ICDF(32768), },
- {AOM_ICDF(4498), AOM_ICDF(6539), AOM_ICDF(14461), AOM_ICDF(16104),
- AOM_ICDF(32768), },
- {AOM_ICDF(3387), AOM_ICDF(4098), AOM_ICDF(10245), AOM_ICDF(11322),
- AOM_ICDF(32768), },
- {AOM_ICDF(1793), AOM_ICDF(2111), AOM_ICDF(5262), AOM_ICDF(5646),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(16815), AOM_ICDF(19141), AOM_ICDF(27640), AOM_ICDF(28110),
- AOM_ICDF(32768), },
- {AOM_ICDF(13156), AOM_ICDF(15592), AOM_ICDF(26089), AOM_ICDF(26592),
- AOM_ICDF(32768), },
- {AOM_ICDF(9841), AOM_ICDF(11588), AOM_ICDF(22858), AOM_ICDF(23403),
- AOM_ICDF(32768), },
- {AOM_ICDF(7765), AOM_ICDF(8871), AOM_ICDF(19127), AOM_ICDF(19526),
- AOM_ICDF(32768), },
- {AOM_ICDF(5550), AOM_ICDF(6013), AOM_ICDF(14338), AOM_ICDF(14677),
- AOM_ICDF(32768), },
- {AOM_ICDF(2658), AOM_ICDF(2969), AOM_ICDF(7230), AOM_ICDF(7541),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(22765), AOM_ICDF(24278), AOM_ICDF(30194), AOM_ICDF(30535),
- AOM_ICDF(32768), },
- {AOM_ICDF(15310), AOM_ICDF(17292), AOM_ICDF(27870), AOM_ICDF(28248),
- AOM_ICDF(32768), },
- {AOM_ICDF(10047), AOM_ICDF(10839), AOM_ICDF(23345), AOM_ICDF(23710),
- AOM_ICDF(32768), },
- {AOM_ICDF(6594), AOM_ICDF(6959), AOM_ICDF(17456), AOM_ICDF(17796),
- AOM_ICDF(32768), },
- {AOM_ICDF(3784), AOM_ICDF(4109), AOM_ICDF(10984), AOM_ICDF(11297),
- AOM_ICDF(32768), },
- {AOM_ICDF(1569), AOM_ICDF(1875), AOM_ICDF(4586), AOM_ICDF(4892),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(25747), AOM_ICDF(26817), AOM_ICDF(31236), AOM_ICDF(31577),
- AOM_ICDF(32768), },
- {AOM_ICDF(16018), AOM_ICDF(17720), AOM_ICDF(28833), AOM_ICDF(29219),
- AOM_ICDF(32768), },
- {AOM_ICDF(9348), AOM_ICDF(10015), AOM_ICDF(22943), AOM_ICDF(23323),
- AOM_ICDF(32768), },
- {AOM_ICDF(5841), AOM_ICDF(6167), AOM_ICDF(15774), AOM_ICDF(16107),
- AOM_ICDF(32768), },
- {AOM_ICDF(3385), AOM_ICDF(3703), AOM_ICDF(9664), AOM_ICDF(9975),
- AOM_ICDF(32768), },
- {AOM_ICDF(1460), AOM_ICDF(1768), AOM_ICDF(4704), AOM_ICDF(5011),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(29634), AOM_ICDF(30134), AOM_ICDF(31898), AOM_ICDF(32218),
- AOM_ICDF(32768), },
- {AOM_ICDF(16976), AOM_ICDF(17856), AOM_ICDF(29258), AOM_ICDF(29584),
- AOM_ICDF(32768), },
- {AOM_ICDF(8521), AOM_ICDF(8858), AOM_ICDF(21252), AOM_ICDF(21574),
- AOM_ICDF(32768), },
- {AOM_ICDF(4894), AOM_ICDF(5208), AOM_ICDF(13957), AOM_ICDF(14271),
- AOM_ICDF(32768), },
- {AOM_ICDF(3140), AOM_ICDF(3452), AOM_ICDF(9099), AOM_ICDF(9411),
- AOM_ICDF(32768), },
- {AOM_ICDF(1770), AOM_ICDF(2080), AOM_ICDF(5241), AOM_ICDF(5551),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(22253), AOM_ICDF(23279), AOM_ICDF(24319), AOM_ICDF(27691),
- AOM_ICDF(30884), AOM_ICDF(32768), },
- {AOM_ICDF(6281), AOM_ICDF(8348), AOM_ICDF(9473), AOM_ICDF(15740),
- AOM_ICDF(24879), AOM_ICDF(32768), },
- {AOM_ICDF(1265), AOM_ICDF(3893), AOM_ICDF(4482), AOM_ICDF(9694),
- AOM_ICDF(18376), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(17243), AOM_ICDF(18993), AOM_ICDF(28515), AOM_ICDF(29242),
- AOM_ICDF(32768), },
- {AOM_ICDF(15645), AOM_ICDF(23632), AOM_ICDF(29905), AOM_ICDF(30416),
- AOM_ICDF(32768), },
- {AOM_ICDF(11203), AOM_ICDF(18441), AOM_ICDF(27037), AOM_ICDF(27930),
- AOM_ICDF(32768), },
- {AOM_ICDF(9306), AOM_ICDF(13788), AOM_ICDF(23647), AOM_ICDF(24669),
- AOM_ICDF(32768), },
- {AOM_ICDF(8076), AOM_ICDF(10237), AOM_ICDF(20500), AOM_ICDF(21437),
- AOM_ICDF(32768), },
- {AOM_ICDF(7214), AOM_ICDF(8133), AOM_ICDF(17608), AOM_ICDF(18202),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(23555), AOM_ICDF(26147), AOM_ICDF(31229), AOM_ICDF(31581),
- AOM_ICDF(32768), },
- {AOM_ICDF(16046), AOM_ICDF(20455), AOM_ICDF(29711), AOM_ICDF(30107),
- AOM_ICDF(32768), },
- {AOM_ICDF(10810), AOM_ICDF(14014), AOM_ICDF(25967), AOM_ICDF(26499),
- AOM_ICDF(32768), },
- {AOM_ICDF(8267), AOM_ICDF(9930), AOM_ICDF(21704), AOM_ICDF(22244),
- AOM_ICDF(32768), },
- {AOM_ICDF(5637), AOM_ICDF(6282), AOM_ICDF(15954), AOM_ICDF(16508),
- AOM_ICDF(32768), },
- {AOM_ICDF(4090), AOM_ICDF(4363), AOM_ICDF(11771), AOM_ICDF(12044),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(26146), AOM_ICDF(27425), AOM_ICDF(31658), AOM_ICDF(31983),
- AOM_ICDF(32768), },
- {AOM_ICDF(17486), AOM_ICDF(20295), AOM_ICDF(30279), AOM_ICDF(30621),
- AOM_ICDF(32768), },
- {AOM_ICDF(10812), AOM_ICDF(12230), AOM_ICDF(26095), AOM_ICDF(26460),
- AOM_ICDF(32768), },
- {AOM_ICDF(7510), AOM_ICDF(8042), AOM_ICDF(21058), AOM_ICDF(21425),
- AOM_ICDF(32768), },
- {AOM_ICDF(4566), AOM_ICDF(4916), AOM_ICDF(13594), AOM_ICDF(13891),
- AOM_ICDF(32768), },
- {AOM_ICDF(1956), AOM_ICDF(2445), AOM_ICDF(5380), AOM_ICDF(5869),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(28423), AOM_ICDF(29253), AOM_ICDF(31959), AOM_ICDF(32277),
- AOM_ICDF(32768), },
- {AOM_ICDF(18711), AOM_ICDF(20638), AOM_ICDF(30445), AOM_ICDF(30777),
- AOM_ICDF(32768), },
- {AOM_ICDF(10301), AOM_ICDF(10903), AOM_ICDF(24702), AOM_ICDF(25060),
- AOM_ICDF(32768), },
- {AOM_ICDF(6531), AOM_ICDF(6885), AOM_ICDF(18215), AOM_ICDF(18535),
- AOM_ICDF(32768), },
- {AOM_ICDF(3965), AOM_ICDF(4265), AOM_ICDF(11701), AOM_ICDF(12023),
- AOM_ICDF(32768), },
- {AOM_ICDF(3255), AOM_ICDF(3906), AOM_ICDF(8897), AOM_ICDF(9548),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(29905), AOM_ICDF(30382), AOM_ICDF(32053), AOM_ICDF(32369),
- AOM_ICDF(32768), },
- {AOM_ICDF(19724), AOM_ICDF(20376), AOM_ICDF(30778), AOM_ICDF(31101),
- AOM_ICDF(32768), },
- {AOM_ICDF(10430), AOM_ICDF(10786), AOM_ICDF(24620), AOM_ICDF(24943),
- AOM_ICDF(32768), },
- {AOM_ICDF(6151), AOM_ICDF(6475), AOM_ICDF(17188), AOM_ICDF(17504),
- AOM_ICDF(32768), },
- {AOM_ICDF(3728), AOM_ICDF(4034), AOM_ICDF(11352), AOM_ICDF(11658),
- AOM_ICDF(32768), },
- {AOM_ICDF(1456), AOM_ICDF(1748), AOM_ICDF(5024), AOM_ICDF(5316),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(24883), AOM_ICDF(25616), AOM_ICDF(27995), AOM_ICDF(29251),
- AOM_ICDF(31055), AOM_ICDF(32768), },
- {AOM_ICDF(9802), AOM_ICDF(11841), AOM_ICDF(18691), AOM_ICDF(22179),
- AOM_ICDF(26383), AOM_ICDF(32768), },
- {AOM_ICDF(4096), AOM_ICDF(7928), AOM_ICDF(14072), AOM_ICDF(21042),
- AOM_ICDF(23453), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(10363), AOM_ICDF(20924), AOM_ICDF(29116), AOM_ICDF(29906),
- AOM_ICDF(32768), },
- {AOM_ICDF(10682), AOM_ICDF(22326), AOM_ICDF(29093), AOM_ICDF(29642),
- AOM_ICDF(32768), },
- {AOM_ICDF(10304), AOM_ICDF(21073), AOM_ICDF(26843), AOM_ICDF(28904),
- AOM_ICDF(32768), },
- {AOM_ICDF(6138), AOM_ICDF(13221), AOM_ICDF(22475), AOM_ICDF(25119),
- AOM_ICDF(32768), },
- {AOM_ICDF(3788), AOM_ICDF(4356), AOM_ICDF(10607), AOM_ICDF(12690),
- AOM_ICDF(32768), },
- {AOM_ICDF(1950), AOM_ICDF(4291), AOM_ICDF(10923), AOM_ICDF(12873),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(21958), AOM_ICDF(27093), AOM_ICDF(30741), AOM_ICDF(31349),
- AOM_ICDF(32768), },
- {AOM_ICDF(18725), AOM_ICDF(23406), AOM_ICDF(30541), AOM_ICDF(31268),
- AOM_ICDF(32768), },
- {AOM_ICDF(15634), AOM_ICDF(17134), AOM_ICDF(26450), AOM_ICDF(27092),
- AOM_ICDF(32768), },
- {AOM_ICDF(10012), AOM_ICDF(11287), AOM_ICDF(24758), AOM_ICDF(25304),
- AOM_ICDF(32768), },
- {AOM_ICDF(6242), AOM_ICDF(7802), AOM_ICDF(19895), AOM_ICDF(21065),
- AOM_ICDF(32768), },
- {AOM_ICDF(4096), AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(20480),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(26587), AOM_ICDF(27934), AOM_ICDF(31817), AOM_ICDF(32094),
- AOM_ICDF(32768), },
- {AOM_ICDF(20234), AOM_ICDF(22651), AOM_ICDF(30576), AOM_ICDF(30857),
- AOM_ICDF(32768), },
- {AOM_ICDF(13405), AOM_ICDF(14708), AOM_ICDF(26624), AOM_ICDF(27183),
- AOM_ICDF(32768), },
- {AOM_ICDF(9132), AOM_ICDF(11281), AOM_ICDF(19876), AOM_ICDF(21487),
- AOM_ICDF(32768), },
- {AOM_ICDF(5174), AOM_ICDF(10348), AOM_ICDF(15522), AOM_ICDF(20696),
- AOM_ICDF(32768), },
- {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(19275), AOM_ICDF(25058),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(28277), AOM_ICDF(29312), AOM_ICDF(32101), AOM_ICDF(32400),
- AOM_ICDF(32768), },
- {AOM_ICDF(18946), AOM_ICDF(23037), AOM_ICDF(31186), AOM_ICDF(31565),
- AOM_ICDF(32768), },
- {AOM_ICDF(14043), AOM_ICDF(14980), AOM_ICDF(29491), AOM_ICDF(30193),
- AOM_ICDF(32768), },
- {AOM_ICDF(9638), AOM_ICDF(12529), AOM_ICDF(21203), AOM_ICDF(24094),
- AOM_ICDF(32768), },
- {AOM_ICDF(6554), AOM_ICDF(11469), AOM_ICDF(18022), AOM_ICDF(22938),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(31039), AOM_ICDF(31404), AOM_ICDF(32048), AOM_ICDF(32372),
- AOM_ICDF(32768), },
- {AOM_ICDF(20567), AOM_ICDF(21869), AOM_ICDF(28724), AOM_ICDF(29256),
- AOM_ICDF(32768), },
- {AOM_ICDF(10000), AOM_ICDF(11250), AOM_ICDF(22768), AOM_ICDF(23393),
- AOM_ICDF(32768), },
- {AOM_ICDF(6291), AOM_ICDF(7078), AOM_ICDF(20447), AOM_ICDF(21234),
- AOM_ICDF(32768), },
- {AOM_ICDF(3072), AOM_ICDF(6144), AOM_ICDF(18432), AOM_ICDF(21504),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(23448), AOM_ICDF(25882), AOM_ICDF(29692), AOM_ICDF(31272),
- AOM_ICDF(32065), AOM_ICDF(32768), },
- {AOM_ICDF(4276), AOM_ICDF(17832), AOM_ICDF(22156), AOM_ICDF(28463),
- AOM_ICDF(30374), AOM_ICDF(32768), },
- {AOM_ICDF(842), AOM_ICDF(20937), AOM_ICDF(22447), AOM_ICDF(28559),
- AOM_ICDF(30333), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(30469), AOM_ICDF(30991), AOM_ICDF(32114), AOM_ICDF(32435),
- AOM_ICDF(32768), },
- {AOM_ICDF(27295), AOM_ICDF(29153), AOM_ICDF(31917), AOM_ICDF(32269),
- AOM_ICDF(32768), },
- {AOM_ICDF(16309), AOM_ICDF(22060), AOM_ICDF(29937), AOM_ICDF(30686),
- AOM_ICDF(32768), },
- {AOM_ICDF(11440), AOM_ICDF(16853), AOM_ICDF(26633), AOM_ICDF(27427),
- AOM_ICDF(32768), },
- {AOM_ICDF(13069), AOM_ICDF(15405), AOM_ICDF(27401), AOM_ICDF(28033),
- AOM_ICDF(32768), },
- {AOM_ICDF(9084), AOM_ICDF(10058), AOM_ICDF(23197), AOM_ICDF(23684),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(30728), AOM_ICDF(31202), AOM_ICDF(32138), AOM_ICDF(32450),
- AOM_ICDF(32768), },
- {AOM_ICDF(23421), AOM_ICDF(26186), AOM_ICDF(31939), AOM_ICDF(32278),
- AOM_ICDF(32768), },
- {AOM_ICDF(12249), AOM_ICDF(15027), AOM_ICDF(28348), AOM_ICDF(28854),
- AOM_ICDF(32768), },
- {AOM_ICDF(5667), AOM_ICDF(6899), AOM_ICDF(22174), AOM_ICDF(23652),
- AOM_ICDF(32768), },
- {AOM_ICDF(8192), AOM_ICDF(10650), AOM_ICDF(17203), AOM_ICDF(20480),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(30721), AOM_ICDF(31093), AOM_ICDF(32141), AOM_ICDF(32453),
- AOM_ICDF(32768), },
- {AOM_ICDF(24052), AOM_ICDF(25175), AOM_ICDF(31923), AOM_ICDF(32231),
- AOM_ICDF(32768), },
- {AOM_ICDF(8145), AOM_ICDF(9281), AOM_ICDF(27654), AOM_ICDF(28412),
- AOM_ICDF(32768), },
- {AOM_ICDF(7373), AOM_ICDF(9830), AOM_ICDF(21299), AOM_ICDF(23757),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(31284), AOM_ICDF(31621), AOM_ICDF(32143), AOM_ICDF(32455),
- AOM_ICDF(32768), },
- {AOM_ICDF(27783), AOM_ICDF(28563), AOM_ICDF(32045), AOM_ICDF(32361),
- AOM_ICDF(32768), },
- {AOM_ICDF(10149), AOM_ICDF(12179), AOM_ICDF(28128), AOM_ICDF(28998),
- AOM_ICDF(32768), },
- {AOM_ICDF(5650), AOM_ICDF(9039), AOM_ICDF(19209), AOM_ICDF(22599),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(31038), AOM_ICDF(31383), AOM_ICDF(32035), AOM_ICDF(32357),
- AOM_ICDF(32768), },
- {AOM_ICDF(20689), AOM_ICDF(22001), AOM_ICDF(28880), AOM_ICDF(29479),
- AOM_ICDF(32768), },
- {AOM_ICDF(7827), AOM_ICDF(10613), AOM_ICDF(24141), AOM_ICDF(24735),
- AOM_ICDF(32768), },
- {AOM_ICDF(8021), AOM_ICDF(8585), AOM_ICDF(22014), AOM_ICDF(22383),
- AOM_ICDF(32768), },
- {AOM_ICDF(6047), AOM_ICDF(6350), AOM_ICDF(19918), AOM_ICDF(20220),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- },
-};
+static const aom_cdf_prob
+ av1_default_eob_multi1024_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(
+ 11)] = { { { { AOM_CDF11(393, 421, 751, 1623, 3160, 6352, 13345, 18047,
+ 22571, 25830) },
+ { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+ 23831, 26810, 29789) } },
+ { { AOM_CDF11(1865, 1988, 2930, 4242, 10533, 16538, 21354,
+ 27255, 28546, 31784) },
+ { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+ 23831, 26810, 29789) } } },
+ { { { AOM_CDF11(696, 948, 3145, 5702, 9706, 13217, 17851,
+ 21856, 25692, 28034) },
+ { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+ 23831, 26810, 29789) } },
+ { { AOM_CDF11(2672, 3591, 9330, 17084, 22725, 24284, 26527,
+ 28027, 28377, 30876) },
+ { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+ 23831, 26810, 29789) } } },
+ { { { AOM_CDF11(2784, 3831, 7041, 10521, 14847, 18844, 23155,
+ 26682, 29229, 31045) },
+ { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+ 23831, 26810, 29789) } },
+ { { AOM_CDF11(9577, 12466, 17739, 20750, 22061, 23215, 24601,
+ 25483, 25843, 32056) },
+ { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+ 23831, 26810, 29789) } } },
+ { { { AOM_CDF11(6698, 8334, 11961, 15762, 20186, 23862, 27434,
+ 29326, 31082, 32050) },
+ { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+ 23831, 26810, 29789) } },
+ { { AOM_CDF11(20569, 22426, 25569, 26859, 28053, 28913,
+ 29486, 29724, 29807, 32570) },
+ { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+ 23831, 26810, 29789) } } } };
-static const coeff_cdf_model
-av1_default_coef_head_cdfs_q2[TX_SIZES][PLANE_TYPES] = {
- { // TX 4X4
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(23035), AOM_ICDF(23799), AOM_ICDF(27745), AOM_ICDF(29607),
- AOM_ICDF(30130), AOM_ICDF(32768), },
- {AOM_ICDF(12409), AOM_ICDF(14763), AOM_ICDF(22883), AOM_ICDF(26775),
- AOM_ICDF(27649), AOM_ICDF(32768), },
- {AOM_ICDF(5237), AOM_ICDF(9433), AOM_ICDF(15597), AOM_ICDF(21779),
- AOM_ICDF(23224), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(10424), AOM_ICDF(17678), AOM_ICDF(28850), AOM_ICDF(29349),
- AOM_ICDF(32768), },
- {AOM_ICDF(10376), AOM_ICDF(16902), AOM_ICDF(28779), AOM_ICDF(29265),
- AOM_ICDF(32768), },
- {AOM_ICDF(10166), AOM_ICDF(14387), AOM_ICDF(26253), AOM_ICDF(26807),
- AOM_ICDF(32768), },
- {AOM_ICDF(8474), AOM_ICDF(9927), AOM_ICDF(22092), AOM_ICDF(22697),
- AOM_ICDF(32768), },
- {AOM_ICDF(6415), AOM_ICDF(6911), AOM_ICDF(17155), AOM_ICDF(17579),
- AOM_ICDF(32768), },
- {AOM_ICDF(4611), AOM_ICDF(4928), AOM_ICDF(12174), AOM_ICDF(12497),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(16984), AOM_ICDF(21802), AOM_ICDF(30901), AOM_ICDF(31373),
- AOM_ICDF(32768), },
- {AOM_ICDF(14003), AOM_ICDF(19369), AOM_ICDF(30193), AOM_ICDF(30615),
- AOM_ICDF(32768), },
- {AOM_ICDF(10729), AOM_ICDF(13233), AOM_ICDF(26938), AOM_ICDF(27455),
- AOM_ICDF(32768), },
- {AOM_ICDF(8604), AOM_ICDF(9526), AOM_ICDF(22436), AOM_ICDF(22989),
- AOM_ICDF(32768), },
- {AOM_ICDF(6828), AOM_ICDF(7236), AOM_ICDF(18056), AOM_ICDF(18456),
- AOM_ICDF(32768), },
- {AOM_ICDF(4302), AOM_ICDF(4555), AOM_ICDF(12209), AOM_ICDF(12462),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(20261), AOM_ICDF(24381), AOM_ICDF(31612), AOM_ICDF(31989),
- AOM_ICDF(32768), },
- {AOM_ICDF(13775), AOM_ICDF(20449), AOM_ICDF(30685), AOM_ICDF(31111),
- AOM_ICDF(32768), },
- {AOM_ICDF(10459), AOM_ICDF(13768), AOM_ICDF(27504), AOM_ICDF(28114),
- AOM_ICDF(32768), },
- {AOM_ICDF(7994), AOM_ICDF(8989), AOM_ICDF(22906), AOM_ICDF(23636),
- AOM_ICDF(32768), },
- {AOM_ICDF(5928), AOM_ICDF(6460), AOM_ICDF(16884), AOM_ICDF(17720),
- AOM_ICDF(32768), },
- {AOM_ICDF(4520), AOM_ICDF(7910), AOM_ICDF(12429), AOM_ICDF(16949),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(17822), AOM_ICDF(26021), AOM_ICDF(31751), AOM_ICDF(32150),
- AOM_ICDF(32768), },
- {AOM_ICDF(13484), AOM_ICDF(23372), AOM_ICDF(31305), AOM_ICDF(31747),
- AOM_ICDF(32768), },
- {AOM_ICDF(11009), AOM_ICDF(15469), AOM_ICDF(28452), AOM_ICDF(29132),
- AOM_ICDF(32768), },
- {AOM_ICDF(8358), AOM_ICDF(9357), AOM_ICDF(22412), AOM_ICDF(23385),
- AOM_ICDF(32768), },
- {AOM_ICDF(9392), AOM_ICDF(10018), AOM_ICDF(18158), AOM_ICDF(19202),
- AOM_ICDF(32768), },
- {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(20480), AOM_ICDF(26624),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(5236), AOM_ICDF(26529), AOM_ICDF(31709), AOM_ICDF(32201),
- AOM_ICDF(32768), },
- {AOM_ICDF(5710), AOM_ICDF(25925), AOM_ICDF(31254), AOM_ICDF(31967),
- AOM_ICDF(32768), },
- {AOM_ICDF(7645), AOM_ICDF(19427), AOM_ICDF(28170), AOM_ICDF(29920),
- AOM_ICDF(32768), },
- {AOM_ICDF(7427), AOM_ICDF(13350), AOM_ICDF(23253), AOM_ICDF(25438),
- AOM_ICDF(32768), },
- {AOM_ICDF(4681), AOM_ICDF(6687), AOM_ICDF(15381), AOM_ICDF(18725),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(11176), AOM_ICDF(18297), AOM_ICDF(19062), AOM_ICDF(28984),
- AOM_ICDF(29496), AOM_ICDF(32768), },
- {AOM_ICDF(9778), AOM_ICDF(17798), AOM_ICDF(19934), AOM_ICDF(28434),
- AOM_ICDF(28921), AOM_ICDF(32768), },
- {AOM_ICDF(4806), AOM_ICDF(14260), AOM_ICDF(17259), AOM_ICDF(26368),
- AOM_ICDF(26942), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(21802), AOM_ICDF(22916), AOM_ICDF(31657), AOM_ICDF(31989),
- AOM_ICDF(32768), },
- {AOM_ICDF(16874), AOM_ICDF(20345), AOM_ICDF(31048), AOM_ICDF(31389),
- AOM_ICDF(32768), },
- {AOM_ICDF(10717), AOM_ICDF(12576), AOM_ICDF(26899), AOM_ICDF(27294),
- AOM_ICDF(32768), },
- {AOM_ICDF(8468), AOM_ICDF(9404), AOM_ICDF(21928), AOM_ICDF(22358),
- AOM_ICDF(32768), },
- {AOM_ICDF(5992), AOM_ICDF(6521), AOM_ICDF(16309), AOM_ICDF(16729),
- AOM_ICDF(32768), },
- {AOM_ICDF(5134), AOM_ICDF(5452), AOM_ICDF(11491), AOM_ICDF(11865),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(22003), AOM_ICDF(24147), AOM_ICDF(31841), AOM_ICDF(32177),
- AOM_ICDF(32768), },
- {AOM_ICDF(17179), AOM_ICDF(20593), AOM_ICDF(31041), AOM_ICDF(31394),
- AOM_ICDF(32768), },
- {AOM_ICDF(9282), AOM_ICDF(10544), AOM_ICDF(25698), AOM_ICDF(26133),
- AOM_ICDF(32768), },
- {AOM_ICDF(6301), AOM_ICDF(7013), AOM_ICDF(19066), AOM_ICDF(19557),
- AOM_ICDF(32768), },
- {AOM_ICDF(3845), AOM_ICDF(4316), AOM_ICDF(12209), AOM_ICDF(12812),
- AOM_ICDF(32768), },
- {AOM_ICDF(4819), AOM_ICDF(6746), AOM_ICDF(11565), AOM_ICDF(13011),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(22820), AOM_ICDF(26023), AOM_ICDF(31888), AOM_ICDF(32236),
- AOM_ICDF(32768), },
- {AOM_ICDF(17130), AOM_ICDF(21510), AOM_ICDF(31268), AOM_ICDF(31632),
- AOM_ICDF(32768), },
- {AOM_ICDF(10062), AOM_ICDF(11898), AOM_ICDF(26787), AOM_ICDF(27281),
- AOM_ICDF(32768), },
- {AOM_ICDF(7681), AOM_ICDF(8590), AOM_ICDF(21264), AOM_ICDF(22034),
- AOM_ICDF(32768), },
- {AOM_ICDF(4413), AOM_ICDF(5143), AOM_ICDF(13605), AOM_ICDF(14712),
- AOM_ICDF(32768), },
- {AOM_ICDF(5461), AOM_ICDF(10923), AOM_ICDF(16384), AOM_ICDF(21845),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(20237), AOM_ICDF(25695), AOM_ICDF(31868), AOM_ICDF(32222),
- AOM_ICDF(32768), },
- {AOM_ICDF(15551), AOM_ICDF(22658), AOM_ICDF(31236), AOM_ICDF(31659),
- AOM_ICDF(32768), },
- {AOM_ICDF(9584), AOM_ICDF(12389), AOM_ICDF(26347), AOM_ICDF(27242),
- AOM_ICDF(32768), },
- {AOM_ICDF(6067), AOM_ICDF(7231), AOM_ICDF(19625), AOM_ICDF(20707),
- AOM_ICDF(32768), },
- {AOM_ICDF(3724), AOM_ICDF(4312), AOM_ICDF(11269), AOM_ICDF(12425),
- AOM_ICDF(32768), },
- {AOM_ICDF(4096), AOM_ICDF(6554), AOM_ICDF(9830), AOM_ICDF(12288),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(11726), AOM_ICDF(26639), AOM_ICDF(31977), AOM_ICDF(32340),
- AOM_ICDF(32768), },
- {AOM_ICDF(10754), AOM_ICDF(25823), AOM_ICDF(31568), AOM_ICDF(32060),
- AOM_ICDF(32768), },
- {AOM_ICDF(8761), AOM_ICDF(16650), AOM_ICDF(27884), AOM_ICDF(29394),
- AOM_ICDF(32768), },
- {AOM_ICDF(7387), AOM_ICDF(9941), AOM_ICDF(21377), AOM_ICDF(23333),
- AOM_ICDF(32768), },
- {AOM_ICDF(2374), AOM_ICDF(3799), AOM_ICDF(16147), AOM_ICDF(19471),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(29271), AOM_ICDF(29645), AOM_ICDF(31447), AOM_ICDF(31951),
- AOM_ICDF(32313), AOM_ICDF(32768), },
- {AOM_ICDF(22174), AOM_ICDF(23288), AOM_ICDF(29633), AOM_ICDF(31096),
- AOM_ICDF(31701), AOM_ICDF(32768), },
- {AOM_ICDF(13601), AOM_ICDF(16603), AOM_ICDF(25296), AOM_ICDF(28966),
- AOM_ICDF(30043), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(13850), AOM_ICDF(26266), AOM_ICDF(31653), AOM_ICDF(32083),
- AOM_ICDF(32768), },
- {AOM_ICDF(11979), AOM_ICDF(24610), AOM_ICDF(31369), AOM_ICDF(31810),
- AOM_ICDF(32768), },
- {AOM_ICDF(11325), AOM_ICDF(18989), AOM_ICDF(29109), AOM_ICDF(29770),
- AOM_ICDF(32768), },
- {AOM_ICDF(9338), AOM_ICDF(11892), AOM_ICDF(25324), AOM_ICDF(26115),
- AOM_ICDF(32768), },
- {AOM_ICDF(5725), AOM_ICDF(6243), AOM_ICDF(18483), AOM_ICDF(18919),
- AOM_ICDF(32768), },
- {AOM_ICDF(6554), AOM_ICDF(9830), AOM_ICDF(16384), AOM_ICDF(19661),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(18097), AOM_ICDF(27765), AOM_ICDF(31891), AOM_ICDF(32286),
- AOM_ICDF(32768), },
- {AOM_ICDF(14735), AOM_ICDF(24632), AOM_ICDF(31577), AOM_ICDF(31970),
- AOM_ICDF(32768), },
- {AOM_ICDF(11031), AOM_ICDF(15675), AOM_ICDF(29109), AOM_ICDF(29716),
- AOM_ICDF(32768), },
- {AOM_ICDF(8859), AOM_ICDF(9891), AOM_ICDF(23909), AOM_ICDF(24940),
- AOM_ICDF(32768), },
- {AOM_ICDF(7864), AOM_ICDF(11796), AOM_ICDF(20972), AOM_ICDF(24904),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(21057), AOM_ICDF(29116), AOM_ICDF(32033), AOM_ICDF(32367),
- AOM_ICDF(32768), },
- {AOM_ICDF(15287), AOM_ICDF(25704), AOM_ICDF(31791), AOM_ICDF(32151),
- AOM_ICDF(32768), },
- {AOM_ICDF(12927), AOM_ICDF(18993), AOM_ICDF(30815), AOM_ICDF(31329),
- AOM_ICDF(32768), },
- {AOM_ICDF(13227), AOM_ICDF(16234), AOM_ICDF(27657), AOM_ICDF(28860),
- AOM_ICDF(32768), },
- {AOM_ICDF(6899), AOM_ICDF(12072), AOM_ICDF(18971), AOM_ICDF(25869),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(17688), AOM_ICDF(28768), AOM_ICDF(32140), AOM_ICDF(32435),
- AOM_ICDF(32768), },
- {AOM_ICDF(13473), AOM_ICDF(26360), AOM_ICDF(31944), AOM_ICDF(32307),
- AOM_ICDF(32768), },
- {AOM_ICDF(12653), AOM_ICDF(18817), AOM_ICDF(28875), AOM_ICDF(30497),
- AOM_ICDF(32768), },
- {AOM_ICDF(5461), AOM_ICDF(10923), AOM_ICDF(20025), AOM_ICDF(25486),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(6820), AOM_ICDF(28765), AOM_ICDF(31878), AOM_ICDF(32323),
- AOM_ICDF(32768), },
- {AOM_ICDF(7737), AOM_ICDF(28672), AOM_ICDF(31972), AOM_ICDF(32313),
- AOM_ICDF(32768), },
- {AOM_ICDF(11796), AOM_ICDF(18350), AOM_ICDF(24904), AOM_ICDF(28836),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(30079), AOM_ICDF(30525), AOM_ICDF(31559), AOM_ICDF(32085),
- AOM_ICDF(32407), AOM_ICDF(32768), },
- {AOM_ICDF(22148), AOM_ICDF(24035), AOM_ICDF(29557), AOM_ICDF(31423),
- AOM_ICDF(31881), AOM_ICDF(32768), },
- {AOM_ICDF(13266), AOM_ICDF(17717), AOM_ICDF(26069), AOM_ICDF(29825),
- AOM_ICDF(30780), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(18219), AOM_ICDF(27530), AOM_ICDF(32048), AOM_ICDF(32373),
- AOM_ICDF(32768), },
- {AOM_ICDF(14664), AOM_ICDF(25532), AOM_ICDF(31886), AOM_ICDF(32244),
- AOM_ICDF(32768), },
- {AOM_ICDF(11683), AOM_ICDF(19554), AOM_ICDF(30330), AOM_ICDF(30870),
- AOM_ICDF(32768), },
- {AOM_ICDF(9410), AOM_ICDF(14238), AOM_ICDF(25794), AOM_ICDF(27268),
- AOM_ICDF(32768), },
- {AOM_ICDF(6629), AOM_ICDF(9580), AOM_ICDF(20186), AOM_ICDF(22187),
- AOM_ICDF(32768), },
- {AOM_ICDF(2891), AOM_ICDF(4337), AOM_ICDF(11083), AOM_ICDF(13493),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(20016), AOM_ICDF(28471), AOM_ICDF(32074), AOM_ICDF(32401),
- AOM_ICDF(32768), },
- {AOM_ICDF(16915), AOM_ICDF(26047), AOM_ICDF(31965), AOM_ICDF(32300),
- AOM_ICDF(32768), },
- {AOM_ICDF(10725), AOM_ICDF(18206), AOM_ICDF(30056), AOM_ICDF(30606),
- AOM_ICDF(32768), },
- {AOM_ICDF(6883), AOM_ICDF(13990), AOM_ICDF(26334), AOM_ICDF(27531),
- AOM_ICDF(32768), },
- {AOM_ICDF(11529), AOM_ICDF(15170), AOM_ICDF(22452), AOM_ICDF(24879),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(23488), AOM_ICDF(29744), AOM_ICDF(32117), AOM_ICDF(32442),
- AOM_ICDF(32768), },
- {AOM_ICDF(17520), AOM_ICDF(27259), AOM_ICDF(32056), AOM_ICDF(32389),
- AOM_ICDF(32768), },
- {AOM_ICDF(13107), AOM_ICDF(20597), AOM_ICDF(31416), AOM_ICDF(32092),
- AOM_ICDF(32768), },
- {AOM_ICDF(20165), AOM_ICDF(22686), AOM_ICDF(26887), AOM_ICDF(29407),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(17711), AOM_ICDF(29963), AOM_ICDF(32137), AOM_ICDF(32452),
- AOM_ICDF(32768), },
- {AOM_ICDF(14078), AOM_ICDF(28336), AOM_ICDF(32026), AOM_ICDF(32391),
- AOM_ICDF(32768), },
- {AOM_ICDF(11129), AOM_ICDF(28749), AOM_ICDF(30295), AOM_ICDF(31222),
- AOM_ICDF(32768), },
- {AOM_ICDF(7447), AOM_ICDF(13405), AOM_ICDF(22342), AOM_ICDF(26810),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(14413), AOM_ICDF(30309), AOM_ICDF(32090), AOM_ICDF(32471),
- AOM_ICDF(32768), },
- {AOM_ICDF(11814), AOM_ICDF(30354), AOM_ICDF(32251), AOM_ICDF(32509),
- AOM_ICDF(32768), },
- {AOM_ICDF(7282), AOM_ICDF(12743), AOM_ICDF(21845), AOM_ICDF(27307),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- },
- { // TX 8X8
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(16945), AOM_ICDF(18241), AOM_ICDF(25718), AOM_ICDF(28152),
- AOM_ICDF(29383), AOM_ICDF(32768), },
- {AOM_ICDF(7095), AOM_ICDF(10051), AOM_ICDF(18830), AOM_ICDF(23174),
- AOM_ICDF(24906), AOM_ICDF(32768), },
- {AOM_ICDF(2585), AOM_ICDF(6677), AOM_ICDF(10951), AOM_ICDF(17411),
- AOM_ICDF(18916), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(12894), AOM_ICDF(17897), AOM_ICDF(28218), AOM_ICDF(28651),
- AOM_ICDF(32768), },
- {AOM_ICDF(11333), AOM_ICDF(16802), AOM_ICDF(27676), AOM_ICDF(28153),
- AOM_ICDF(32768), },
- {AOM_ICDF(10166), AOM_ICDF(13829), AOM_ICDF(25072), AOM_ICDF(25646),
- AOM_ICDF(32768), },
- {AOM_ICDF(8356), AOM_ICDF(9772), AOM_ICDF(21358), AOM_ICDF(21912),
- AOM_ICDF(32768), },
- {AOM_ICDF(5988), AOM_ICDF(6506), AOM_ICDF(16203), AOM_ICDF(16647),
- AOM_ICDF(32768), },
- {AOM_ICDF(3684), AOM_ICDF(4012), AOM_ICDF(10039), AOM_ICDF(10367),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(18192), AOM_ICDF(21044), AOM_ICDF(30229), AOM_ICDF(30597),
- AOM_ICDF(32768), },
- {AOM_ICDF(14976), AOM_ICDF(18218), AOM_ICDF(29191), AOM_ICDF(29564),
- AOM_ICDF(32768), },
- {AOM_ICDF(10914), AOM_ICDF(12508), AOM_ICDF(25451), AOM_ICDF(25857),
- AOM_ICDF(32768), },
- {AOM_ICDF(7970), AOM_ICDF(8605), AOM_ICDF(20619), AOM_ICDF(21011),
- AOM_ICDF(32768), },
- {AOM_ICDF(5555), AOM_ICDF(5926), AOM_ICDF(15730), AOM_ICDF(16091),
- AOM_ICDF(32768), },
- {AOM_ICDF(3522), AOM_ICDF(3847), AOM_ICDF(10567), AOM_ICDF(10892),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(21896), AOM_ICDF(23866), AOM_ICDF(31136), AOM_ICDF(31486),
- AOM_ICDF(32768), },
- {AOM_ICDF(15913), AOM_ICDF(18331), AOM_ICDF(29670), AOM_ICDF(30019),
- AOM_ICDF(32768), },
- {AOM_ICDF(10158), AOM_ICDF(10878), AOM_ICDF(24664), AOM_ICDF(25024),
- AOM_ICDF(32768), },
- {AOM_ICDF(6692), AOM_ICDF(7070), AOM_ICDF(18934), AOM_ICDF(19267),
- AOM_ICDF(32768), },
- {AOM_ICDF(4603), AOM_ICDF(4914), AOM_ICDF(13724), AOM_ICDF(14041),
- AOM_ICDF(32768), },
- {AOM_ICDF(2378), AOM_ICDF(3171), AOM_ICDF(7663), AOM_ICDF(8456),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(24113), AOM_ICDF(25740), AOM_ICDF(31668), AOM_ICDF(32000),
- AOM_ICDF(32768), },
- {AOM_ICDF(16618), AOM_ICDF(18583), AOM_ICDF(30173), AOM_ICDF(30511),
- AOM_ICDF(32768), },
- {AOM_ICDF(10122), AOM_ICDF(10666), AOM_ICDF(24877), AOM_ICDF(25222),
- AOM_ICDF(32768), },
- {AOM_ICDF(6721), AOM_ICDF(7062), AOM_ICDF(19250), AOM_ICDF(19588),
- AOM_ICDF(32768), },
- {AOM_ICDF(4641), AOM_ICDF(4957), AOM_ICDF(13698), AOM_ICDF(14021),
- AOM_ICDF(32768), },
- {AOM_ICDF(3324), AOM_ICDF(4749), AOM_ICDF(9498), AOM_ICDF(10923),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(24933), AOM_ICDF(27294), AOM_ICDF(31876), AOM_ICDF(32207),
- AOM_ICDF(32768), },
- {AOM_ICDF(17505), AOM_ICDF(20214), AOM_ICDF(30842), AOM_ICDF(31189),
- AOM_ICDF(32768), },
- {AOM_ICDF(10756), AOM_ICDF(11345), AOM_ICDF(25989), AOM_ICDF(26362),
- AOM_ICDF(32768), },
- {AOM_ICDF(7374), AOM_ICDF(7763), AOM_ICDF(19820), AOM_ICDF(20160),
- AOM_ICDF(32768), },
- {AOM_ICDF(5003), AOM_ICDF(5328), AOM_ICDF(15420), AOM_ICDF(15723),
- AOM_ICDF(32768), },
- {AOM_ICDF(4915), AOM_ICDF(9830), AOM_ICDF(18022), AOM_ICDF(22938),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(7874), AOM_ICDF(17174), AOM_ICDF(19119), AOM_ICDF(28514),
- AOM_ICDF(29361), AOM_ICDF(32768), },
- {AOM_ICDF(3407), AOM_ICDF(13628), AOM_ICDF(16836), AOM_ICDF(26723),
- AOM_ICDF(27681), AOM_ICDF(32768), },
- {AOM_ICDF(1062), AOM_ICDF(11514), AOM_ICDF(14002), AOM_ICDF(24081),
- AOM_ICDF(25232), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(23614), AOM_ICDF(24717), AOM_ICDF(31593), AOM_ICDF(31927),
- AOM_ICDF(32768), },
- {AOM_ICDF(18177), AOM_ICDF(21581), AOM_ICDF(30890), AOM_ICDF(31234),
- AOM_ICDF(32768), },
- {AOM_ICDF(12535), AOM_ICDF(14549), AOM_ICDF(27749), AOM_ICDF(28134),
- AOM_ICDF(32768), },
- {AOM_ICDF(9687), AOM_ICDF(10712), AOM_ICDF(23848), AOM_ICDF(24271),
- AOM_ICDF(32768), },
- {AOM_ICDF(6461), AOM_ICDF(7119), AOM_ICDF(17940), AOM_ICDF(18368),
- AOM_ICDF(32768), },
- {AOM_ICDF(3863), AOM_ICDF(4245), AOM_ICDF(10904), AOM_ICDF(11278),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(24334), AOM_ICDF(25912), AOM_ICDF(31795), AOM_ICDF(32120),
- AOM_ICDF(32768), },
- {AOM_ICDF(17964), AOM_ICDF(20229), AOM_ICDF(30726), AOM_ICDF(31064),
- AOM_ICDF(32768), },
- {AOM_ICDF(10463), AOM_ICDF(11527), AOM_ICDF(25898), AOM_ICDF(26256),
- AOM_ICDF(32768), },
- {AOM_ICDF(7431), AOM_ICDF(8071), AOM_ICDF(20542), AOM_ICDF(20928),
- AOM_ICDF(32768), },
- {AOM_ICDF(4561), AOM_ICDF(4995), AOM_ICDF(13977), AOM_ICDF(14347),
- AOM_ICDF(32768), },
- {AOM_ICDF(2427), AOM_ICDF(2687), AOM_ICDF(8149), AOM_ICDF(8409),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(25888), AOM_ICDF(27308), AOM_ICDF(31957), AOM_ICDF(32279),
- AOM_ICDF(32768), },
- {AOM_ICDF(18868), AOM_ICDF(20992), AOM_ICDF(31092), AOM_ICDF(31424),
- AOM_ICDF(32768), },
- {AOM_ICDF(10480), AOM_ICDF(11191), AOM_ICDF(25801), AOM_ICDF(26149),
- AOM_ICDF(32768), },
- {AOM_ICDF(6878), AOM_ICDF(7326), AOM_ICDF(19397), AOM_ICDF(19762),
- AOM_ICDF(32768), },
- {AOM_ICDF(4235), AOM_ICDF(4601), AOM_ICDF(13182), AOM_ICDF(13587),
- AOM_ICDF(32768), },
- {AOM_ICDF(3584), AOM_ICDF(5120), AOM_ICDF(11264), AOM_ICDF(13312),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(26802), AOM_ICDF(28181), AOM_ICDF(32031), AOM_ICDF(32349),
- AOM_ICDF(32768), },
- {AOM_ICDF(19661), AOM_ICDF(21746), AOM_ICDF(31360), AOM_ICDF(31688),
- AOM_ICDF(32768), },
- {AOM_ICDF(10680), AOM_ICDF(11361), AOM_ICDF(26261), AOM_ICDF(26610),
- AOM_ICDF(32768), },
- {AOM_ICDF(6811), AOM_ICDF(7274), AOM_ICDF(19689), AOM_ICDF(20075),
- AOM_ICDF(32768), },
- {AOM_ICDF(4881), AOM_ICDF(5230), AOM_ICDF(11882), AOM_ICDF(12324),
- AOM_ICDF(32768), },
- {AOM_ICDF(4096), AOM_ICDF(6144), AOM_ICDF(9557), AOM_ICDF(11605),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(27511), AOM_ICDF(29045), AOM_ICDF(32051), AOM_ICDF(32376),
- AOM_ICDF(32768), },
- {AOM_ICDF(19712), AOM_ICDF(22596), AOM_ICDF(31464), AOM_ICDF(31813),
- AOM_ICDF(32768), },
- {AOM_ICDF(11035), AOM_ICDF(11852), AOM_ICDF(26626), AOM_ICDF(27082),
- AOM_ICDF(32768), },
- {AOM_ICDF(7190), AOM_ICDF(7674), AOM_ICDF(20245), AOM_ICDF(20794),
- AOM_ICDF(32768), },
- {AOM_ICDF(5114), AOM_ICDF(5407), AOM_ICDF(12895), AOM_ICDF(13443),
- AOM_ICDF(32768), },
- {AOM_ICDF(5174), AOM_ICDF(10348), AOM_ICDF(15522), AOM_ICDF(20696),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(26201), AOM_ICDF(26641), AOM_ICDF(31158), AOM_ICDF(31755),
- AOM_ICDF(32200), AOM_ICDF(32768), },
- {AOM_ICDF(19651), AOM_ICDF(20883), AOM_ICDF(28935), AOM_ICDF(30581),
- AOM_ICDF(31426), AOM_ICDF(32768), },
- {AOM_ICDF(12456), AOM_ICDF(15868), AOM_ICDF(23727), AOM_ICDF(27839),
- AOM_ICDF(29216), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(16708), AOM_ICDF(25600), AOM_ICDF(31550), AOM_ICDF(31927),
- AOM_ICDF(32768), },
- {AOM_ICDF(14533), AOM_ICDF(24134), AOM_ICDF(31151), AOM_ICDF(31670),
- AOM_ICDF(32768), },
- {AOM_ICDF(12771), AOM_ICDF(19041), AOM_ICDF(29256), AOM_ICDF(29926),
- AOM_ICDF(32768), },
- {AOM_ICDF(9497), AOM_ICDF(12011), AOM_ICDF(24856), AOM_ICDF(25648),
- AOM_ICDF(32768), },
- {AOM_ICDF(6059), AOM_ICDF(6512), AOM_ICDF(17765), AOM_ICDF(18218),
- AOM_ICDF(32768), },
- {AOM_ICDF(4498), AOM_ICDF(6425), AOM_ICDF(13493), AOM_ICDF(15420),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(21314), AOM_ICDF(26763), AOM_ICDF(31645), AOM_ICDF(32043),
- AOM_ICDF(32768), },
- {AOM_ICDF(16898), AOM_ICDF(23241), AOM_ICDF(31276), AOM_ICDF(31667),
- AOM_ICDF(32768), },
- {AOM_ICDF(12339), AOM_ICDF(16091), AOM_ICDF(28493), AOM_ICDF(28851),
- AOM_ICDF(32768), },
- {AOM_ICDF(8583), AOM_ICDF(10033), AOM_ICDF(23721), AOM_ICDF(24359),
- AOM_ICDF(32768), },
- {AOM_ICDF(6801), AOM_ICDF(7728), AOM_ICDF(18857), AOM_ICDF(19784),
- AOM_ICDF(32768), },
- {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(18432), AOM_ICDF(24576),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(25155), AOM_ICDF(28551), AOM_ICDF(31936), AOM_ICDF(32273),
- AOM_ICDF(32768), },
- {AOM_ICDF(18054), AOM_ICDF(22818), AOM_ICDF(31343), AOM_ICDF(31736),
- AOM_ICDF(32768), },
- {AOM_ICDF(12381), AOM_ICDF(14088), AOM_ICDF(27865), AOM_ICDF(28300),
- AOM_ICDF(32768), },
- {AOM_ICDF(7853), AOM_ICDF(8666), AOM_ICDF(21665), AOM_ICDF(22477),
- AOM_ICDF(32768), },
- {AOM_ICDF(6242), AOM_ICDF(10923), AOM_ICDF(15604), AOM_ICDF(20285),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(26649), AOM_ICDF(29334), AOM_ICDF(32001), AOM_ICDF(32345),
- AOM_ICDF(32768), },
- {AOM_ICDF(18410), AOM_ICDF(22788), AOM_ICDF(31465), AOM_ICDF(31842),
- AOM_ICDF(32768), },
- {AOM_ICDF(12504), AOM_ICDF(13480), AOM_ICDF(28600), AOM_ICDF(28955),
- AOM_ICDF(32768), },
- {AOM_ICDF(9175), AOM_ICDF(10486), AOM_ICDF(21845), AOM_ICDF(23156),
- AOM_ICDF(32768), },
- {AOM_ICDF(7710), AOM_ICDF(13493), AOM_ICDF(21203), AOM_ICDF(26985),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(27622), AOM_ICDF(30399), AOM_ICDF(32070), AOM_ICDF(32399),
- AOM_ICDF(32768), },
- {AOM_ICDF(18214), AOM_ICDF(24797), AOM_ICDF(31688), AOM_ICDF(32070),
- AOM_ICDF(32768), },
- {AOM_ICDF(14564), AOM_ICDF(16894), AOM_ICDF(28981), AOM_ICDF(29564),
- AOM_ICDF(32768), },
- {AOM_ICDF(7802), AOM_ICDF(12483), AOM_ICDF(17164), AOM_ICDF(21845),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(30040), AOM_ICDF(30464), AOM_ICDF(31682), AOM_ICDF(32091),
- AOM_ICDF(32421), AOM_ICDF(32768), },
- {AOM_ICDF(20770), AOM_ICDF(22635), AOM_ICDF(29889), AOM_ICDF(31156),
- AOM_ICDF(31909), AOM_ICDF(32768), },
- {AOM_ICDF(9112), AOM_ICDF(13841), AOM_ICDF(23864), AOM_ICDF(27288),
- AOM_ICDF(30322), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(23477), AOM_ICDF(28240), AOM_ICDF(32035), AOM_ICDF(32360),
- AOM_ICDF(32768), },
- {AOM_ICDF(18183), AOM_ICDF(26268), AOM_ICDF(31861), AOM_ICDF(32205),
- AOM_ICDF(32768), },
- {AOM_ICDF(14392), AOM_ICDF(23052), AOM_ICDF(30811), AOM_ICDF(31315),
- AOM_ICDF(32768), },
- {AOM_ICDF(12579), AOM_ICDF(20081), AOM_ICDF(28411), AOM_ICDF(29467),
- AOM_ICDF(32768), },
- {AOM_ICDF(9882), AOM_ICDF(14796), AOM_ICDF(25492), AOM_ICDF(27040),
- AOM_ICDF(32768), },
- {AOM_ICDF(11141), AOM_ICDF(13107), AOM_ICDF(21627), AOM_ICDF(23593),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(24700), AOM_ICDF(28735), AOM_ICDF(32055), AOM_ICDF(32379),
- AOM_ICDF(32768), },
- {AOM_ICDF(19703), AOM_ICDF(25203), AOM_ICDF(31809), AOM_ICDF(32142),
- AOM_ICDF(32768), },
- {AOM_ICDF(12756), AOM_ICDF(18882), AOM_ICDF(30716), AOM_ICDF(31103),
- AOM_ICDF(32768), },
- {AOM_ICDF(9508), AOM_ICDF(13922), AOM_ICDF(25977), AOM_ICDF(26826),
- AOM_ICDF(32768), },
- {AOM_ICDF(5243), AOM_ICDF(9175), AOM_ICDF(19661), AOM_ICDF(23593),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(26792), AOM_ICDF(29367), AOM_ICDF(32090), AOM_ICDF(32407),
- AOM_ICDF(32768), },
- {AOM_ICDF(21899), AOM_ICDF(25640), AOM_ICDF(31870), AOM_ICDF(32192),
- AOM_ICDF(32768), },
- {AOM_ICDF(14205), AOM_ICDF(16907), AOM_ICDF(30415), AOM_ICDF(30764),
- AOM_ICDF(32768), },
- {AOM_ICDF(10570), AOM_ICDF(13741), AOM_ICDF(23255), AOM_ICDF(26426),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(27743), AOM_ICDF(29950), AOM_ICDF(32116), AOM_ICDF(32430),
- AOM_ICDF(32768), },
- {AOM_ICDF(21595), AOM_ICDF(24944), AOM_ICDF(31927), AOM_ICDF(32259),
- AOM_ICDF(32768), },
- {AOM_ICDF(15227), AOM_ICDF(16673), AOM_ICDF(30744), AOM_ICDF(31130),
- AOM_ICDF(32768), },
- {AOM_ICDF(13797), AOM_ICDF(16384), AOM_ICDF(25007), AOM_ICDF(27594),
- AOM_ICDF(32768), },
- {AOM_ICDF(8192), AOM_ICDF(14336), AOM_ICDF(20480), AOM_ICDF(26624),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(28888), AOM_ICDF(30883), AOM_ICDF(32127), AOM_ICDF(32447),
- AOM_ICDF(32768), },
- {AOM_ICDF(20978), AOM_ICDF(26121), AOM_ICDF(32090), AOM_ICDF(32406),
- AOM_ICDF(32768), },
- {AOM_ICDF(16644), AOM_ICDF(18725), AOM_ICDF(30427), AOM_ICDF(31468),
- AOM_ICDF(32768), },
- {AOM_ICDF(6554), AOM_ICDF(11469), AOM_ICDF(22938), AOM_ICDF(27853),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- },
- { // TX 16X16
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(2791), AOM_ICDF(5929), AOM_ICDF(15783), AOM_ICDF(21305),
- AOM_ICDF(24756), AOM_ICDF(32768), },
- {AOM_ICDF(2492), AOM_ICDF(5974), AOM_ICDF(11999), AOM_ICDF(17892),
- AOM_ICDF(20328), AOM_ICDF(32768), },
- {AOM_ICDF(1232), AOM_ICDF(4784), AOM_ICDF(7266), AOM_ICDF(13409),
- AOM_ICDF(14638), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(10984), AOM_ICDF(15590), AOM_ICDF(26386), AOM_ICDF(26860),
- AOM_ICDF(32768), },
- {AOM_ICDF(10300), AOM_ICDF(15555), AOM_ICDF(26075), AOM_ICDF(26661),
- AOM_ICDF(32768), },
- {AOM_ICDF(9016), AOM_ICDF(12368), AOM_ICDF(23292), AOM_ICDF(24037),
- AOM_ICDF(32768), },
- {AOM_ICDF(7432), AOM_ICDF(9010), AOM_ICDF(19640), AOM_ICDF(20245),
- AOM_ICDF(32768), },
- {AOM_ICDF(5340), AOM_ICDF(5830), AOM_ICDF(14605), AOM_ICDF(15017),
- AOM_ICDF(32768), },
- {AOM_ICDF(3041), AOM_ICDF(3357), AOM_ICDF(8664), AOM_ICDF(8983),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(17487), AOM_ICDF(19944), AOM_ICDF(29422), AOM_ICDF(29785),
- AOM_ICDF(32768), },
- {AOM_ICDF(14365), AOM_ICDF(17572), AOM_ICDF(28369), AOM_ICDF(28763),
- AOM_ICDF(32768), },
- {AOM_ICDF(10944), AOM_ICDF(12562), AOM_ICDF(24945), AOM_ICDF(25372),
- AOM_ICDF(32768), },
- {AOM_ICDF(8061), AOM_ICDF(8670), AOM_ICDF(20179), AOM_ICDF(20570),
- AOM_ICDF(32768), },
- {AOM_ICDF(5386), AOM_ICDF(5759), AOM_ICDF(14881), AOM_ICDF(15238),
- AOM_ICDF(32768), },
- {AOM_ICDF(3124), AOM_ICDF(3450), AOM_ICDF(9578), AOM_ICDF(9895),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(21610), AOM_ICDF(23212), AOM_ICDF(30674), AOM_ICDF(31007),
- AOM_ICDF(32768), },
- {AOM_ICDF(15516), AOM_ICDF(17922), AOM_ICDF(29225), AOM_ICDF(29573),
- AOM_ICDF(32768), },
- {AOM_ICDF(10431), AOM_ICDF(11308), AOM_ICDF(24594), AOM_ICDF(24955),
- AOM_ICDF(32768), },
- {AOM_ICDF(6949), AOM_ICDF(7331), AOM_ICDF(18758), AOM_ICDF(19089),
- AOM_ICDF(32768), },
- {AOM_ICDF(4564), AOM_ICDF(4898), AOM_ICDF(12730), AOM_ICDF(13048),
- AOM_ICDF(32768), },
- {AOM_ICDF(2435), AOM_ICDF(2739), AOM_ICDF(7406), AOM_ICDF(7710),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(24469), AOM_ICDF(25838), AOM_ICDF(31499), AOM_ICDF(31824),
- AOM_ICDF(32768), },
- {AOM_ICDF(17238), AOM_ICDF(18899), AOM_ICDF(30066), AOM_ICDF(30395),
- AOM_ICDF(32768), },
- {AOM_ICDF(10423), AOM_ICDF(10890), AOM_ICDF(24655), AOM_ICDF(24992),
- AOM_ICDF(32768), },
- {AOM_ICDF(6612), AOM_ICDF(6939), AOM_ICDF(18149), AOM_ICDF(18467),
- AOM_ICDF(32768), },
- {AOM_ICDF(4122), AOM_ICDF(4431), AOM_ICDF(12556), AOM_ICDF(12874),
- AOM_ICDF(32768), },
- {AOM_ICDF(1910), AOM_ICDF(2211), AOM_ICDF(7840), AOM_ICDF(8142),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(27205), AOM_ICDF(28145), AOM_ICDF(31900), AOM_ICDF(32218),
- AOM_ICDF(32768), },
- {AOM_ICDF(18503), AOM_ICDF(19729), AOM_ICDF(30590), AOM_ICDF(30916),
- AOM_ICDF(32768), },
- {AOM_ICDF(10343), AOM_ICDF(10734), AOM_ICDF(24636), AOM_ICDF(24963),
- AOM_ICDF(32768), },
- {AOM_ICDF(6629), AOM_ICDF(6955), AOM_ICDF(18492), AOM_ICDF(18810),
- AOM_ICDF(32768), },
- {AOM_ICDF(4131), AOM_ICDF(4437), AOM_ICDF(13086), AOM_ICDF(13392),
- AOM_ICDF(32768), },
- {AOM_ICDF(4005), AOM_ICDF(5097), AOM_ICDF(9102), AOM_ICDF(10194),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(1286), AOM_ICDF(10273), AOM_ICDF(21021), AOM_ICDF(28617),
- AOM_ICDF(29729), AOM_ICDF(32768), },
- {AOM_ICDF(941), AOM_ICDF(10009), AOM_ICDF(17718), AOM_ICDF(25847),
- AOM_ICDF(27712), AOM_ICDF(32768), },
- {AOM_ICDF(508), AOM_ICDF(9488), AOM_ICDF(12907), AOM_ICDF(21634),
- AOM_ICDF(23969), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(23900), AOM_ICDF(25135), AOM_ICDF(31528), AOM_ICDF(31861),
- AOM_ICDF(32768), },
- {AOM_ICDF(18613), AOM_ICDF(22015), AOM_ICDF(30774), AOM_ICDF(31124),
- AOM_ICDF(32768), },
- {AOM_ICDF(13064), AOM_ICDF(16135), AOM_ICDF(28060), AOM_ICDF(28484),
- AOM_ICDF(32768), },
- {AOM_ICDF(10563), AOM_ICDF(12428), AOM_ICDF(24847), AOM_ICDF(25281),
- AOM_ICDF(32768), },
- {AOM_ICDF(7960), AOM_ICDF(9069), AOM_ICDF(20548), AOM_ICDF(21017),
- AOM_ICDF(32768), },
- {AOM_ICDF(6944), AOM_ICDF(7491), AOM_ICDF(16595), AOM_ICDF(17007),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(24972), AOM_ICDF(26434), AOM_ICDF(31771), AOM_ICDF(32097),
- AOM_ICDF(32768), },
- {AOM_ICDF(18362), AOM_ICDF(20757), AOM_ICDF(30733), AOM_ICDF(31070),
- AOM_ICDF(32768), },
- {AOM_ICDF(11226), AOM_ICDF(12487), AOM_ICDF(26292), AOM_ICDF(26651),
- AOM_ICDF(32768), },
- {AOM_ICDF(7823), AOM_ICDF(8448), AOM_ICDF(20940), AOM_ICDF(21314),
- AOM_ICDF(32768), },
- {AOM_ICDF(4964), AOM_ICDF(5365), AOM_ICDF(14104), AOM_ICDF(14457),
- AOM_ICDF(32768), },
- {AOM_ICDF(2435), AOM_ICDF(2712), AOM_ICDF(8247), AOM_ICDF(8524),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(26551), AOM_ICDF(27694), AOM_ICDF(31943), AOM_ICDF(32261),
- AOM_ICDF(32768), },
- {AOM_ICDF(19519), AOM_ICDF(21452), AOM_ICDF(31120), AOM_ICDF(31446),
- AOM_ICDF(32768), },
- {AOM_ICDF(11272), AOM_ICDF(11965), AOM_ICDF(26389), AOM_ICDF(26736),
- AOM_ICDF(32768), },
- {AOM_ICDF(7109), AOM_ICDF(7485), AOM_ICDF(19585), AOM_ICDF(19920),
- AOM_ICDF(32768), },
- {AOM_ICDF(4033), AOM_ICDF(4370), AOM_ICDF(12546), AOM_ICDF(12865),
- AOM_ICDF(32768), },
- {AOM_ICDF(1570), AOM_ICDF(2158), AOM_ICDF(7456), AOM_ICDF(8045),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(27654), AOM_ICDF(28637), AOM_ICDF(32030), AOM_ICDF(32345),
- AOM_ICDF(32768), },
- {AOM_ICDF(20795), AOM_ICDF(22232), AOM_ICDF(31351), AOM_ICDF(31672),
- AOM_ICDF(32768), },
- {AOM_ICDF(10841), AOM_ICDF(11329), AOM_ICDF(25676), AOM_ICDF(26002),
- AOM_ICDF(32768), },
- {AOM_ICDF(6589), AOM_ICDF(6943), AOM_ICDF(18084), AOM_ICDF(18412),
- AOM_ICDF(32768), },
- {AOM_ICDF(3970), AOM_ICDF(4279), AOM_ICDF(12009), AOM_ICDF(12318),
- AOM_ICDF(32768), },
- {AOM_ICDF(3449), AOM_ICDF(3967), AOM_ICDF(7761), AOM_ICDF(8278),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(29545), AOM_ICDF(30314), AOM_ICDF(32084), AOM_ICDF(32404),
- AOM_ICDF(32768), },
- {AOM_ICDF(21229), AOM_ICDF(22783), AOM_ICDF(31470), AOM_ICDF(31800),
- AOM_ICDF(32768), },
- {AOM_ICDF(10409), AOM_ICDF(11031), AOM_ICDF(25267), AOM_ICDF(25669),
- AOM_ICDF(32768), },
- {AOM_ICDF(6456), AOM_ICDF(6909), AOM_ICDF(18270), AOM_ICDF(18674),
- AOM_ICDF(32768), },
- {AOM_ICDF(4253), AOM_ICDF(5017), AOM_ICDF(13288), AOM_ICDF(13706),
- AOM_ICDF(32768), },
- {AOM_ICDF(1627), AOM_ICDF(2324), AOM_ICDF(8831), AOM_ICDF(9528),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(24627), AOM_ICDF(25102), AOM_ICDF(30943), AOM_ICDF(31607),
- AOM_ICDF(32215), AOM_ICDF(32768), },
- {AOM_ICDF(17408), AOM_ICDF(18757), AOM_ICDF(28256), AOM_ICDF(30111),
- AOM_ICDF(31225), AOM_ICDF(32768), },
- {AOM_ICDF(10984), AOM_ICDF(14293), AOM_ICDF(22894), AOM_ICDF(27503),
- AOM_ICDF(28853), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(16390), AOM_ICDF(25826), AOM_ICDF(31293), AOM_ICDF(31726),
- AOM_ICDF(32768), },
- {AOM_ICDF(14074), AOM_ICDF(25147), AOM_ICDF(31045), AOM_ICDF(31638),
- AOM_ICDF(32768), },
- {AOM_ICDF(13598), AOM_ICDF(20524), AOM_ICDF(28818), AOM_ICDF(29894),
- AOM_ICDF(32768), },
- {AOM_ICDF(10035), AOM_ICDF(13322), AOM_ICDF(25086), AOM_ICDF(26332),
- AOM_ICDF(32768), },
- {AOM_ICDF(7156), AOM_ICDF(8035), AOM_ICDF(18456), AOM_ICDF(19334),
- AOM_ICDF(32768), },
- {AOM_ICDF(8192), AOM_ICDF(10923), AOM_ICDF(19115), AOM_ICDF(21845),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(22787), AOM_ICDF(27489), AOM_ICDF(31676), AOM_ICDF(32026),
- AOM_ICDF(32768), },
- {AOM_ICDF(17518), AOM_ICDF(23800), AOM_ICDF(31204), AOM_ICDF(31578),
- AOM_ICDF(32768), },
- {AOM_ICDF(10686), AOM_ICDF(15226), AOM_ICDF(28087), AOM_ICDF(28560),
- AOM_ICDF(32768), },
- {AOM_ICDF(9612), AOM_ICDF(11942), AOM_ICDF(22574), AOM_ICDF(23010),
- AOM_ICDF(32768), },
- {AOM_ICDF(6437), AOM_ICDF(8192), AOM_ICDF(18139), AOM_ICDF(19895),
- AOM_ICDF(32768), },
- {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(18432), AOM_ICDF(24576),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(26773), AOM_ICDF(28429), AOM_ICDF(31782), AOM_ICDF(32120),
- AOM_ICDF(32768), },
- {AOM_ICDF(18449), AOM_ICDF(22329), AOM_ICDF(30991), AOM_ICDF(31329),
- AOM_ICDF(32768), },
- {AOM_ICDF(12861), AOM_ICDF(14182), AOM_ICDF(27130), AOM_ICDF(27395),
- AOM_ICDF(32768), },
- {AOM_ICDF(4681), AOM_ICDF(6554), AOM_ICDF(22469), AOM_ICDF(23874),
- AOM_ICDF(32768), },
- {AOM_ICDF(8623), AOM_ICDF(13797), AOM_ICDF(22420), AOM_ICDF(27594),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(28378), AOM_ICDF(29466), AOM_ICDF(31934), AOM_ICDF(32245),
- AOM_ICDF(32768), },
- {AOM_ICDF(19880), AOM_ICDF(21733), AOM_ICDF(31206), AOM_ICDF(31550),
- AOM_ICDF(32768), },
- {AOM_ICDF(12173), AOM_ICDF(13245), AOM_ICDF(27638), AOM_ICDF(27945),
- AOM_ICDF(32768), },
- {AOM_ICDF(6215), AOM_ICDF(7910), AOM_ICDF(19774), AOM_ICDF(21469),
- AOM_ICDF(32768), },
- {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(21203), AOM_ICDF(26985),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(30333), AOM_ICDF(31015), AOM_ICDF(32078), AOM_ICDF(32401),
- AOM_ICDF(32768), },
- {AOM_ICDF(19277), AOM_ICDF(21376), AOM_ICDF(31072), AOM_ICDF(31407),
- AOM_ICDF(32768), },
- {AOM_ICDF(12978), AOM_ICDF(13724), AOM_ICDF(28144), AOM_ICDF(28442),
- AOM_ICDF(32768), },
- {AOM_ICDF(10031), AOM_ICDF(12037), AOM_ICDF(25412), AOM_ICDF(27418),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(29777), AOM_ICDF(30229), AOM_ICDF(31726), AOM_ICDF(32104),
- AOM_ICDF(32440), AOM_ICDF(32768), },
- {AOM_ICDF(18551), AOM_ICDF(20755), AOM_ICDF(29778), AOM_ICDF(30685),
- AOM_ICDF(31935), AOM_ICDF(32768), },
- {AOM_ICDF(6236), AOM_ICDF(13170), AOM_ICDF(24037), AOM_ICDF(25823),
- AOM_ICDF(30798), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(28890), AOM_ICDF(30863), AOM_ICDF(32128), AOM_ICDF(32440),
- AOM_ICDF(32768), },
- {AOM_ICDF(17311), AOM_ICDF(27082), AOM_ICDF(31871), AOM_ICDF(32209),
- AOM_ICDF(32768), },
- {AOM_ICDF(13447), AOM_ICDF(25217), AOM_ICDF(31158), AOM_ICDF(31793),
- AOM_ICDF(32768), },
- {AOM_ICDF(11906), AOM_ICDF(20177), AOM_ICDF(29976), AOM_ICDF(30713),
- AOM_ICDF(32768), },
- {AOM_ICDF(14883), AOM_ICDF(17134), AOM_ICDF(27140), AOM_ICDF(28266),
- AOM_ICDF(32768), },
- {AOM_ICDF(14959), AOM_ICDF(17096), AOM_ICDF(22795), AOM_ICDF(25645),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(29494), AOM_ICDF(30807), AOM_ICDF(32086), AOM_ICDF(32404),
- AOM_ICDF(32768), },
- {AOM_ICDF(19860), AOM_ICDF(25179), AOM_ICDF(31857), AOM_ICDF(32190),
- AOM_ICDF(32768), },
- {AOM_ICDF(13936), AOM_ICDF(19209), AOM_ICDF(30508), AOM_ICDF(31073),
- AOM_ICDF(32768), },
- {AOM_ICDF(7168), AOM_ICDF(10240), AOM_ICDF(24576), AOM_ICDF(27648),
- AOM_ICDF(32768), },
- {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(19275), AOM_ICDF(25058),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(30496), AOM_ICDF(31243), AOM_ICDF(32121), AOM_ICDF(32433),
- AOM_ICDF(32768), },
- {AOM_ICDF(21369), AOM_ICDF(24262), AOM_ICDF(31827), AOM_ICDF(32158),
- AOM_ICDF(32768), },
- {AOM_ICDF(18971), AOM_ICDF(21127), AOM_ICDF(29319), AOM_ICDF(30612),
- AOM_ICDF(32768), },
- {AOM_ICDF(7710), AOM_ICDF(13493), AOM_ICDF(21203), AOM_ICDF(26985),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(30922), AOM_ICDF(31459), AOM_ICDF(32136), AOM_ICDF(32449),
- AOM_ICDF(32768), },
- {AOM_ICDF(22640), AOM_ICDF(24782), AOM_ICDF(31768), AOM_ICDF(32076),
- AOM_ICDF(32768), },
- {AOM_ICDF(12955), AOM_ICDF(14860), AOM_ICDF(28958), AOM_ICDF(30101),
- AOM_ICDF(32768), },
- {AOM_ICDF(7282), AOM_ICDF(12743), AOM_ICDF(21845), AOM_ICDF(27307),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(30469), AOM_ICDF(31279), AOM_ICDF(32115), AOM_ICDF(32446),
- AOM_ICDF(32768), },
- {AOM_ICDF(19748), AOM_ICDF(24367), AOM_ICDF(31900), AOM_ICDF(32257),
- AOM_ICDF(32768), },
- {AOM_ICDF(12684), AOM_ICDF(16120), AOM_ICDF(30125), AOM_ICDF(30918),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- },
- { // TX 32X32
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(8402), AOM_ICDF(9860), AOM_ICDF(23425), AOM_ICDF(26798),
- AOM_ICDF(28753), AOM_ICDF(32768), },
- {AOM_ICDF(4503), AOM_ICDF(7478), AOM_ICDF(14541), AOM_ICDF(19455),
- AOM_ICDF(21058), AOM_ICDF(32768), },
- {AOM_ICDF(1404), AOM_ICDF(4914), AOM_ICDF(7456), AOM_ICDF(13239),
- AOM_ICDF(14005), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(11786), AOM_ICDF(17804), AOM_ICDF(26686), AOM_ICDF(27285),
- AOM_ICDF(32768), },
- {AOM_ICDF(10456), AOM_ICDF(16685), AOM_ICDF(26272), AOM_ICDF(27135),
- AOM_ICDF(32768), },
- {AOM_ICDF(8297), AOM_ICDF(12591), AOM_ICDF(23088), AOM_ICDF(24288),
- AOM_ICDF(32768), },
- {AOM_ICDF(6320), AOM_ICDF(8297), AOM_ICDF(18902), AOM_ICDF(20112),
- AOM_ICDF(32768), },
- {AOM_ICDF(4385), AOM_ICDF(4892), AOM_ICDF(12779), AOM_ICDF(13476),
- AOM_ICDF(32768), },
- {AOM_ICDF(2151), AOM_ICDF(2470), AOM_ICDF(6432), AOM_ICDF(6758),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(17988), AOM_ICDF(21025), AOM_ICDF(29658), AOM_ICDF(30075),
- AOM_ICDF(32768), },
- {AOM_ICDF(14641), AOM_ICDF(18188), AOM_ICDF(28759), AOM_ICDF(29202),
- AOM_ICDF(32768), },
- {AOM_ICDF(10951), AOM_ICDF(12924), AOM_ICDF(25087), AOM_ICDF(25515),
- AOM_ICDF(32768), },
- {AOM_ICDF(8192), AOM_ICDF(9165), AOM_ICDF(20302), AOM_ICDF(20696),
- AOM_ICDF(32768), },
- {AOM_ICDF(5213), AOM_ICDF(5567), AOM_ICDF(14740), AOM_ICDF(15114),
- AOM_ICDF(32768), },
- {AOM_ICDF(2785), AOM_ICDF(3096), AOM_ICDF(8153), AOM_ICDF(8465),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(22839), AOM_ICDF(24625), AOM_ICDF(31013), AOM_ICDF(31343),
- AOM_ICDF(32768), },
- {AOM_ICDF(16111), AOM_ICDF(18689), AOM_ICDF(29552), AOM_ICDF(29896),
- AOM_ICDF(32768), },
- {AOM_ICDF(10736), AOM_ICDF(11502), AOM_ICDF(24493), AOM_ICDF(24827),
- AOM_ICDF(32768), },
- {AOM_ICDF(7153), AOM_ICDF(7570), AOM_ICDF(18744), AOM_ICDF(19067),
- AOM_ICDF(32768), },
- {AOM_ICDF(4285), AOM_ICDF(4591), AOM_ICDF(11651), AOM_ICDF(11957),
- AOM_ICDF(32768), },
- {AOM_ICDF(2064), AOM_ICDF(2322), AOM_ICDF(6321), AOM_ICDF(6579),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(24955), AOM_ICDF(26499), AOM_ICDF(31625), AOM_ICDF(31948),
- AOM_ICDF(32768), },
- {AOM_ICDF(17242), AOM_ICDF(19354), AOM_ICDF(30096), AOM_ICDF(30432),
- AOM_ICDF(32768), },
- {AOM_ICDF(10470), AOM_ICDF(11049), AOM_ICDF(24405), AOM_ICDF(24742),
- AOM_ICDF(32768), },
- {AOM_ICDF(6717), AOM_ICDF(7038), AOM_ICDF(17553), AOM_ICDF(17870),
- AOM_ICDF(32768), },
- {AOM_ICDF(4030), AOM_ICDF(4342), AOM_ICDF(11280), AOM_ICDF(11592),
- AOM_ICDF(32768), },
- {AOM_ICDF(2060), AOM_ICDF(2355), AOM_ICDF(6966), AOM_ICDF(7260),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(29697), AOM_ICDF(30286), AOM_ICDF(32009), AOM_ICDF(32325),
- AOM_ICDF(32768), },
- {AOM_ICDF(18629), AOM_ICDF(19720), AOM_ICDF(30251), AOM_ICDF(30574),
- AOM_ICDF(32768), },
- {AOM_ICDF(9459), AOM_ICDF(9826), AOM_ICDF(22948), AOM_ICDF(23264),
- AOM_ICDF(32768), },
- {AOM_ICDF(5742), AOM_ICDF(6057), AOM_ICDF(16269), AOM_ICDF(16580),
- AOM_ICDF(32768), },
- {AOM_ICDF(3696), AOM_ICDF(4006), AOM_ICDF(11276), AOM_ICDF(11586),
- AOM_ICDF(32768), },
- {AOM_ICDF(2359), AOM_ICDF(2614), AOM_ICDF(5801), AOM_ICDF(6056),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(14224), AOM_ICDF(15827), AOM_ICDF(27984), AOM_ICDF(30263),
- AOM_ICDF(31458), AOM_ICDF(32768), },
- {AOM_ICDF(4253), AOM_ICDF(7150), AOM_ICDF(20729), AOM_ICDF(24629),
- AOM_ICDF(28621), AOM_ICDF(32768), },
- {AOM_ICDF(1405), AOM_ICDF(5159), AOM_ICDF(12422), AOM_ICDF(17006),
- AOM_ICDF(24088), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(20029), AOM_ICDF(23525), AOM_ICDF(30941), AOM_ICDF(31369),
- AOM_ICDF(32768), },
- {AOM_ICDF(15691), AOM_ICDF(22792), AOM_ICDF(30520), AOM_ICDF(30960),
- AOM_ICDF(32768), },
- {AOM_ICDF(12036), AOM_ICDF(18829), AOM_ICDF(28256), AOM_ICDF(29025),
- AOM_ICDF(32768), },
- {AOM_ICDF(10881), AOM_ICDF(14586), AOM_ICDF(25416), AOM_ICDF(26318),
- AOM_ICDF(32768), },
- {AOM_ICDF(11249), AOM_ICDF(13311), AOM_ICDF(23713), AOM_ICDF(24498),
- AOM_ICDF(32768), },
- {AOM_ICDF(9444), AOM_ICDF(10609), AOM_ICDF(20170), AOM_ICDF(21025),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(23805), AOM_ICDF(26370), AOM_ICDF(31579), AOM_ICDF(31927),
- AOM_ICDF(32768), },
- {AOM_ICDF(16685), AOM_ICDF(21243), AOM_ICDF(30526), AOM_ICDF(30890),
- AOM_ICDF(32768), },
- {AOM_ICDF(11661), AOM_ICDF(14143), AOM_ICDF(26804), AOM_ICDF(27193),
- AOM_ICDF(32768), },
- {AOM_ICDF(8321), AOM_ICDF(9593), AOM_ICDF(21814), AOM_ICDF(22228),
- AOM_ICDF(32768), },
- {AOM_ICDF(6243), AOM_ICDF(6820), AOM_ICDF(16151), AOM_ICDF(16506),
- AOM_ICDF(32768), },
- {AOM_ICDF(3612), AOM_ICDF(4386), AOM_ICDF(9547), AOM_ICDF(10321),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(26022), AOM_ICDF(27534), AOM_ICDF(31845), AOM_ICDF(32167),
- AOM_ICDF(32768), },
- {AOM_ICDF(18692), AOM_ICDF(21351), AOM_ICDF(30871), AOM_ICDF(31203),
- AOM_ICDF(32768), },
- {AOM_ICDF(11493), AOM_ICDF(12410), AOM_ICDF(26280), AOM_ICDF(26619),
- AOM_ICDF(32768), },
- {AOM_ICDF(7099), AOM_ICDF(7581), AOM_ICDF(19315), AOM_ICDF(19619),
- AOM_ICDF(32768), },
- {AOM_ICDF(3329), AOM_ICDF(3623), AOM_ICDF(10868), AOM_ICDF(11162),
- AOM_ICDF(32768), },
- {AOM_ICDF(3104), AOM_ICDF(4139), AOM_ICDF(10003), AOM_ICDF(11038),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(28126), AOM_ICDF(29216), AOM_ICDF(32027), AOM_ICDF(32345),
- AOM_ICDF(32768), },
- {AOM_ICDF(19828), AOM_ICDF(22063), AOM_ICDF(31140), AOM_ICDF(31465),
- AOM_ICDF(32768), },
- {AOM_ICDF(11206), AOM_ICDF(11832), AOM_ICDF(25718), AOM_ICDF(26041),
- AOM_ICDF(32768), },
- {AOM_ICDF(6496), AOM_ICDF(6825), AOM_ICDF(18069), AOM_ICDF(18408),
- AOM_ICDF(32768), },
- {AOM_ICDF(4600), AOM_ICDF(4904), AOM_ICDF(12431), AOM_ICDF(12735),
- AOM_ICDF(32768), },
- {AOM_ICDF(2016), AOM_ICDF(3529), AOM_ICDF(8066), AOM_ICDF(9578),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(30246), AOM_ICDF(30814), AOM_ICDF(32096), AOM_ICDF(32411),
- AOM_ICDF(32768), },
- {AOM_ICDF(21165), AOM_ICDF(22238), AOM_ICDF(31122), AOM_ICDF(31445),
- AOM_ICDF(32768), },
- {AOM_ICDF(10123), AOM_ICDF(10519), AOM_ICDF(24102), AOM_ICDF(24419),
- AOM_ICDF(32768), },
- {AOM_ICDF(5968), AOM_ICDF(6277), AOM_ICDF(17606), AOM_ICDF(17924),
- AOM_ICDF(32768), },
- {AOM_ICDF(4312), AOM_ICDF(4620), AOM_ICDF(12131), AOM_ICDF(12439),
- AOM_ICDF(32768), },
- {AOM_ICDF(4608), AOM_ICDF(6144), AOM_ICDF(9216), AOM_ICDF(10752),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(22808), AOM_ICDF(23508), AOM_ICDF(29956), AOM_ICDF(30649),
- AOM_ICDF(31698), AOM_ICDF(32768), },
- {AOM_ICDF(11001), AOM_ICDF(12792), AOM_ICDF(25018), AOM_ICDF(27680),
- AOM_ICDF(29623), AOM_ICDF(32768), },
- {AOM_ICDF(6919), AOM_ICDF(10026), AOM_ICDF(19635), AOM_ICDF(24728),
- AOM_ICDF(26490), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(12861), AOM_ICDF(25068), AOM_ICDF(30802), AOM_ICDF(31375),
- AOM_ICDF(32768), },
- {AOM_ICDF(11298), AOM_ICDF(21545), AOM_ICDF(29953), AOM_ICDF(30816),
- AOM_ICDF(32768), },
- {AOM_ICDF(13053), AOM_ICDF(24270), AOM_ICDF(28485), AOM_ICDF(29845),
- AOM_ICDF(32768), },
- {AOM_ICDF(7710), AOM_ICDF(15059), AOM_ICDF(26383), AOM_ICDF(28431),
- AOM_ICDF(32768), },
- {AOM_ICDF(8856), AOM_ICDF(10332), AOM_ICDF(18008), AOM_ICDF(19779),
- AOM_ICDF(32768), },
- {AOM_ICDF(3855), AOM_ICDF(7710), AOM_ICDF(19275), AOM_ICDF(22167),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(19458), AOM_ICDF(25796), AOM_ICDF(31754), AOM_ICDF(32007),
- AOM_ICDF(32768), },
- {AOM_ICDF(16458), AOM_ICDF(23827), AOM_ICDF(31294), AOM_ICDF(31638),
- AOM_ICDF(32768), },
- {AOM_ICDF(16274), AOM_ICDF(18913), AOM_ICDF(28150), AOM_ICDF(29029),
- AOM_ICDF(32768), },
- {AOM_ICDF(12429), AOM_ICDF(15254), AOM_ICDF(24858), AOM_ICDF(26553),
- AOM_ICDF(32768), },
- {AOM_ICDF(7399), AOM_ICDF(11627), AOM_ICDF(21141), AOM_ICDF(24312),
- AOM_ICDF(32768), },
- {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(17348), AOM_ICDF(23130),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(25493), AOM_ICDF(28975), AOM_ICDF(31960), AOM_ICDF(32271),
- AOM_ICDF(32768), },
- {AOM_ICDF(16904), AOM_ICDF(21759), AOM_ICDF(31381), AOM_ICDF(31728),
- AOM_ICDF(32768), },
- {AOM_ICDF(9709), AOM_ICDF(11529), AOM_ICDF(24879), AOM_ICDF(26700),
- AOM_ICDF(32768), },
- {AOM_ICDF(6554), AOM_ICDF(13107), AOM_ICDF(22938), AOM_ICDF(27853),
- AOM_ICDF(32768), },
- {AOM_ICDF(5461), AOM_ICDF(10923), AOM_ICDF(20025), AOM_ICDF(25486),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(26127), AOM_ICDF(28926), AOM_ICDF(31725), AOM_ICDF(32274),
- AOM_ICDF(32768), },
- {AOM_ICDF(17673), AOM_ICDF(25036), AOM_ICDF(31940), AOM_ICDF(32216),
- AOM_ICDF(32768), },
- {AOM_ICDF(14824), AOM_ICDF(17164), AOM_ICDF(26526), AOM_ICDF(28867),
- AOM_ICDF(32768), },
- {AOM_ICDF(7282), AOM_ICDF(16384), AOM_ICDF(21845), AOM_ICDF(27307),
- AOM_ICDF(32768), },
- {AOM_ICDF(8192), AOM_ICDF(14336), AOM_ICDF(20480), AOM_ICDF(26624),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(30683), AOM_ICDF(31149), AOM_ICDF(32155), AOM_ICDF(32449),
- AOM_ICDF(32768), },
- {AOM_ICDF(17896), AOM_ICDF(22055), AOM_ICDF(31508), AOM_ICDF(31886),
- AOM_ICDF(32768), },
- {AOM_ICDF(8548), AOM_ICDF(12822), AOM_ICDF(24220), AOM_ICDF(28494),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(27393), AOM_ICDF(28900), AOM_ICDF(31555), AOM_ICDF(31971),
- AOM_ICDF(32368), AOM_ICDF(32768), },
- {AOM_ICDF(8379), AOM_ICDF(19364), AOM_ICDF(27675), AOM_ICDF(28688),
- AOM_ICDF(31114), AOM_ICDF(32768), },
- {AOM_ICDF(1955), AOM_ICDF(19256), AOM_ICDF(24580), AOM_ICDF(25370),
- AOM_ICDF(30257), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(31085), AOM_ICDF(31718), AOM_ICDF(32129), AOM_ICDF(32443),
- AOM_ICDF(32768), },
- {AOM_ICDF(14336), AOM_ICDF(26852), AOM_ICDF(31370), AOM_ICDF(31760),
- AOM_ICDF(32768), },
- {AOM_ICDF(11751), AOM_ICDF(23544), AOM_ICDF(28851), AOM_ICDF(29567),
- AOM_ICDF(32768), },
- {AOM_ICDF(14670), AOM_ICDF(21251), AOM_ICDF(28381), AOM_ICDF(29752),
- AOM_ICDF(32768), },
- {AOM_ICDF(14832), AOM_ICDF(19316), AOM_ICDF(27134), AOM_ICDF(28974),
- AOM_ICDF(32768), },
- {AOM_ICDF(13312), AOM_ICDF(15360), AOM_ICDF(25600), AOM_ICDF(27648),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(31302), AOM_ICDF(31746), AOM_ICDF(32144), AOM_ICDF(32455),
- AOM_ICDF(32768), },
- {AOM_ICDF(18343), AOM_ICDF(26723), AOM_ICDF(32018), AOM_ICDF(32434),
- AOM_ICDF(32768), },
- {AOM_ICDF(10570), AOM_ICDF(16913), AOM_ICDF(29068), AOM_ICDF(30125),
- AOM_ICDF(32768), },
- {AOM_ICDF(5174), AOM_ICDF(13797), AOM_ICDF(24145), AOM_ICDF(26732),
- AOM_ICDF(32768), },
- {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(21203), AOM_ICDF(26985),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(31420), AOM_ICDF(31795), AOM_ICDF(32144), AOM_ICDF(32455),
- AOM_ICDF(32768), },
- {AOM_ICDF(21510), AOM_ICDF(28245), AOM_ICDF(32064), AOM_ICDF(32366),
- AOM_ICDF(32768), },
- {AOM_ICDF(6342), AOM_ICDF(11627), AOM_ICDF(25369), AOM_ICDF(28540),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(31470), AOM_ICDF(31806), AOM_ICDF(32143), AOM_ICDF(32455),
- AOM_ICDF(32768), },
- {AOM_ICDF(19571), AOM_ICDF(25722), AOM_ICDF(31538), AOM_ICDF(31985),
- AOM_ICDF(32768), },
- {AOM_ICDF(5461), AOM_ICDF(8738), AOM_ICDF(25122), AOM_ICDF(28399),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(31292), AOM_ICDF(31637), AOM_ICDF(32104), AOM_ICDF(32431),
- AOM_ICDF(32768), },
- {AOM_ICDF(12774), AOM_ICDF(16652), AOM_ICDF(30002), AOM_ICDF(30986),
- AOM_ICDF(32768), },
- {AOM_ICDF(4652), AOM_ICDF(11442), AOM_ICDF(30231), AOM_ICDF(30593),
- AOM_ICDF(32768), },
- {AOM_ICDF(7022), AOM_ICDF(10031), AOM_ICDF(28087), AOM_ICDF(29090),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- },
-};
+static const aom_cdf_prob av1_default_coeff_lps_multi_cdfs
+ [TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS]
+ [CDF_SIZE(BR_CDF_SIZE)] = {
+ { { { { AOM_CDF4(14298, 20718, 24174) },
+ { AOM_CDF4(12536, 19601, 23789) },
+ { AOM_CDF4(8712, 15051, 19503) },
+ { AOM_CDF4(6170, 11327, 15434) },
+ { AOM_CDF4(4742, 8926, 12538) },
+ { AOM_CDF4(3803, 7317, 10546) },
+ { AOM_CDF4(1696, 3317, 4871) },
+ { AOM_CDF4(14392, 19951, 22756) },
+ { AOM_CDF4(15978, 23218, 26818) },
+ { AOM_CDF4(12187, 19474, 23889) },
+ { AOM_CDF4(9176, 15640, 20259) },
+ { AOM_CDF4(7068, 12655, 17028) },
+ { AOM_CDF4(5656, 10442, 14472) },
+ { AOM_CDF4(2580, 4992, 7244) },
+ { AOM_CDF4(12136, 18049, 21426) },
+ { AOM_CDF4(13784, 20721, 24481) },
+ { AOM_CDF4(10836, 17621, 21900) },
+ { AOM_CDF4(8372, 14444, 18847) },
+ { AOM_CDF4(6523, 11779, 16000) },
+ { AOM_CDF4(5337, 9898, 13760) },
+ { AOM_CDF4(3034, 5860, 8462) } },
+ { { AOM_CDF4(15967, 22905, 26286) },
+ { AOM_CDF4(13534, 20654, 24579) },
+ { AOM_CDF4(9504, 16092, 20535) },
+ { AOM_CDF4(6975, 12568, 16903) },
+ { AOM_CDF4(5364, 10091, 14020) },
+ { AOM_CDF4(4357, 8370, 11857) },
+ { AOM_CDF4(2506, 4934, 7218) },
+ { AOM_CDF4(23032, 28815, 30936) },
+ { AOM_CDF4(19540, 26704, 29719) },
+ { AOM_CDF4(15158, 22969, 27097) },
+ { AOM_CDF4(11408, 18865, 23650) },
+ { AOM_CDF4(8885, 15448, 20250) },
+ { AOM_CDF4(7108, 12853, 17416) },
+ { AOM_CDF4(4231, 8041, 11480) },
+ { AOM_CDF4(19823, 26490, 29156) },
+ { AOM_CDF4(18890, 25929, 28932) },
+ { AOM_CDF4(15660, 23491, 27433) },
+ { AOM_CDF4(12147, 19776, 24488) },
+ { AOM_CDF4(9728, 16774, 21649) },
+ { AOM_CDF4(7919, 14277, 19066) },
+ { AOM_CDF4(5440, 10170, 14185) } } },
+ { { { AOM_CDF4(14406, 20862, 24414) },
+ { AOM_CDF4(11824, 18907, 23109) },
+ { AOM_CDF4(8257, 14393, 18803) },
+ { AOM_CDF4(5860, 10747, 14778) },
+ { AOM_CDF4(4475, 8486, 11984) },
+ { AOM_CDF4(3606, 6954, 10043) },
+ { AOM_CDF4(1736, 3410, 5048) },
+ { AOM_CDF4(14430, 20046, 22882) },
+ { AOM_CDF4(15593, 22899, 26709) },
+ { AOM_CDF4(12102, 19368, 23811) },
+ { AOM_CDF4(9059, 15584, 20262) },
+ { AOM_CDF4(6999, 12603, 17048) },
+ { AOM_CDF4(5684, 10497, 14553) },
+ { AOM_CDF4(2822, 5438, 7862) },
+ { AOM_CDF4(15785, 21585, 24359) },
+ { AOM_CDF4(18347, 25229, 28266) },
+ { AOM_CDF4(14974, 22487, 26389) },
+ { AOM_CDF4(11423, 18681, 23271) },
+ { AOM_CDF4(8863, 15350, 20008) },
+ { AOM_CDF4(7153, 12852, 17278) },
+ { AOM_CDF4(3707, 7036, 9982) } },
+ { { AOM_CDF4(15460, 21696, 25469) },
+ { AOM_CDF4(12170, 19249, 23191) },
+ { AOM_CDF4(8723, 15027, 19332) },
+ { AOM_CDF4(6428, 11704, 15874) },
+ { AOM_CDF4(4922, 9292, 13052) },
+ { AOM_CDF4(4139, 7695, 11010) },
+ { AOM_CDF4(2291, 4508, 6598) },
+ { AOM_CDF4(19856, 26920, 29828) },
+ { AOM_CDF4(17923, 25289, 28792) },
+ { AOM_CDF4(14278, 21968, 26297) },
+ { AOM_CDF4(10910, 18136, 22950) },
+ { AOM_CDF4(8423, 14815, 19627) },
+ { AOM_CDF4(6771, 12283, 16774) },
+ { AOM_CDF4(4074, 7750, 11081) },
+ { AOM_CDF4(19852, 26074, 28672) },
+ { AOM_CDF4(19371, 26110, 28989) },
+ { AOM_CDF4(16265, 23873, 27663) },
+ { AOM_CDF4(12758, 20378, 24952) },
+ { AOM_CDF4(10095, 17098, 21961) },
+ { AOM_CDF4(8250, 14628, 19451) },
+ { AOM_CDF4(5205, 9745, 13622) } } },
+ { { { AOM_CDF4(10563, 16233, 19763) },
+ { AOM_CDF4(9794, 16022, 19804) },
+ { AOM_CDF4(6750, 11945, 15759) },
+ { AOM_CDF4(4963, 9186, 12752) },
+ { AOM_CDF4(3845, 7435, 10627) },
+ { AOM_CDF4(3051, 6085, 8834) },
+ { AOM_CDF4(1311, 2596, 3830) },
+ { AOM_CDF4(11246, 16404, 19689) },
+ { AOM_CDF4(12315, 18911, 22731) },
+ { AOM_CDF4(10557, 17095, 21289) },
+ { AOM_CDF4(8136, 14006, 18249) },
+ { AOM_CDF4(6348, 11474, 15565) },
+ { AOM_CDF4(5196, 9655, 13400) },
+ { AOM_CDF4(2349, 4526, 6587) },
+ { AOM_CDF4(13337, 18730, 21569) },
+ { AOM_CDF4(19306, 26071, 28882) },
+ { AOM_CDF4(15952, 23540, 27254) },
+ { AOM_CDF4(12409, 19934, 24430) },
+ { AOM_CDF4(9760, 16706, 21389) },
+ { AOM_CDF4(8004, 14220, 18818) },
+ { AOM_CDF4(4138, 7794, 10961) } },
+ { { AOM_CDF4(10870, 16684, 20949) },
+ { AOM_CDF4(9664, 15230, 18680) },
+ { AOM_CDF4(6886, 12109, 15408) },
+ { AOM_CDF4(4825, 8900, 12305) },
+ { AOM_CDF4(3630, 7162, 10314) },
+ { AOM_CDF4(3036, 6429, 9387) },
+ { AOM_CDF4(1671, 3296, 4940) },
+ { AOM_CDF4(13819, 19159, 23026) },
+ { AOM_CDF4(11984, 19108, 23120) },
+ { AOM_CDF4(10690, 17210, 21663) },
+ { AOM_CDF4(7984, 14154, 18333) },
+ { AOM_CDF4(6868, 12294, 16124) },
+ { AOM_CDF4(5274, 8994, 12868) },
+ { AOM_CDF4(2988, 5771, 8424) },
+ { AOM_CDF4(19736, 26647, 29141) },
+ { AOM_CDF4(18933, 26070, 28984) },
+ { AOM_CDF4(15779, 23048, 27200) },
+ { AOM_CDF4(12638, 20061, 24532) },
+ { AOM_CDF4(10692, 17545, 22220) },
+ { AOM_CDF4(9217, 15251, 20054) },
+ { AOM_CDF4(5078, 9284, 12594) } } },
+ { { { AOM_CDF4(2331, 3662, 5244) },
+ { AOM_CDF4(2891, 4771, 6145) },
+ { AOM_CDF4(4598, 7623, 9729) },
+ { AOM_CDF4(3520, 6845, 9199) },
+ { AOM_CDF4(3417, 6119, 9324) },
+ { AOM_CDF4(2601, 5412, 7385) },
+ { AOM_CDF4(600, 1173, 1744) },
+ { AOM_CDF4(7672, 13286, 17469) },
+ { AOM_CDF4(4232, 7792, 10793) },
+ { AOM_CDF4(2915, 5317, 7397) },
+ { AOM_CDF4(2318, 4356, 6152) },
+ { AOM_CDF4(2127, 4000, 5554) },
+ { AOM_CDF4(1850, 3478, 5275) },
+ { AOM_CDF4(977, 1933, 2843) },
+ { AOM_CDF4(18280, 24387, 27989) },
+ { AOM_CDF4(15852, 22671, 26185) },
+ { AOM_CDF4(13845, 20951, 24789) },
+ { AOM_CDF4(11055, 17966, 22129) },
+ { AOM_CDF4(9138, 15422, 19801) },
+ { AOM_CDF4(7454, 13145, 17456) },
+ { AOM_CDF4(3370, 6393, 9013) } },
+ { { AOM_CDF4(5842, 9229, 10838) },
+ { AOM_CDF4(2313, 3491, 4276) },
+ { AOM_CDF4(2998, 6104, 7496) },
+ { AOM_CDF4(2420, 7447, 9868) },
+ { AOM_CDF4(3034, 8495, 10923) },
+ { AOM_CDF4(4076, 8937, 10975) },
+ { AOM_CDF4(1086, 2370, 3299) },
+ { AOM_CDF4(9714, 17254, 20444) },
+ { AOM_CDF4(8543, 13698, 17123) },
+ { AOM_CDF4(4918, 9007, 11910) },
+ { AOM_CDF4(4129, 7532, 10553) },
+ { AOM_CDF4(2364, 5533, 8058) },
+ { AOM_CDF4(1834, 3546, 5563) },
+ { AOM_CDF4(1473, 2908, 4133) },
+ { AOM_CDF4(15405, 21193, 25619) },
+ { AOM_CDF4(15691, 21952, 26561) },
+ { AOM_CDF4(12962, 19194, 24165) },
+ { AOM_CDF4(10272, 17855, 22129) },
+ { AOM_CDF4(8588, 15270, 20718) },
+ { AOM_CDF4(8682, 14669, 19500) },
+ { AOM_CDF4(4870, 9636, 13205) } } },
+ { { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } } },
+ { { { { AOM_CDF4(14995, 21341, 24749) },
+ { AOM_CDF4(13158, 20289, 24601) },
+ { AOM_CDF4(8941, 15326, 19876) },
+ { AOM_CDF4(6297, 11541, 15807) },
+ { AOM_CDF4(4817, 9029, 12776) },
+ { AOM_CDF4(3731, 7273, 10627) },
+ { AOM_CDF4(1847, 3617, 5354) },
+ { AOM_CDF4(14472, 19659, 22343) },
+ { AOM_CDF4(16806, 24162, 27533) },
+ { AOM_CDF4(12900, 20404, 24713) },
+ { AOM_CDF4(9411, 16112, 20797) },
+ { AOM_CDF4(7056, 12697, 17148) },
+ { AOM_CDF4(5544, 10339, 14460) },
+ { AOM_CDF4(2954, 5704, 8319) },
+ { AOM_CDF4(12464, 18071, 21354) },
+ { AOM_CDF4(15482, 22528, 26034) },
+ { AOM_CDF4(12070, 19269, 23624) },
+ { AOM_CDF4(8953, 15406, 20106) },
+ { AOM_CDF4(7027, 12730, 17220) },
+ { AOM_CDF4(5887, 10913, 15140) },
+ { AOM_CDF4(3793, 7278, 10447) } },
+ { { AOM_CDF4(15571, 22232, 25749) },
+ { AOM_CDF4(14506, 21575, 25374) },
+ { AOM_CDF4(10189, 17089, 21569) },
+ { AOM_CDF4(7316, 13301, 17915) },
+ { AOM_CDF4(5783, 10912, 15190) },
+ { AOM_CDF4(4760, 9155, 13088) },
+ { AOM_CDF4(2993, 5966, 8774) },
+ { AOM_CDF4(23424, 28903, 30778) },
+ { AOM_CDF4(20775, 27666, 30290) },
+ { AOM_CDF4(16474, 24410, 28299) },
+ { AOM_CDF4(12471, 20180, 24987) },
+ { AOM_CDF4(9410, 16487, 21439) },
+ { AOM_CDF4(7536, 13614, 18529) },
+ { AOM_CDF4(5048, 9586, 13549) },
+ { AOM_CDF4(21090, 27290, 29756) },
+ { AOM_CDF4(20796, 27402, 30026) },
+ { AOM_CDF4(17819, 25485, 28969) },
+ { AOM_CDF4(13860, 21909, 26462) },
+ { AOM_CDF4(11002, 18494, 23529) },
+ { AOM_CDF4(8953, 15929, 20897) },
+ { AOM_CDF4(6448, 11918, 16454) } } },
+ { { { AOM_CDF4(15999, 22208, 25449) },
+ { AOM_CDF4(13050, 19988, 24122) },
+ { AOM_CDF4(8594, 14864, 19378) },
+ { AOM_CDF4(6033, 11079, 15238) },
+ { AOM_CDF4(4554, 8683, 12347) },
+ { AOM_CDF4(3672, 7139, 10337) },
+ { AOM_CDF4(1900, 3771, 5576) },
+ { AOM_CDF4(15788, 21340, 23949) },
+ { AOM_CDF4(16825, 24235, 27758) },
+ { AOM_CDF4(12873, 20402, 24810) },
+ { AOM_CDF4(9590, 16363, 21094) },
+ { AOM_CDF4(7352, 13209, 17733) },
+ { AOM_CDF4(5960, 10989, 15184) },
+ { AOM_CDF4(3232, 6234, 9007) },
+ { AOM_CDF4(15761, 20716, 23224) },
+ { AOM_CDF4(19318, 25989, 28759) },
+ { AOM_CDF4(15529, 23094, 26929) },
+ { AOM_CDF4(11662, 18989, 23641) },
+ { AOM_CDF4(8955, 15568, 20366) },
+ { AOM_CDF4(7281, 13106, 17708) },
+ { AOM_CDF4(4248, 8059, 11440) } },
+ { { AOM_CDF4(14899, 21217, 24503) },
+ { AOM_CDF4(13519, 20283, 24047) },
+ { AOM_CDF4(9429, 15966, 20365) },
+ { AOM_CDF4(6700, 12355, 16652) },
+ { AOM_CDF4(5088, 9704, 13716) },
+ { AOM_CDF4(4243, 8154, 11731) },
+ { AOM_CDF4(2702, 5364, 7861) },
+ { AOM_CDF4(22745, 28388, 30454) },
+ { AOM_CDF4(20235, 27146, 29922) },
+ { AOM_CDF4(15896, 23715, 27637) },
+ { AOM_CDF4(11840, 19350, 24131) },
+ { AOM_CDF4(9122, 15932, 20880) },
+ { AOM_CDF4(7488, 13581, 18362) },
+ { AOM_CDF4(5114, 9568, 13370) },
+ { AOM_CDF4(20845, 26553, 28932) },
+ { AOM_CDF4(20981, 27372, 29884) },
+ { AOM_CDF4(17781, 25335, 28785) },
+ { AOM_CDF4(13760, 21708, 26297) },
+ { AOM_CDF4(10975, 18415, 23365) },
+ { AOM_CDF4(9045, 15789, 20686) },
+ { AOM_CDF4(6130, 11199, 15423) } } },
+ { { { AOM_CDF4(13549, 19724, 23158) },
+ { AOM_CDF4(11844, 18382, 22246) },
+ { AOM_CDF4(7919, 13619, 17773) },
+ { AOM_CDF4(5486, 10143, 13946) },
+ { AOM_CDF4(4166, 7983, 11324) },
+ { AOM_CDF4(3364, 6506, 9427) },
+ { AOM_CDF4(1598, 3160, 4674) },
+ { AOM_CDF4(15281, 20979, 23781) },
+ { AOM_CDF4(14939, 22119, 25952) },
+ { AOM_CDF4(11363, 18407, 22812) },
+ { AOM_CDF4(8609, 14857, 19370) },
+ { AOM_CDF4(6737, 12184, 16480) },
+ { AOM_CDF4(5506, 10263, 14262) },
+ { AOM_CDF4(2990, 5786, 8380) },
+ { AOM_CDF4(20249, 25253, 27417) },
+ { AOM_CDF4(21070, 27518, 30001) },
+ { AOM_CDF4(16854, 24469, 28074) },
+ { AOM_CDF4(12864, 20486, 25000) },
+ { AOM_CDF4(9962, 16978, 21778) },
+ { AOM_CDF4(8074, 14338, 19048) },
+ { AOM_CDF4(4494, 8479, 11906) } },
+ { { AOM_CDF4(13960, 19617, 22829) },
+ { AOM_CDF4(11150, 17341, 21228) },
+ { AOM_CDF4(7150, 12964, 17190) },
+ { AOM_CDF4(5331, 10002, 13867) },
+ { AOM_CDF4(4167, 7744, 11057) },
+ { AOM_CDF4(3480, 6629, 9646) },
+ { AOM_CDF4(1883, 3784, 5686) },
+ { AOM_CDF4(18752, 25660, 28912) },
+ { AOM_CDF4(16968, 24586, 28030) },
+ { AOM_CDF4(13520, 21055, 25313) },
+ { AOM_CDF4(10453, 17626, 22280) },
+ { AOM_CDF4(8386, 14505, 19116) },
+ { AOM_CDF4(6742, 12595, 17008) },
+ { AOM_CDF4(4273, 8140, 11499) },
+ { AOM_CDF4(22120, 27827, 30233) },
+ { AOM_CDF4(20563, 27358, 29895) },
+ { AOM_CDF4(17076, 24644, 28153) },
+ { AOM_CDF4(13362, 20942, 25309) },
+ { AOM_CDF4(10794, 17965, 22695) },
+ { AOM_CDF4(9014, 15652, 20319) },
+ { AOM_CDF4(5708, 10512, 14497) } } },
+ { { { AOM_CDF4(5705, 10930, 15725) },
+ { AOM_CDF4(7946, 12765, 16115) },
+ { AOM_CDF4(6801, 12123, 16226) },
+ { AOM_CDF4(5462, 10135, 14200) },
+ { AOM_CDF4(4189, 8011, 11507) },
+ { AOM_CDF4(3191, 6229, 9408) },
+ { AOM_CDF4(1057, 2137, 3212) },
+ { AOM_CDF4(10018, 17067, 21491) },
+ { AOM_CDF4(7380, 12582, 16453) },
+ { AOM_CDF4(6068, 10845, 14339) },
+ { AOM_CDF4(5098, 9198, 12555) },
+ { AOM_CDF4(4312, 8010, 11119) },
+ { AOM_CDF4(3700, 6966, 9781) },
+ { AOM_CDF4(1693, 3326, 4887) },
+ { AOM_CDF4(18757, 24930, 27774) },
+ { AOM_CDF4(17648, 24596, 27817) },
+ { AOM_CDF4(14707, 22052, 26026) },
+ { AOM_CDF4(11720, 18852, 23292) },
+ { AOM_CDF4(9357, 15952, 20525) },
+ { AOM_CDF4(7810, 13753, 18210) },
+ { AOM_CDF4(3879, 7333, 10328) } },
+ { { AOM_CDF4(8278, 13242, 15922) },
+ { AOM_CDF4(10547, 15867, 18919) },
+ { AOM_CDF4(9106, 15842, 20609) },
+ { AOM_CDF4(6833, 13007, 17218) },
+ { AOM_CDF4(4811, 9712, 13923) },
+ { AOM_CDF4(3985, 7352, 11128) },
+ { AOM_CDF4(1688, 3458, 5262) },
+ { AOM_CDF4(12951, 21861, 26510) },
+ { AOM_CDF4(9788, 16044, 20276) },
+ { AOM_CDF4(6309, 11244, 14870) },
+ { AOM_CDF4(5183, 9349, 12566) },
+ { AOM_CDF4(4389, 8229, 11492) },
+ { AOM_CDF4(3633, 6945, 10620) },
+ { AOM_CDF4(3600, 6847, 9907) },
+ { AOM_CDF4(21748, 28137, 30255) },
+ { AOM_CDF4(19436, 26581, 29560) },
+ { AOM_CDF4(16359, 24201, 27953) },
+ { AOM_CDF4(13961, 21693, 25871) },
+ { AOM_CDF4(11544, 18686, 23322) },
+ { AOM_CDF4(9372, 16462, 20952) },
+ { AOM_CDF4(6138, 11210, 15390) } } },
+ { { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } } },
+ { { { { AOM_CDF4(16138, 22223, 25509) },
+ { AOM_CDF4(15347, 22430, 26332) },
+ { AOM_CDF4(9614, 16736, 21332) },
+ { AOM_CDF4(6600, 12275, 16907) },
+ { AOM_CDF4(4811, 9424, 13547) },
+ { AOM_CDF4(3748, 7809, 11420) },
+ { AOM_CDF4(2254, 4587, 6890) },
+ { AOM_CDF4(15196, 20284, 23177) },
+ { AOM_CDF4(18317, 25469, 28451) },
+ { AOM_CDF4(13918, 21651, 25842) },
+ { AOM_CDF4(10052, 17150, 21995) },
+ { AOM_CDF4(7499, 13630, 18587) },
+ { AOM_CDF4(6158, 11417, 16003) },
+ { AOM_CDF4(4014, 7785, 11252) },
+ { AOM_CDF4(15048, 21067, 24384) },
+ { AOM_CDF4(18202, 25346, 28553) },
+ { AOM_CDF4(14302, 22019, 26356) },
+ { AOM_CDF4(10839, 18139, 23166) },
+ { AOM_CDF4(8715, 15744, 20806) },
+ { AOM_CDF4(7536, 13576, 18544) },
+ { AOM_CDF4(5413, 10335, 14498) } },
+ { { AOM_CDF4(17394, 24501, 27895) },
+ { AOM_CDF4(15889, 23420, 27185) },
+ { AOM_CDF4(11561, 19133, 23870) },
+ { AOM_CDF4(8285, 14812, 19844) },
+ { AOM_CDF4(6496, 12043, 16550) },
+ { AOM_CDF4(4771, 9574, 13677) },
+ { AOM_CDF4(3603, 6830, 10144) },
+ { AOM_CDF4(21656, 27704, 30200) },
+ { AOM_CDF4(21324, 27915, 30511) },
+ { AOM_CDF4(17327, 25336, 28997) },
+ { AOM_CDF4(13417, 21381, 26033) },
+ { AOM_CDF4(10132, 17425, 22338) },
+ { AOM_CDF4(8580, 15016, 19633) },
+ { AOM_CDF4(5694, 11477, 16411) },
+ { AOM_CDF4(24116, 29780, 31450) },
+ { AOM_CDF4(23853, 29695, 31591) },
+ { AOM_CDF4(20085, 27614, 30428) },
+ { AOM_CDF4(15326, 24335, 28575) },
+ { AOM_CDF4(11814, 19472, 24810) },
+ { AOM_CDF4(10221, 18611, 24767) },
+ { AOM_CDF4(7689, 14558, 20321) } } },
+ { { { AOM_CDF4(16214, 22380, 25770) },
+ { AOM_CDF4(14213, 21304, 25295) },
+ { AOM_CDF4(9213, 15823, 20455) },
+ { AOM_CDF4(6395, 11758, 16139) },
+ { AOM_CDF4(4779, 9187, 13066) },
+ { AOM_CDF4(3821, 7501, 10953) },
+ { AOM_CDF4(2293, 4567, 6795) },
+ { AOM_CDF4(15859, 21283, 23820) },
+ { AOM_CDF4(18404, 25602, 28726) },
+ { AOM_CDF4(14325, 21980, 26206) },
+ { AOM_CDF4(10669, 17937, 22720) },
+ { AOM_CDF4(8297, 14642, 19447) },
+ { AOM_CDF4(6746, 12389, 16893) },
+ { AOM_CDF4(4324, 8251, 11770) },
+ { AOM_CDF4(16532, 21631, 24475) },
+ { AOM_CDF4(20667, 27150, 29668) },
+ { AOM_CDF4(16728, 24510, 28175) },
+ { AOM_CDF4(12861, 20645, 25332) },
+ { AOM_CDF4(10076, 17361, 22417) },
+ { AOM_CDF4(8395, 14940, 19963) },
+ { AOM_CDF4(5731, 10683, 14912) } },
+ { { AOM_CDF4(14433, 21155, 24938) },
+ { AOM_CDF4(14658, 21716, 25545) },
+ { AOM_CDF4(9923, 16824, 21557) },
+ { AOM_CDF4(6982, 13052, 17721) },
+ { AOM_CDF4(5419, 10503, 15050) },
+ { AOM_CDF4(4852, 9162, 13014) },
+ { AOM_CDF4(3271, 6395, 9630) },
+ { AOM_CDF4(22210, 27833, 30109) },
+ { AOM_CDF4(20750, 27368, 29821) },
+ { AOM_CDF4(16894, 24828, 28573) },
+ { AOM_CDF4(13247, 21276, 25757) },
+ { AOM_CDF4(10038, 17265, 22563) },
+ { AOM_CDF4(8587, 14947, 20327) },
+ { AOM_CDF4(5645, 11371, 15252) },
+ { AOM_CDF4(22027, 27526, 29714) },
+ { AOM_CDF4(23098, 29146, 31221) },
+ { AOM_CDF4(19886, 27341, 30272) },
+ { AOM_CDF4(15609, 23747, 28046) },
+ { AOM_CDF4(11993, 20065, 24939) },
+ { AOM_CDF4(9637, 18267, 23671) },
+ { AOM_CDF4(7625, 13801, 19144) } } },
+ { { { AOM_CDF4(14438, 20798, 24089) },
+ { AOM_CDF4(12621, 19203, 23097) },
+ { AOM_CDF4(8177, 14125, 18402) },
+ { AOM_CDF4(5674, 10501, 14456) },
+ { AOM_CDF4(4236, 8239, 11733) },
+ { AOM_CDF4(3447, 6750, 9806) },
+ { AOM_CDF4(1986, 3950, 5864) },
+ { AOM_CDF4(16208, 22099, 24930) },
+ { AOM_CDF4(16537, 24025, 27585) },
+ { AOM_CDF4(12780, 20381, 24867) },
+ { AOM_CDF4(9767, 16612, 21416) },
+ { AOM_CDF4(7686, 13738, 18398) },
+ { AOM_CDF4(6333, 11614, 15964) },
+ { AOM_CDF4(3941, 7571, 10836) },
+ { AOM_CDF4(22819, 27422, 29202) },
+ { AOM_CDF4(22224, 28514, 30721) },
+ { AOM_CDF4(17660, 25433, 28913) },
+ { AOM_CDF4(13574, 21482, 26002) },
+ { AOM_CDF4(10629, 17977, 22938) },
+ { AOM_CDF4(8612, 15298, 20265) },
+ { AOM_CDF4(5607, 10491, 14596) } },
+ { { AOM_CDF4(13569, 19800, 23206) },
+ { AOM_CDF4(13128, 19924, 23869) },
+ { AOM_CDF4(8329, 14841, 19403) },
+ { AOM_CDF4(6130, 10976, 15057) },
+ { AOM_CDF4(4682, 8839, 12518) },
+ { AOM_CDF4(3656, 7409, 10588) },
+ { AOM_CDF4(2577, 5099, 7412) },
+ { AOM_CDF4(22427, 28684, 30585) },
+ { AOM_CDF4(20913, 27750, 30139) },
+ { AOM_CDF4(15840, 24109, 27834) },
+ { AOM_CDF4(12308, 20029, 24569) },
+ { AOM_CDF4(10216, 16785, 21458) },
+ { AOM_CDF4(8309, 14203, 19113) },
+ { AOM_CDF4(6043, 11168, 15307) },
+ { AOM_CDF4(23166, 28901, 30998) },
+ { AOM_CDF4(21899, 28405, 30751) },
+ { AOM_CDF4(18413, 26091, 29443) },
+ { AOM_CDF4(15233, 23114, 27352) },
+ { AOM_CDF4(12683, 20472, 25288) },
+ { AOM_CDF4(10702, 18259, 23409) },
+ { AOM_CDF4(8125, 14464, 19226) } } },
+ { { { AOM_CDF4(9040, 14786, 18360) },
+ { AOM_CDF4(9979, 15718, 19415) },
+ { AOM_CDF4(7913, 13918, 18311) },
+ { AOM_CDF4(5859, 10889, 15184) },
+ { AOM_CDF4(4593, 8677, 12510) },
+ { AOM_CDF4(3820, 7396, 10791) },
+ { AOM_CDF4(1730, 3471, 5192) },
+ { AOM_CDF4(11803, 18365, 22709) },
+ { AOM_CDF4(11419, 18058, 22225) },
+ { AOM_CDF4(9418, 15774, 20243) },
+ { AOM_CDF4(7539, 13325, 17657) },
+ { AOM_CDF4(6233, 11317, 15384) },
+ { AOM_CDF4(5137, 9656, 13545) },
+ { AOM_CDF4(2977, 5774, 8349) },
+ { AOM_CDF4(21207, 27246, 29640) },
+ { AOM_CDF4(19547, 26578, 29497) },
+ { AOM_CDF4(16169, 23871, 27690) },
+ { AOM_CDF4(12820, 20458, 25018) },
+ { AOM_CDF4(10224, 17332, 22214) },
+ { AOM_CDF4(8526, 15048, 19884) },
+ { AOM_CDF4(5037, 9410, 13118) } },
+ { { AOM_CDF4(12339, 17329, 20140) },
+ { AOM_CDF4(13505, 19895, 23225) },
+ { AOM_CDF4(9847, 16944, 21564) },
+ { AOM_CDF4(7280, 13256, 18348) },
+ { AOM_CDF4(4712, 10009, 14454) },
+ { AOM_CDF4(4361, 7914, 12477) },
+ { AOM_CDF4(2870, 5628, 7995) },
+ { AOM_CDF4(20061, 25504, 28526) },
+ { AOM_CDF4(15235, 22878, 26145) },
+ { AOM_CDF4(12985, 19958, 24155) },
+ { AOM_CDF4(9782, 16641, 21403) },
+ { AOM_CDF4(9456, 16360, 20760) },
+ { AOM_CDF4(6855, 12940, 18557) },
+ { AOM_CDF4(5661, 10564, 15002) },
+ { AOM_CDF4(25656, 30602, 31894) },
+ { AOM_CDF4(22570, 29107, 31092) },
+ { AOM_CDF4(18917, 26423, 29541) },
+ { AOM_CDF4(15940, 23649, 27754) },
+ { AOM_CDF4(12803, 20581, 25219) },
+ { AOM_CDF4(11082, 18695, 23376) },
+ { AOM_CDF4(7939, 14373, 19005) } } },
+ { { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } } },
+ { { { { AOM_CDF4(18315, 24289, 27551) },
+ { AOM_CDF4(16854, 24068, 27835) },
+ { AOM_CDF4(10140, 17927, 23173) },
+ { AOM_CDF4(6722, 12982, 18267) },
+ { AOM_CDF4(4661, 9826, 14706) },
+ { AOM_CDF4(3832, 8165, 12294) },
+ { AOM_CDF4(2795, 6098, 9245) },
+ { AOM_CDF4(17145, 23326, 26672) },
+ { AOM_CDF4(20733, 27680, 30308) },
+ { AOM_CDF4(16032, 24461, 28546) },
+ { AOM_CDF4(11653, 20093, 25081) },
+ { AOM_CDF4(9290, 16429, 22086) },
+ { AOM_CDF4(7796, 14598, 19982) },
+ { AOM_CDF4(6502, 12378, 17441) },
+ { AOM_CDF4(21681, 27732, 30320) },
+ { AOM_CDF4(22389, 29044, 31261) },
+ { AOM_CDF4(19027, 26731, 30087) },
+ { AOM_CDF4(14739, 23755, 28624) },
+ { AOM_CDF4(11358, 20778, 25511) },
+ { AOM_CDF4(10995, 18073, 24190) },
+ { AOM_CDF4(9162, 14990, 20617) } },
+ { { AOM_CDF4(21425, 27952, 30388) },
+ { AOM_CDF4(18062, 25838, 29034) },
+ { AOM_CDF4(11956, 19881, 24808) },
+ { AOM_CDF4(7718, 15000, 20980) },
+ { AOM_CDF4(5702, 11254, 16143) },
+ { AOM_CDF4(4898, 9088, 16864) },
+ { AOM_CDF4(3679, 6776, 11907) },
+ { AOM_CDF4(23294, 30160, 31663) },
+ { AOM_CDF4(24397, 29896, 31836) },
+ { AOM_CDF4(19245, 27128, 30593) },
+ { AOM_CDF4(13202, 19825, 26404) },
+ { AOM_CDF4(11578, 19297, 23957) },
+ { AOM_CDF4(8073, 13297, 21370) },
+ { AOM_CDF4(5461, 10923, 19745) },
+ { AOM_CDF4(27367, 30521, 31934) },
+ { AOM_CDF4(24904, 30671, 31940) },
+ { AOM_CDF4(23075, 28460, 31299) },
+ { AOM_CDF4(14400, 23658, 30417) },
+ { AOM_CDF4(13885, 23882, 28325) },
+ { AOM_CDF4(14746, 22938, 27853) },
+ { AOM_CDF4(5461, 16384, 27307) } } },
+ { { { AOM_CDF4(18274, 24813, 27890) },
+ { AOM_CDF4(15537, 23149, 27003) },
+ { AOM_CDF4(9449, 16740, 21827) },
+ { AOM_CDF4(6700, 12498, 17261) },
+ { AOM_CDF4(4988, 9866, 14198) },
+ { AOM_CDF4(4236, 8147, 11902) },
+ { AOM_CDF4(2867, 5860, 8654) },
+ { AOM_CDF4(17124, 23171, 26101) },
+ { AOM_CDF4(20396, 27477, 30148) },
+ { AOM_CDF4(16573, 24629, 28492) },
+ { AOM_CDF4(12749, 20846, 25674) },
+ { AOM_CDF4(10233, 17878, 22818) },
+ { AOM_CDF4(8525, 15332, 20363) },
+ { AOM_CDF4(6283, 11632, 16255) },
+ { AOM_CDF4(20466, 26511, 29286) },
+ { AOM_CDF4(23059, 29174, 31191) },
+ { AOM_CDF4(19481, 27263, 30241) },
+ { AOM_CDF4(15458, 23631, 28137) },
+ { AOM_CDF4(12416, 20608, 25693) },
+ { AOM_CDF4(10261, 18011, 23261) },
+ { AOM_CDF4(8016, 14655, 19666) } },
+ { { AOM_CDF4(17616, 24586, 28112) },
+ { AOM_CDF4(15809, 23299, 27155) },
+ { AOM_CDF4(10767, 18890, 23793) },
+ { AOM_CDF4(7727, 14255, 18865) },
+ { AOM_CDF4(6129, 11926, 16882) },
+ { AOM_CDF4(4482, 9704, 14861) },
+ { AOM_CDF4(3277, 7452, 11522) },
+ { AOM_CDF4(22956, 28551, 30730) },
+ { AOM_CDF4(22724, 28937, 30961) },
+ { AOM_CDF4(18467, 26324, 29580) },
+ { AOM_CDF4(13234, 20713, 25649) },
+ { AOM_CDF4(11181, 17592, 22481) },
+ { AOM_CDF4(8291, 18358, 24576) },
+ { AOM_CDF4(7568, 11881, 14984) },
+ { AOM_CDF4(24948, 29001, 31147) },
+ { AOM_CDF4(25674, 30619, 32151) },
+ { AOM_CDF4(20841, 26793, 29603) },
+ { AOM_CDF4(14669, 24356, 28666) },
+ { AOM_CDF4(11334, 23593, 28219) },
+ { AOM_CDF4(8922, 14762, 22873) },
+ { AOM_CDF4(8301, 13544, 20535) } } },
+ { { { AOM_CDF4(17113, 23733, 27081) },
+ { AOM_CDF4(14139, 21406, 25452) },
+ { AOM_CDF4(8552, 15002, 19776) },
+ { AOM_CDF4(5871, 11120, 15378) },
+ { AOM_CDF4(4455, 8616, 12253) },
+ { AOM_CDF4(3469, 6910, 10386) },
+ { AOM_CDF4(2255, 4553, 6782) },
+ { AOM_CDF4(18224, 24376, 27053) },
+ { AOM_CDF4(19290, 26710, 29614) },
+ { AOM_CDF4(14936, 22991, 27184) },
+ { AOM_CDF4(11238, 18951, 23762) },
+ { AOM_CDF4(8786, 15617, 20588) },
+ { AOM_CDF4(7317, 13228, 18003) },
+ { AOM_CDF4(5101, 9512, 13493) },
+ { AOM_CDF4(22639, 28222, 30210) },
+ { AOM_CDF4(23216, 29331, 31307) },
+ { AOM_CDF4(19075, 26762, 29895) },
+ { AOM_CDF4(15014, 23113, 27457) },
+ { AOM_CDF4(11938, 19857, 24752) },
+ { AOM_CDF4(9942, 17280, 22282) },
+ { AOM_CDF4(7167, 13144, 17752) } },
+ { { AOM_CDF4(15820, 22738, 26488) },
+ { AOM_CDF4(13530, 20885, 25216) },
+ { AOM_CDF4(8395, 15530, 20452) },
+ { AOM_CDF4(6574, 12321, 16380) },
+ { AOM_CDF4(5353, 10419, 14568) },
+ { AOM_CDF4(4613, 8446, 12381) },
+ { AOM_CDF4(3440, 7158, 9903) },
+ { AOM_CDF4(24247, 29051, 31224) },
+ { AOM_CDF4(22118, 28058, 30369) },
+ { AOM_CDF4(16498, 24768, 28389) },
+ { AOM_CDF4(12920, 21175, 26137) },
+ { AOM_CDF4(10730, 18619, 25352) },
+ { AOM_CDF4(10187, 16279, 22791) },
+ { AOM_CDF4(9310, 14631, 22127) },
+ { AOM_CDF4(24970, 30558, 32057) },
+ { AOM_CDF4(24801, 29942, 31698) },
+ { AOM_CDF4(22432, 28453, 30855) },
+ { AOM_CDF4(19054, 25680, 29580) },
+ { AOM_CDF4(14392, 23036, 28109) },
+ { AOM_CDF4(12495, 20947, 26650) },
+ { AOM_CDF4(12442, 20326, 26214) } } },
+ { { { AOM_CDF4(12162, 18785, 22648) },
+ { AOM_CDF4(12749, 19697, 23806) },
+ { AOM_CDF4(8580, 15297, 20346) },
+ { AOM_CDF4(6169, 11749, 16543) },
+ { AOM_CDF4(4836, 9391, 13448) },
+ { AOM_CDF4(3821, 7711, 11613) },
+ { AOM_CDF4(2228, 4601, 7070) },
+ { AOM_CDF4(16319, 24725, 28280) },
+ { AOM_CDF4(15698, 23277, 27168) },
+ { AOM_CDF4(12726, 20368, 25047) },
+ { AOM_CDF4(9912, 17015, 21976) },
+ { AOM_CDF4(7888, 14220, 19179) },
+ { AOM_CDF4(6777, 12284, 17018) },
+ { AOM_CDF4(4492, 8590, 12252) },
+ { AOM_CDF4(23249, 28904, 30947) },
+ { AOM_CDF4(21050, 27908, 30512) },
+ { AOM_CDF4(17440, 25340, 28949) },
+ { AOM_CDF4(14059, 22018, 26541) },
+ { AOM_CDF4(11288, 18903, 23898) },
+ { AOM_CDF4(9411, 16342, 21428) },
+ { AOM_CDF4(6278, 11588, 15944) } },
+ { { AOM_CDF4(13981, 20067, 23226) },
+ { AOM_CDF4(16922, 23580, 26783) },
+ { AOM_CDF4(11005, 19039, 24487) },
+ { AOM_CDF4(7389, 14218, 19798) },
+ { AOM_CDF4(5598, 11505, 17206) },
+ { AOM_CDF4(6090, 11213, 15659) },
+ { AOM_CDF4(3820, 7371, 10119) },
+ { AOM_CDF4(21082, 26925, 29675) },
+ { AOM_CDF4(21262, 28627, 31128) },
+ { AOM_CDF4(18392, 26454, 30437) },
+ { AOM_CDF4(14870, 22910, 27096) },
+ { AOM_CDF4(12620, 19484, 24908) },
+ { AOM_CDF4(9290, 16553, 22802) },
+ { AOM_CDF4(6668, 14288, 20004) },
+ { AOM_CDF4(27704, 31055, 31949) },
+ { AOM_CDF4(24709, 29978, 31788) },
+ { AOM_CDF4(21668, 29264, 31657) },
+ { AOM_CDF4(18295, 26968, 30074) },
+ { AOM_CDF4(16399, 24422, 29313) },
+ { AOM_CDF4(14347, 23026, 28104) },
+ { AOM_CDF4(12370, 19806, 24477) } } },
+ { { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } } }
+ };
-static const coeff_cdf_model
-av1_default_coef_head_cdfs_q3[TX_SIZES][PLANE_TYPES] = {
- { // TX 4X4
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(25117), AOM_ICDF(25655), AOM_ICDF(28371), AOM_ICDF(30246),
- AOM_ICDF(30939), AOM_ICDF(32768), },
- {AOM_ICDF(15083), AOM_ICDF(16850), AOM_ICDF(26029), AOM_ICDF(29031),
- AOM_ICDF(30115), AOM_ICDF(32768), },
- {AOM_ICDF(8774), AOM_ICDF(12118), AOM_ICDF(22041), AOM_ICDF(26730),
- AOM_ICDF(28574), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(13690), AOM_ICDF(23135), AOM_ICDF(31469), AOM_ICDF(31868),
- AOM_ICDF(32768), },
- {AOM_ICDF(13306), AOM_ICDF(22730), AOM_ICDF(31466), AOM_ICDF(31860),
- AOM_ICDF(32768), },
- {AOM_ICDF(13503), AOM_ICDF(19892), AOM_ICDF(30528), AOM_ICDF(31005),
- AOM_ICDF(32768), },
- {AOM_ICDF(13150), AOM_ICDF(16108), AOM_ICDF(28345), AOM_ICDF(28869),
- AOM_ICDF(32768), },
- {AOM_ICDF(12014), AOM_ICDF(12842), AOM_ICDF(25693), AOM_ICDF(26145),
- AOM_ICDF(32768), },
- {AOM_ICDF(8937), AOM_ICDF(13405), AOM_ICDF(23831), AOM_ICDF(28300),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(18707), AOM_ICDF(26260), AOM_ICDF(31853), AOM_ICDF(32238),
- AOM_ICDF(32768), },
- {AOM_ICDF(15985), AOM_ICDF(24804), AOM_ICDF(31717), AOM_ICDF(32115),
- AOM_ICDF(32768), },
- {AOM_ICDF(14012), AOM_ICDF(18913), AOM_ICDF(30497), AOM_ICDF(31005),
- AOM_ICDF(32768), },
- {AOM_ICDF(12300), AOM_ICDF(14741), AOM_ICDF(28386), AOM_ICDF(28958),
- AOM_ICDF(32768), },
- {AOM_ICDF(12483), AOM_ICDF(15084), AOM_ICDF(24966), AOM_ICDF(26526),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(19934), AOM_ICDF(28117), AOM_ICDF(32022), AOM_ICDF(32378),
- AOM_ICDF(32768), },
- {AOM_ICDF(14925), AOM_ICDF(26201), AOM_ICDF(31828), AOM_ICDF(32262),
- AOM_ICDF(32768), },
- {AOM_ICDF(13132), AOM_ICDF(18927), AOM_ICDF(30269), AOM_ICDF(31173),
- AOM_ICDF(32768), },
- {AOM_ICDF(13926), AOM_ICDF(19251), AOM_ICDF(28262), AOM_ICDF(29901),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(16626), AOM_ICDF(28981), AOM_ICDF(32074), AOM_ICDF(32413),
- AOM_ICDF(32768), },
- {AOM_ICDF(12895), AOM_ICDF(27583), AOM_ICDF(31974), AOM_ICDF(32332),
- AOM_ICDF(32768), },
- {AOM_ICDF(14150), AOM_ICDF(22094), AOM_ICDF(31030), AOM_ICDF(31775),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(5279), AOM_ICDF(29309), AOM_ICDF(32149), AOM_ICDF(32477),
- AOM_ICDF(32768), },
- {AOM_ICDF(5880), AOM_ICDF(29657), AOM_ICDF(32086), AOM_ICDF(32385),
- AOM_ICDF(32768), },
- {AOM_ICDF(11469), AOM_ICDF(18022), AOM_ICDF(22938), AOM_ICDF(27853),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(8302), AOM_ICDF(14024), AOM_ICDF(16072), AOM_ICDF(27926),
- AOM_ICDF(28871), AOM_ICDF(32768), },
- {AOM_ICDF(9359), AOM_ICDF(15522), AOM_ICDF(20581), AOM_ICDF(28595),
- AOM_ICDF(29250), AOM_ICDF(32768), },
- {AOM_ICDF(5318), AOM_ICDF(12803), AOM_ICDF(19679), AOM_ICDF(27719),
- AOM_ICDF(28609), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(22745), AOM_ICDF(25806), AOM_ICDF(31997), AOM_ICDF(32327),
- AOM_ICDF(32768), },
- {AOM_ICDF(18803), AOM_ICDF(25473), AOM_ICDF(31960), AOM_ICDF(32293),
- AOM_ICDF(32768), },
- {AOM_ICDF(15553), AOM_ICDF(19553), AOM_ICDF(31039), AOM_ICDF(31407),
- AOM_ICDF(32768), },
- {AOM_ICDF(13037), AOM_ICDF(15169), AOM_ICDF(28589), AOM_ICDF(29060),
- AOM_ICDF(32768), },
- {AOM_ICDF(10871), AOM_ICDF(11694), AOM_ICDF(24941), AOM_ICDF(25360),
- AOM_ICDF(32768), },
- {AOM_ICDF(6242), AOM_ICDF(10923), AOM_ICDF(18725), AOM_ICDF(23406),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(22276), AOM_ICDF(27316), AOM_ICDF(32078), AOM_ICDF(32402),
- AOM_ICDF(32768), },
- {AOM_ICDF(19227), AOM_ICDF(25420), AOM_ICDF(31954), AOM_ICDF(32293),
- AOM_ICDF(32768), },
- {AOM_ICDF(12383), AOM_ICDF(16969), AOM_ICDF(30280), AOM_ICDF(30766),
- AOM_ICDF(32768), },
- {AOM_ICDF(11282), AOM_ICDF(13725), AOM_ICDF(26516), AOM_ICDF(27379),
- AOM_ICDF(32768), },
- {AOM_ICDF(5120), AOM_ICDF(9216), AOM_ICDF(15360), AOM_ICDF(20480),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(22814), AOM_ICDF(28656), AOM_ICDF(32097), AOM_ICDF(32425),
- AOM_ICDF(32768), },
- {AOM_ICDF(19349), AOM_ICDF(26355), AOM_ICDF(32000), AOM_ICDF(32341),
- AOM_ICDF(32768), },
- {AOM_ICDF(13824), AOM_ICDF(17830), AOM_ICDF(30780), AOM_ICDF(31142),
- AOM_ICDF(32768), },
- {AOM_ICDF(6746), AOM_ICDF(13493), AOM_ICDF(25058), AOM_ICDF(27949),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(19746), AOM_ICDF(28536), AOM_ICDF(32088), AOM_ICDF(32411),
- AOM_ICDF(32768), },
- {AOM_ICDF(17457), AOM_ICDF(27155), AOM_ICDF(32024), AOM_ICDF(32376),
- AOM_ICDF(32768), },
- {AOM_ICDF(10949), AOM_ICDF(16662), AOM_ICDF(29118), AOM_ICDF(30229),
- AOM_ICDF(32768), },
- {AOM_ICDF(6096), AOM_ICDF(12955), AOM_ICDF(21337), AOM_ICDF(27434),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(10114), AOM_ICDF(29713), AOM_ICDF(32140), AOM_ICDF(32448),
- AOM_ICDF(32768), },
- {AOM_ICDF(11455), AOM_ICDF(29324), AOM_ICDF(32094), AOM_ICDF(32419),
- AOM_ICDF(32768), },
- {AOM_ICDF(6554), AOM_ICDF(14418), AOM_ICDF(23593), AOM_ICDF(27525),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(30309), AOM_ICDF(30623), AOM_ICDF(31738), AOM_ICDF(32084),
- AOM_ICDF(32428), AOM_ICDF(32768), },
- {AOM_ICDF(25732), AOM_ICDF(26211), AOM_ICDF(31079), AOM_ICDF(31737),
- AOM_ICDF(32269), AOM_ICDF(32768), },
- {AOM_ICDF(19676), AOM_ICDF(21061), AOM_ICDF(29564), AOM_ICDF(31011),
- AOM_ICDF(31879), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(12328), AOM_ICDF(28270), AOM_ICDF(32125), AOM_ICDF(32447),
- AOM_ICDF(32768), },
- {AOM_ICDF(11177), AOM_ICDF(28585), AOM_ICDF(32076), AOM_ICDF(32401),
- AOM_ICDF(32768), },
- {AOM_ICDF(13232), AOM_ICDF(25364), AOM_ICDF(31558), AOM_ICDF(32072),
- AOM_ICDF(32768), },
- {AOM_ICDF(11997), AOM_ICDF(18443), AOM_ICDF(30261), AOM_ICDF(31873),
- AOM_ICDF(32768), },
- {AOM_ICDF(7399), AOM_ICDF(11627), AOM_ICDF(24312), AOM_ICDF(27483),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(16893), AOM_ICDF(29817), AOM_ICDF(32005), AOM_ICDF(32463),
- AOM_ICDF(32768), },
- {AOM_ICDF(14911), AOM_ICDF(27935), AOM_ICDF(32179), AOM_ICDF(32473),
- AOM_ICDF(32768), },
- {AOM_ICDF(9973), AOM_ICDF(19946), AOM_ICDF(24220), AOM_ICDF(28494),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(18859), AOM_ICDF(29232), AOM_ICDF(31354), AOM_ICDF(32061),
- AOM_ICDF(32768), },
- {AOM_ICDF(11281), AOM_ICDF(26322), AOM_ICDF(29545), AOM_ICDF(31156),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(8937), AOM_ICDF(19363), AOM_ICDF(23831), AOM_ICDF(28300),
- AOM_ICDF(32768), },
- {AOM_ICDF(6144), AOM_ICDF(14336), AOM_ICDF(20480), AOM_ICDF(26624),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(30586), AOM_ICDF(30911), AOM_ICDF(31771), AOM_ICDF(32121),
- AOM_ICDF(32443), AOM_ICDF(32768), },
- {AOM_ICDF(23875), AOM_ICDF(24492), AOM_ICDF(30970), AOM_ICDF(31684),
- AOM_ICDF(32217), AOM_ICDF(32768), },
- {AOM_ICDF(15874), AOM_ICDF(17477), AOM_ICDF(29172), AOM_ICDF(30703),
- AOM_ICDF(32023), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(17059), AOM_ICDF(30027), AOM_ICDF(32152), AOM_ICDF(32450),
- AOM_ICDF(32768), },
- {AOM_ICDF(13931), AOM_ICDF(29387), AOM_ICDF(32103), AOM_ICDF(32414),
- AOM_ICDF(32768), },
- {AOM_ICDF(12903), AOM_ICDF(25742), AOM_ICDF(31906), AOM_ICDF(32289),
- AOM_ICDF(32768), },
- {AOM_ICDF(13493), AOM_ICDF(23130), AOM_ICDF(29614), AOM_ICDF(30840),
- AOM_ICDF(32768), },
- {AOM_ICDF(6554), AOM_ICDF(14746), AOM_ICDF(26214), AOM_ICDF(28672),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(18660), AOM_ICDF(30626), AOM_ICDF(32150), AOM_ICDF(32459),
- AOM_ICDF(32768), },
- {AOM_ICDF(17338), AOM_ICDF(29279), AOM_ICDF(32168), AOM_ICDF(32495),
- AOM_ICDF(32768), },
- {AOM_ICDF(11916), AOM_ICDF(17873), AOM_ICDF(26810), AOM_ICDF(29789),
- AOM_ICDF(32768), },
- {AOM_ICDF(7282), AOM_ICDF(14564), AOM_ICDF(21845), AOM_ICDF(27307),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(23269), AOM_ICDF(31374), AOM_ICDF(32245), AOM_ICDF(32507),
- AOM_ICDF(32768), },
- {AOM_ICDF(15741), AOM_ICDF(27628), AOM_ICDF(30840), AOM_ICDF(31804),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(15464), AOM_ICDF(29454), AOM_ICDF(30559), AOM_ICDF(31663),
- AOM_ICDF(32768), },
- {AOM_ICDF(6827), AOM_ICDF(20480), AOM_ICDF(24576), AOM_ICDF(28672),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- },
- { // TX 8X8
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(18128), AOM_ICDF(19079), AOM_ICDF(27400), AOM_ICDF(29265),
- AOM_ICDF(30385), AOM_ICDF(32768), },
- {AOM_ICDF(10290), AOM_ICDF(12446), AOM_ICDF(23496), AOM_ICDF(26905),
- AOM_ICDF(28729), AOM_ICDF(32768), },
- {AOM_ICDF(5877), AOM_ICDF(9423), AOM_ICDF(18374), AOM_ICDF(23871),
- AOM_ICDF(26028), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(16010), AOM_ICDF(22388), AOM_ICDF(30990), AOM_ICDF(31378),
- AOM_ICDF(32768), },
- {AOM_ICDF(14579), AOM_ICDF(21619), AOM_ICDF(30755), AOM_ICDF(31177),
- AOM_ICDF(32768), },
- {AOM_ICDF(13859), AOM_ICDF(18660), AOM_ICDF(29381), AOM_ICDF(29904),
- AOM_ICDF(32768), },
- {AOM_ICDF(12288), AOM_ICDF(14656), AOM_ICDF(27505), AOM_ICDF(28077),
- AOM_ICDF(32768), },
- {AOM_ICDF(10009), AOM_ICDF(10812), AOM_ICDF(23591), AOM_ICDF(24068),
- AOM_ICDF(32768), },
- {AOM_ICDF(8663), AOM_ICDF(9981), AOM_ICDF(19962), AOM_ICDF(20904),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(20773), AOM_ICDF(24941), AOM_ICDF(31701), AOM_ICDF(32046),
- AOM_ICDF(32768), },
- {AOM_ICDF(17537), AOM_ICDF(22279), AOM_ICDF(31257), AOM_ICDF(31629),
- AOM_ICDF(32768), },
- {AOM_ICDF(13337), AOM_ICDF(15972), AOM_ICDF(29181), AOM_ICDF(29575),
- AOM_ICDF(32768), },
- {AOM_ICDF(11120), AOM_ICDF(12128), AOM_ICDF(26440), AOM_ICDF(26874),
- AOM_ICDF(32768), },
- {AOM_ICDF(10061), AOM_ICDF(10800), AOM_ICDF(23999), AOM_ICDF(24276),
- AOM_ICDF(32768), },
- {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(18432), AOM_ICDF(24576),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(24073), AOM_ICDF(27227), AOM_ICDF(31920), AOM_ICDF(32246),
- AOM_ICDF(32768), },
- {AOM_ICDF(18916), AOM_ICDF(22611), AOM_ICDF(31508), AOM_ICDF(31853),
- AOM_ICDF(32768), },
- {AOM_ICDF(13371), AOM_ICDF(14495), AOM_ICDF(28662), AOM_ICDF(29093),
- AOM_ICDF(32768), },
- {AOM_ICDF(9283), AOM_ICDF(9840), AOM_ICDF(24228), AOM_ICDF(24506),
- AOM_ICDF(32768), },
- {AOM_ICDF(4681), AOM_ICDF(9362), AOM_ICDF(20285), AOM_ICDF(24966),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(25180), AOM_ICDF(28079), AOM_ICDF(32048), AOM_ICDF(32365),
- AOM_ICDF(32768), },
- {AOM_ICDF(19790), AOM_ICDF(23090), AOM_ICDF(31675), AOM_ICDF(32001),
- AOM_ICDF(32768), },
- {AOM_ICDF(12634), AOM_ICDF(13382), AOM_ICDF(28384), AOM_ICDF(28718),
- AOM_ICDF(32768), },
- {AOM_ICDF(11264), AOM_ICDF(12083), AOM_ICDF(28672), AOM_ICDF(29286),
- AOM_ICDF(32768), },
- {AOM_ICDF(7710), AOM_ICDF(13493), AOM_ICDF(21203), AOM_ICDF(26985),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(26180), AOM_ICDF(29109), AOM_ICDF(32085), AOM_ICDF(32408),
- AOM_ICDF(32768), },
- {AOM_ICDF(19990), AOM_ICDF(23991), AOM_ICDF(31806), AOM_ICDF(32152),
- AOM_ICDF(32768), },
- {AOM_ICDF(13735), AOM_ICDF(14612), AOM_ICDF(29022), AOM_ICDF(29326),
- AOM_ICDF(32768), },
- {AOM_ICDF(8192), AOM_ICDF(10240), AOM_ICDF(25259), AOM_ICDF(27307),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(5084), AOM_ICDF(13063), AOM_ICDF(15732), AOM_ICDF(27628),
- AOM_ICDF(28823), AOM_ICDF(32768), },
- {AOM_ICDF(3233), AOM_ICDF(11850), AOM_ICDF(16878), AOM_ICDF(26809),
- AOM_ICDF(27973), AOM_ICDF(32768), },
- {AOM_ICDF(1405), AOM_ICDF(10468), AOM_ICDF(15220), AOM_ICDF(25209),
- AOM_ICDF(26482), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(23854), AOM_ICDF(26692), AOM_ICDF(31964), AOM_ICDF(32291),
- AOM_ICDF(32768), },
- {AOM_ICDF(20514), AOM_ICDF(25677), AOM_ICDF(31833), AOM_ICDF(32170),
- AOM_ICDF(32768), },
- {AOM_ICDF(16504), AOM_ICDF(20235), AOM_ICDF(30877), AOM_ICDF(31237),
- AOM_ICDF(32768), },
- {AOM_ICDF(13241), AOM_ICDF(15173), AOM_ICDF(28673), AOM_ICDF(29116),
- AOM_ICDF(32768), },
- {AOM_ICDF(9526), AOM_ICDF(10553), AOM_ICDF(23852), AOM_ICDF(24361),
- AOM_ICDF(32768), },
- {AOM_ICDF(6144), AOM_ICDF(6428), AOM_ICDF(17806), AOM_ICDF(18148),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(24345), AOM_ICDF(27736), AOM_ICDF(32033), AOM_ICDF(32355),
- AOM_ICDF(32768), },
- {AOM_ICDF(20277), AOM_ICDF(23726), AOM_ICDF(31700), AOM_ICDF(32031),
- AOM_ICDF(32768), },
- {AOM_ICDF(13361), AOM_ICDF(15650), AOM_ICDF(29411), AOM_ICDF(29794),
- AOM_ICDF(32768), },
- {AOM_ICDF(9421), AOM_ICDF(10887), AOM_ICDF(25426), AOM_ICDF(26039),
- AOM_ICDF(32768), },
- {AOM_ICDF(6242), AOM_ICDF(7607), AOM_ICDF(17749), AOM_ICDF(18530),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(26118), AOM_ICDF(28888), AOM_ICDF(32095), AOM_ICDF(32413),
- AOM_ICDF(32768), },
- {AOM_ICDF(21286), AOM_ICDF(24631), AOM_ICDF(31871), AOM_ICDF(32198),
- AOM_ICDF(32768), },
- {AOM_ICDF(13285), AOM_ICDF(15402), AOM_ICDF(29317), AOM_ICDF(29737),
- AOM_ICDF(32768), },
- {AOM_ICDF(9902), AOM_ICDF(10814), AOM_ICDF(24755), AOM_ICDF(25276),
- AOM_ICDF(32768), },
- {AOM_ICDF(11431), AOM_ICDF(13717), AOM_ICDF(20575), AOM_ICDF(23623),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(27178), AOM_ICDF(29612), AOM_ICDF(32119), AOM_ICDF(32433),
- AOM_ICDF(32768), },
- {AOM_ICDF(22095), AOM_ICDF(25550), AOM_ICDF(31976), AOM_ICDF(32298),
- AOM_ICDF(32768), },
- {AOM_ICDF(13847), AOM_ICDF(16273), AOM_ICDF(29602), AOM_ICDF(30024),
- AOM_ICDF(32768), },
- {AOM_ICDF(8771), AOM_ICDF(10923), AOM_ICDF(19694), AOM_ICDF(20521),
- AOM_ICDF(32768), },
- {AOM_ICDF(11398), AOM_ICDF(15672), AOM_ICDF(21370), AOM_ICDF(25645),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(28257), AOM_ICDF(30327), AOM_ICDF(32126), AOM_ICDF(32441),
- AOM_ICDF(32768), },
- {AOM_ICDF(22325), AOM_ICDF(26453), AOM_ICDF(32054), AOM_ICDF(32380),
- AOM_ICDF(32768), },
- {AOM_ICDF(14860), AOM_ICDF(17652), AOM_ICDF(30682), AOM_ICDF(31035),
- AOM_ICDF(32768), },
- {AOM_ICDF(5097), AOM_ICDF(10194), AOM_ICDF(18933), AOM_ICDF(21117),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(28902), AOM_ICDF(29234), AOM_ICDF(31608), AOM_ICDF(31973),
- AOM_ICDF(32378), AOM_ICDF(32768), },
- {AOM_ICDF(22721), AOM_ICDF(23397), AOM_ICDF(30476), AOM_ICDF(31293),
- AOM_ICDF(32179), AOM_ICDF(32768), },
- {AOM_ICDF(16404), AOM_ICDF(18013), AOM_ICDF(27505), AOM_ICDF(29454),
- AOM_ICDF(31300), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(14290), AOM_ICDF(27662), AOM_ICDF(31923), AOM_ICDF(32327),
- AOM_ICDF(32768), },
- {AOM_ICDF(13282), AOM_ICDF(26727), AOM_ICDF(31749), AOM_ICDF(32113),
- AOM_ICDF(32768), },
- {AOM_ICDF(12514), AOM_ICDF(22487), AOM_ICDF(30689), AOM_ICDF(31459),
- AOM_ICDF(32768), },
- {AOM_ICDF(11657), AOM_ICDF(16967), AOM_ICDF(29660), AOM_ICDF(30437),
- AOM_ICDF(32768), },
- {AOM_ICDF(8937), AOM_ICDF(12660), AOM_ICDF(24576), AOM_ICDF(26810),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(20145), AOM_ICDF(28026), AOM_ICDF(31820), AOM_ICDF(32212),
- AOM_ICDF(32768), },
- {AOM_ICDF(16906), AOM_ICDF(25677), AOM_ICDF(31760), AOM_ICDF(32059),
- AOM_ICDF(32768), },
- {AOM_ICDF(12332), AOM_ICDF(18322), AOM_ICDF(29597), AOM_ICDF(31006),
- AOM_ICDF(32768), },
- {AOM_ICDF(8192), AOM_ICDF(13107), AOM_ICDF(21299), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(23492), AOM_ICDF(29214), AOM_ICDF(32166), AOM_ICDF(32467),
- AOM_ICDF(32768), },
- {AOM_ICDF(18757), AOM_ICDF(25536), AOM_ICDF(31789), AOM_ICDF(32165),
- AOM_ICDF(32768), },
- {AOM_ICDF(12603), AOM_ICDF(16384), AOM_ICDF(25206), AOM_ICDF(28987),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(24518), AOM_ICDF(29453), AOM_ICDF(32074), AOM_ICDF(32382),
- AOM_ICDF(32768), },
- {AOM_ICDF(19369), AOM_ICDF(26533), AOM_ICDF(31972), AOM_ICDF(32370),
- AOM_ICDF(32768), },
- {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(20480), AOM_ICDF(26624),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(24576), AOM_ICDF(28789), AOM_ICDF(31364), AOM_ICDF(32066),
- AOM_ICDF(32768), },
- {AOM_ICDF(20052), AOM_ICDF(24454), AOM_ICDF(29834), AOM_ICDF(31301),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(30358), AOM_ICDF(30700), AOM_ICDF(31747), AOM_ICDF(32103),
- AOM_ICDF(32430), AOM_ICDF(32768), },
- {AOM_ICDF(22346), AOM_ICDF(23277), AOM_ICDF(30508), AOM_ICDF(31386),
- AOM_ICDF(32138), AOM_ICDF(32768), },
- {AOM_ICDF(11974), AOM_ICDF(14562), AOM_ICDF(27349), AOM_ICDF(28970),
- AOM_ICDF(31969), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(22910), AOM_ICDF(29539), AOM_ICDF(32102), AOM_ICDF(32412),
- AOM_ICDF(32768), },
- {AOM_ICDF(18429), AOM_ICDF(28710), AOM_ICDF(32106), AOM_ICDF(32432),
- AOM_ICDF(32768), },
- {AOM_ICDF(13601), AOM_ICDF(25238), AOM_ICDF(31845), AOM_ICDF(32262),
- AOM_ICDF(32768), },
- {AOM_ICDF(12472), AOM_ICDF(20976), AOM_ICDF(29026), AOM_ICDF(30500),
- AOM_ICDF(32768), },
- {AOM_ICDF(8738), AOM_ICDF(11469), AOM_ICDF(24030), AOM_ICDF(26761),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(23359), AOM_ICDF(30038), AOM_ICDF(32127), AOM_ICDF(32444),
- AOM_ICDF(32768), },
- {AOM_ICDF(19590), AOM_ICDF(28108), AOM_ICDF(32056), AOM_ICDF(32382),
- AOM_ICDF(32768), },
- {AOM_ICDF(15578), AOM_ICDF(22024), AOM_ICDF(29008), AOM_ICDF(30619),
- AOM_ICDF(32768), },
- {AOM_ICDF(8192), AOM_ICDF(14336), AOM_ICDF(20480), AOM_ICDF(26624),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(26372), AOM_ICDF(31019), AOM_ICDF(32146), AOM_ICDF(32463),
- AOM_ICDF(32768), },
- {AOM_ICDF(22190), AOM_ICDF(28573), AOM_ICDF(32160), AOM_ICDF(32464),
- AOM_ICDF(32768), },
- {AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(22938), AOM_ICDF(27853),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(26672), AOM_ICDF(31311), AOM_ICDF(32156), AOM_ICDF(32462),
- AOM_ICDF(32768), },
- {AOM_ICDF(20946), AOM_ICDF(27885), AOM_ICDF(31997), AOM_ICDF(32382),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(27342), AOM_ICDF(31385), AOM_ICDF(32130), AOM_ICDF(32449),
- AOM_ICDF(32768), },
- {AOM_ICDF(8674), AOM_ICDF(22167), AOM_ICDF(26985), AOM_ICDF(29877),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- },
- { // TX 16X16
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(2479), AOM_ICDF(4993), AOM_ICDF(17332), AOM_ICDF(21885),
- AOM_ICDF(25826), AOM_ICDF(32768), },
- {AOM_ICDF(2848), AOM_ICDF(5996), AOM_ICDF(15242), AOM_ICDF(20755),
- AOM_ICDF(23763), AOM_ICDF(32768), },
- {AOM_ICDF(2125), AOM_ICDF(6226), AOM_ICDF(11733), AOM_ICDF(18389),
- AOM_ICDF(20442), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(14539), AOM_ICDF(19828), AOM_ICDF(29467), AOM_ICDF(29934),
- AOM_ICDF(32768), },
- {AOM_ICDF(12513), AOM_ICDF(19139), AOM_ICDF(29177), AOM_ICDF(29702),
- AOM_ICDF(32768), },
- {AOM_ICDF(11826), AOM_ICDF(16348), AOM_ICDF(27245), AOM_ICDF(27977),
- AOM_ICDF(32768), },
- {AOM_ICDF(10123), AOM_ICDF(12262), AOM_ICDF(24690), AOM_ICDF(25359),
- AOM_ICDF(32768), },
- {AOM_ICDF(7979), AOM_ICDF(8826), AOM_ICDF(20804), AOM_ICDF(21295),
- AOM_ICDF(32768), },
- {AOM_ICDF(5262), AOM_ICDF(5604), AOM_ICDF(14716), AOM_ICDF(15015),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(20625), AOM_ICDF(24118), AOM_ICDF(31086), AOM_ICDF(31446),
- AOM_ICDF(32768), },
- {AOM_ICDF(16710), AOM_ICDF(20899), AOM_ICDF(30505), AOM_ICDF(30864),
- AOM_ICDF(32768), },
- {AOM_ICDF(13161), AOM_ICDF(15579), AOM_ICDF(27988), AOM_ICDF(28449),
- AOM_ICDF(32768), },
- {AOM_ICDF(10596), AOM_ICDF(11651), AOM_ICDF(24124), AOM_ICDF(24589),
- AOM_ICDF(32768), },
- {AOM_ICDF(7724), AOM_ICDF(8452), AOM_ICDF(21060), AOM_ICDF(21476),
- AOM_ICDF(32768), },
- {AOM_ICDF(7282), AOM_ICDF(9466), AOM_ICDF(18933), AOM_ICDF(21117),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(24265), AOM_ICDF(26472), AOM_ICDF(31667), AOM_ICDF(31998),
- AOM_ICDF(32768), },
- {AOM_ICDF(18213), AOM_ICDF(21117), AOM_ICDF(30932), AOM_ICDF(31280),
- AOM_ICDF(32768), },
- {AOM_ICDF(12944), AOM_ICDF(14000), AOM_ICDF(27696), AOM_ICDF(28050),
- AOM_ICDF(32768), },
- {AOM_ICDF(9709), AOM_ICDF(10056), AOM_ICDF(23282), AOM_ICDF(23579),
- AOM_ICDF(32768), },
- {AOM_ICDF(8590), AOM_ICDF(9862), AOM_ICDF(18770), AOM_ICDF(19724),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(26658), AOM_ICDF(28275), AOM_ICDF(31975), AOM_ICDF(32294),
- AOM_ICDF(32768), },
- {AOM_ICDF(20049), AOM_ICDF(22203), AOM_ICDF(31374), AOM_ICDF(31708),
- AOM_ICDF(32768), },
- {AOM_ICDF(12795), AOM_ICDF(13387), AOM_ICDF(28328), AOM_ICDF(28653),
- AOM_ICDF(32768), },
- {AOM_ICDF(8607), AOM_ICDF(9073), AOM_ICDF(23383), AOM_ICDF(23695),
- AOM_ICDF(32768), },
- {AOM_ICDF(8192), AOM_ICDF(9947), AOM_ICDF(18725), AOM_ICDF(20480),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(28651), AOM_ICDF(29902), AOM_ICDF(32085), AOM_ICDF(32402),
- AOM_ICDF(32768), },
- {AOM_ICDF(21133), AOM_ICDF(23229), AOM_ICDF(31684), AOM_ICDF(32013),
- AOM_ICDF(32768), },
- {AOM_ICDF(13231), AOM_ICDF(14045), AOM_ICDF(28203), AOM_ICDF(28576),
- AOM_ICDF(32768), },
- {AOM_ICDF(7903), AOM_ICDF(8481), AOM_ICDF(21781), AOM_ICDF(22359),
- AOM_ICDF(32768), },
- {AOM_ICDF(8192), AOM_ICDF(14336), AOM_ICDF(20480), AOM_ICDF(26624),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(824), AOM_ICDF(8672), AOM_ICDF(16514), AOM_ICDF(27587),
- AOM_ICDF(29231), AOM_ICDF(32768), },
- {AOM_ICDF(1118), AOM_ICDF(9561), AOM_ICDF(17021), AOM_ICDF(25911),
- AOM_ICDF(27753), AOM_ICDF(32768), },
- {AOM_ICDF(806), AOM_ICDF(9313), AOM_ICDF(13998), AOM_ICDF(22910),
- AOM_ICDF(25224), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(23650), AOM_ICDF(26487), AOM_ICDF(31840), AOM_ICDF(32166),
- AOM_ICDF(32768), },
- {AOM_ICDF(19593), AOM_ICDF(25206), AOM_ICDF(31604), AOM_ICDF(31944),
- AOM_ICDF(32768), },
- {AOM_ICDF(15813), AOM_ICDF(19643), AOM_ICDF(30328), AOM_ICDF(30726),
- AOM_ICDF(32768), },
- {AOM_ICDF(12978), AOM_ICDF(15108), AOM_ICDF(27886), AOM_ICDF(28310),
- AOM_ICDF(32768), },
- {AOM_ICDF(9793), AOM_ICDF(11020), AOM_ICDF(23305), AOM_ICDF(23818),
- AOM_ICDF(32768), },
- {AOM_ICDF(4855), AOM_ICDF(5565), AOM_ICDF(14268), AOM_ICDF(14741),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(24547), AOM_ICDF(27751), AOM_ICDF(31964), AOM_ICDF(32285),
- AOM_ICDF(32768), },
- {AOM_ICDF(19674), AOM_ICDF(23377), AOM_ICDF(31426), AOM_ICDF(31759),
- AOM_ICDF(32768), },
- {AOM_ICDF(12643), AOM_ICDF(14489), AOM_ICDF(28159), AOM_ICDF(28541),
- AOM_ICDF(32768), },
- {AOM_ICDF(9110), AOM_ICDF(10279), AOM_ICDF(23565), AOM_ICDF(23992),
- AOM_ICDF(32768), },
- {AOM_ICDF(5082), AOM_ICDF(5617), AOM_ICDF(16317), AOM_ICDF(16651),
- AOM_ICDF(32768), },
- {AOM_ICDF(5174), AOM_ICDF(10348), AOM_ICDF(18971), AOM_ICDF(24145),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(26773), AOM_ICDF(29038), AOM_ICDF(32050), AOM_ICDF(32367),
- AOM_ICDF(32768), },
- {AOM_ICDF(20956), AOM_ICDF(23898), AOM_ICDF(31563), AOM_ICDF(31888),
- AOM_ICDF(32768), },
- {AOM_ICDF(12527), AOM_ICDF(13472), AOM_ICDF(27840), AOM_ICDF(28211),
- AOM_ICDF(32768), },
- {AOM_ICDF(8773), AOM_ICDF(9353), AOM_ICDF(22555), AOM_ICDF(22856),
- AOM_ICDF(32768), },
- {AOM_ICDF(4291), AOM_ICDF(4876), AOM_ICDF(16969), AOM_ICDF(17554),
- AOM_ICDF(32768), },
- {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(17348), AOM_ICDF(23130),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(28065), AOM_ICDF(29768), AOM_ICDF(32086), AOM_ICDF(32400),
- AOM_ICDF(32768), },
- {AOM_ICDF(21847), AOM_ICDF(24001), AOM_ICDF(31608), AOM_ICDF(31929),
- AOM_ICDF(32768), },
- {AOM_ICDF(12482), AOM_ICDF(13091), AOM_ICDF(27413), AOM_ICDF(27739),
- AOM_ICDF(32768), },
- {AOM_ICDF(7582), AOM_ICDF(8002), AOM_ICDF(22090), AOM_ICDF(22405),
- AOM_ICDF(32768), },
- {AOM_ICDF(6324), AOM_ICDF(7186), AOM_ICDF(15809), AOM_ICDF(16671),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(29731), AOM_ICDF(30798), AOM_ICDF(32113), AOM_ICDF(32431),
- AOM_ICDF(32768), },
- {AOM_ICDF(22224), AOM_ICDF(24448), AOM_ICDF(31791), AOM_ICDF(32118),
- AOM_ICDF(32768), },
- {AOM_ICDF(12622), AOM_ICDF(13513), AOM_ICDF(28103), AOM_ICDF(28530),
- AOM_ICDF(32768), },
- {AOM_ICDF(8886), AOM_ICDF(9600), AOM_ICDF(22890), AOM_ICDF(23604),
- AOM_ICDF(32768), },
- {AOM_ICDF(8058), AOM_ICDF(9669), AOM_ICDF(18264), AOM_ICDF(19876),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(27375), AOM_ICDF(27731), AOM_ICDF(31591), AOM_ICDF(31993),
- AOM_ICDF(32404), AOM_ICDF(32768), },
- {AOM_ICDF(20943), AOM_ICDF(21758), AOM_ICDF(30037), AOM_ICDF(31074),
- AOM_ICDF(32003), AOM_ICDF(32768), },
- {AOM_ICDF(16218), AOM_ICDF(17771), AOM_ICDF(26832), AOM_ICDF(29181),
- AOM_ICDF(30586), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(17239), AOM_ICDF(27853), AOM_ICDF(31557), AOM_ICDF(32198),
- AOM_ICDF(32768), },
- {AOM_ICDF(14494), AOM_ICDF(25906), AOM_ICDF(31543), AOM_ICDF(32033),
- AOM_ICDF(32768), },
- {AOM_ICDF(12980), AOM_ICDF(19788), AOM_ICDF(29137), AOM_ICDF(29410),
- AOM_ICDF(32768), },
- {AOM_ICDF(11796), AOM_ICDF(14680), AOM_ICDF(26477), AOM_ICDF(27787),
- AOM_ICDF(32768), },
- {AOM_ICDF(12603), AOM_ICDF(15124), AOM_ICDF(21005), AOM_ICDF(23526),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(22821), AOM_ICDF(27655), AOM_ICDF(32024), AOM_ICDF(32303),
- AOM_ICDF(32768), },
- {AOM_ICDF(16534), AOM_ICDF(23629), AOM_ICDF(31145), AOM_ICDF(31686),
- AOM_ICDF(32768), },
- {AOM_ICDF(12407), AOM_ICDF(14952), AOM_ICDF(28950), AOM_ICDF(30859),
- AOM_ICDF(32768), },
- {AOM_ICDF(6554), AOM_ICDF(10486), AOM_ICDF(19661), AOM_ICDF(23593),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(26369), AOM_ICDF(29624), AOM_ICDF(31996), AOM_ICDF(32272),
- AOM_ICDF(32768), },
- {AOM_ICDF(19346), AOM_ICDF(24807), AOM_ICDF(31750), AOM_ICDF(32027),
- AOM_ICDF(32768), },
- {AOM_ICDF(15056), AOM_ICDF(19484), AOM_ICDF(27454), AOM_ICDF(30111),
- AOM_ICDF(32768), },
- {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(21203), AOM_ICDF(26985),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(28213), AOM_ICDF(30301), AOM_ICDF(32199), AOM_ICDF(32483),
- AOM_ICDF(32768), },
- {AOM_ICDF(22988), AOM_ICDF(27307), AOM_ICDF(31879), AOM_ICDF(32260),
- AOM_ICDF(32768), },
- {AOM_ICDF(11796), AOM_ICDF(15729), AOM_ICDF(24904), AOM_ICDF(28836),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(29813), AOM_ICDF(31323), AOM_ICDF(32142), AOM_ICDF(32444),
- AOM_ICDF(32768), },
- {AOM_ICDF(21497), AOM_ICDF(25254), AOM_ICDF(31307), AOM_ICDF(32142),
- AOM_ICDF(32768), },
- {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(20480), AOM_ICDF(26624),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(30560), AOM_ICDF(30889), AOM_ICDF(31795), AOM_ICDF(32128),
- AOM_ICDF(32455), AOM_ICDF(32768), },
- {AOM_ICDF(20347), AOM_ICDF(20993), AOM_ICDF(30496), AOM_ICDF(31112),
- AOM_ICDF(32263), AOM_ICDF(32768), },
- {AOM_ICDF(9723), AOM_ICDF(10992), AOM_ICDF(27830), AOM_ICDF(28681),
- AOM_ICDF(32168), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(25900), AOM_ICDF(30610), AOM_ICDF(32179), AOM_ICDF(32474),
- AOM_ICDF(32768), },
- {AOM_ICDF(18535), AOM_ICDF(29316), AOM_ICDF(32153), AOM_ICDF(32437),
- AOM_ICDF(32768), },
- {AOM_ICDF(15230), AOM_ICDF(25845), AOM_ICDF(30922), AOM_ICDF(31845),
- AOM_ICDF(32768), },
- {AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(27097), AOM_ICDF(28987),
- AOM_ICDF(32768), },
- {AOM_ICDF(8548), AOM_ICDF(12822), AOM_ICDF(21370), AOM_ICDF(25645),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(26104), AOM_ICDF(30659), AOM_ICDF(32157), AOM_ICDF(32462),
- AOM_ICDF(32768), },
- {AOM_ICDF(20457), AOM_ICDF(28242), AOM_ICDF(31682), AOM_ICDF(32225),
- AOM_ICDF(32768), },
- {AOM_ICDF(10923), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(28672),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(28740), AOM_ICDF(30618), AOM_ICDF(32154), AOM_ICDF(32461),
- AOM_ICDF(32768), },
- {AOM_ICDF(19333), AOM_ICDF(26214), AOM_ICDF(30802), AOM_ICDF(31785),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(28161), AOM_ICDF(30834), AOM_ICDF(32160), AOM_ICDF(32464),
- AOM_ICDF(32768), },
- {AOM_ICDF(26536), AOM_ICDF(29149), AOM_ICDF(31562), AOM_ICDF(32165),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(29913), AOM_ICDF(31560), AOM_ICDF(32172), AOM_ICDF(32470),
- AOM_ICDF(32768), },
- {AOM_ICDF(22209), AOM_ICDF(28035), AOM_ICDF(30583), AOM_ICDF(31676),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- },
- { // TX 32X32
- { // Y plane
- { // Intra
- { // Band 0
- {AOM_ICDF(3982), AOM_ICDF(6433), AOM_ICDF(20418), AOM_ICDF(25151),
- AOM_ICDF(27471), AOM_ICDF(32768), },
- {AOM_ICDF(3342), AOM_ICDF(6943), AOM_ICDF(15018), AOM_ICDF(20274),
- AOM_ICDF(22412), AOM_ICDF(32768), },
- {AOM_ICDF(1805), AOM_ICDF(5863), AOM_ICDF(9932), AOM_ICDF(16426),
- AOM_ICDF(17655), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(11799), AOM_ICDF(19138), AOM_ICDF(28295), AOM_ICDF(28881),
- AOM_ICDF(32768), },
- {AOM_ICDF(11008), AOM_ICDF(18597), AOM_ICDF(28369), AOM_ICDF(29021),
- AOM_ICDF(32768), },
- {AOM_ICDF(10104), AOM_ICDF(15628), AOM_ICDF(26339), AOM_ICDF(27195),
- AOM_ICDF(32768), },
- {AOM_ICDF(8537), AOM_ICDF(11246), AOM_ICDF(22663), AOM_ICDF(23623),
- AOM_ICDF(32768), },
- {AOM_ICDF(5895), AOM_ICDF(6476), AOM_ICDF(16647), AOM_ICDF(17329),
- AOM_ICDF(32768), },
- {AOM_ICDF(4046), AOM_ICDF(4357), AOM_ICDF(10849), AOM_ICDF(11160),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(18503), AOM_ICDF(22222), AOM_ICDF(30403), AOM_ICDF(30814),
- AOM_ICDF(32768), },
- {AOM_ICDF(15264), AOM_ICDF(19282), AOM_ICDF(29949), AOM_ICDF(30339),
- AOM_ICDF(32768), },
- {AOM_ICDF(12101), AOM_ICDF(14721), AOM_ICDF(27350), AOM_ICDF(27783),
- AOM_ICDF(32768), },
- {AOM_ICDF(9243), AOM_ICDF(10177), AOM_ICDF(22679), AOM_ICDF(23097),
- AOM_ICDF(32768), },
- {AOM_ICDF(5571), AOM_ICDF(5967), AOM_ICDF(16714), AOM_ICDF(17043),
- AOM_ICDF(32768), },
- {AOM_ICDF(2731), AOM_ICDF(3755), AOM_ICDF(14677), AOM_ICDF(15701),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(23077), AOM_ICDF(25272), AOM_ICDF(31444), AOM_ICDF(31771),
- AOM_ICDF(32768), },
- {AOM_ICDF(16598), AOM_ICDF(19790), AOM_ICDF(30479), AOM_ICDF(30822),
- AOM_ICDF(32768), },
- {AOM_ICDF(11961), AOM_ICDF(12871), AOM_ICDF(27162), AOM_ICDF(27529),
- AOM_ICDF(32768), },
- {AOM_ICDF(8156), AOM_ICDF(8563), AOM_ICDF(22220), AOM_ICDF(22579),
- AOM_ICDF(32768), },
- {AOM_ICDF(5851), AOM_ICDF(6242), AOM_ICDF(15994), AOM_ICDF(16384),
- AOM_ICDF(32768), },
- {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(18432), AOM_ICDF(24576),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(26084), AOM_ICDF(27933), AOM_ICDF(31906), AOM_ICDF(32223),
- AOM_ICDF(32768), },
- {AOM_ICDF(19335), AOM_ICDF(21760), AOM_ICDF(31149), AOM_ICDF(31477),
- AOM_ICDF(32768), },
- {AOM_ICDF(12724), AOM_ICDF(13278), AOM_ICDF(27015), AOM_ICDF(27365),
- AOM_ICDF(32768), },
- {AOM_ICDF(8687), AOM_ICDF(9010), AOM_ICDF(21051), AOM_ICDF(21334),
- AOM_ICDF(32768), },
- {AOM_ICDF(5814), AOM_ICDF(6606), AOM_ICDF(14534), AOM_ICDF(15327),
- AOM_ICDF(32768), },
- {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(18432), AOM_ICDF(24576),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(30147), AOM_ICDF(30787), AOM_ICDF(32081), AOM_ICDF(32395),
- AOM_ICDF(32768), },
- {AOM_ICDF(20402), AOM_ICDF(21697), AOM_ICDF(30943), AOM_ICDF(31266),
- AOM_ICDF(32768), },
- {AOM_ICDF(11661), AOM_ICDF(12125), AOM_ICDF(25710), AOM_ICDF(26034),
- AOM_ICDF(32768), },
- {AOM_ICDF(7224), AOM_ICDF(7504), AOM_ICDF(19876), AOM_ICDF(20156),
- AOM_ICDF(32768), },
- {AOM_ICDF(6183), AOM_ICDF(7110), AOM_ICDF(17002), AOM_ICDF(17930),
- AOM_ICDF(32768), },
- {AOM_ICDF(5174), AOM_ICDF(10348), AOM_ICDF(17246), AOM_ICDF(22420),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(4079), AOM_ICDF(8378), AOM_ICDF(25109), AOM_ICDF(29897),
- AOM_ICDF(30898), AOM_ICDF(32768), },
- {AOM_ICDF(3870), AOM_ICDF(8207), AOM_ICDF(22495), AOM_ICDF(27162),
- AOM_ICDF(29559), AOM_ICDF(32768), },
- {AOM_ICDF(2127), AOM_ICDF(6197), AOM_ICDF(15932), AOM_ICDF(20604),
- AOM_ICDF(27312), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(21253), AOM_ICDF(26168), AOM_ICDF(31780), AOM_ICDF(32120),
- AOM_ICDF(32768), },
- {AOM_ICDF(16610), AOM_ICDF(23985), AOM_ICDF(31495), AOM_ICDF(31866),
- AOM_ICDF(32768), },
- {AOM_ICDF(14861), AOM_ICDF(21030), AOM_ICDF(30219), AOM_ICDF(30784),
- AOM_ICDF(32768), },
- {AOM_ICDF(14573), AOM_ICDF(18162), AOM_ICDF(28524), AOM_ICDF(29116),
- AOM_ICDF(32768), },
- {AOM_ICDF(14036), AOM_ICDF(15983), AOM_ICDF(26283), AOM_ICDF(27085),
- AOM_ICDF(32768), },
- {AOM_ICDF(9119), AOM_ICDF(10742), AOM_ICDF(19630), AOM_ICDF(20016),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(23192), AOM_ICDF(27248), AOM_ICDF(31887), AOM_ICDF(32215),
- AOM_ICDF(32768), },
- {AOM_ICDF(18219), AOM_ICDF(23213), AOM_ICDF(31417), AOM_ICDF(31769),
- AOM_ICDF(32768), },
- {AOM_ICDF(12657), AOM_ICDF(14754), AOM_ICDF(27845), AOM_ICDF(28233),
- AOM_ICDF(32768), },
- {AOM_ICDF(8127), AOM_ICDF(8829), AOM_ICDF(20909), AOM_ICDF(21279),
- AOM_ICDF(32768), },
- {AOM_ICDF(7547), AOM_ICDF(8142), AOM_ICDF(17476), AOM_ICDF(18072),
- AOM_ICDF(32768), },
- {AOM_ICDF(5461), AOM_ICDF(10923), AOM_ICDF(16384), AOM_ICDF(21845),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(25516), AOM_ICDF(28301), AOM_ICDF(31970), AOM_ICDF(32289),
- AOM_ICDF(32768), },
- {AOM_ICDF(19094), AOM_ICDF(23041), AOM_ICDF(31404), AOM_ICDF(31732),
- AOM_ICDF(32768), },
- {AOM_ICDF(12328), AOM_ICDF(13099), AOM_ICDF(27275), AOM_ICDF(27613),
- AOM_ICDF(32768), },
- {AOM_ICDF(8134), AOM_ICDF(8458), AOM_ICDF(21075), AOM_ICDF(21352),
- AOM_ICDF(32768), },
- {AOM_ICDF(5041), AOM_ICDF(5881), AOM_ICDF(17644), AOM_ICDF(18485),
- AOM_ICDF(32768), },
- {AOM_ICDF(7282), AOM_ICDF(12743), AOM_ICDF(18204), AOM_ICDF(23666),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(28082), AOM_ICDF(29782), AOM_ICDF(32087), AOM_ICDF(32400),
- AOM_ICDF(32768), },
- {AOM_ICDF(21281), AOM_ICDF(24161), AOM_ICDF(31679), AOM_ICDF(31997),
- AOM_ICDF(32768), },
- {AOM_ICDF(12144), AOM_ICDF(12913), AOM_ICDF(27139), AOM_ICDF(27460),
- AOM_ICDF(32768), },
- {AOM_ICDF(8232), AOM_ICDF(8472), AOM_ICDF(21659), AOM_ICDF(21979),
- AOM_ICDF(32768), },
- {AOM_ICDF(3034), AOM_ICDF(4855), AOM_ICDF(17598), AOM_ICDF(19418),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(30193), AOM_ICDF(31021), AOM_ICDF(32122), AOM_ICDF(32435),
- AOM_ICDF(32768), },
- {AOM_ICDF(22124), AOM_ICDF(23763), AOM_ICDF(31498), AOM_ICDF(31816),
- AOM_ICDF(32768), },
- {AOM_ICDF(12066), AOM_ICDF(12418), AOM_ICDF(26849), AOM_ICDF(27157),
- AOM_ICDF(32768), },
- {AOM_ICDF(8701), AOM_ICDF(8979), AOM_ICDF(20920), AOM_ICDF(21197),
- AOM_ICDF(32768), },
- {AOM_ICDF(5266), AOM_ICDF(7022), AOM_ICDF(15799), AOM_ICDF(17554),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- { // UV plane
- { // Intra
- { // Band 0
- {AOM_ICDF(23468), AOM_ICDF(24062), AOM_ICDF(30645), AOM_ICDF(31200),
- AOM_ICDF(32193), AOM_ICDF(32768), },
- {AOM_ICDF(12642), AOM_ICDF(14371), AOM_ICDF(26924), AOM_ICDF(28832),
- AOM_ICDF(31098), AOM_ICDF(32768), },
- {AOM_ICDF(7785), AOM_ICDF(8831), AOM_ICDF(23705), AOM_ICDF(26028),
- AOM_ICDF(29979), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(13575), AOM_ICDF(28087), AOM_ICDF(31130), AOM_ICDF(31832),
- AOM_ICDF(32768), },
- {AOM_ICDF(11108), AOM_ICDF(27955), AOM_ICDF(31657), AOM_ICDF(32213),
- AOM_ICDF(32768), },
- {AOM_ICDF(9797), AOM_ICDF(23985), AOM_ICDF(28039), AOM_ICDF(30741),
- AOM_ICDF(32768), },
- {AOM_ICDF(5578), AOM_ICDF(18824), AOM_ICDF(26493), AOM_ICDF(28585),
- AOM_ICDF(32768), },
- {AOM_ICDF(5041), AOM_ICDF(12603), AOM_ICDF(18905), AOM_ICDF(22686),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(17613), AOM_ICDF(26624), AOM_ICDF(30310), AOM_ICDF(31539),
- AOM_ICDF(32768), },
- {AOM_ICDF(11398), AOM_ICDF(22795), AOM_ICDF(29444), AOM_ICDF(30868),
- AOM_ICDF(32768), },
- {AOM_ICDF(8548), AOM_ICDF(15672), AOM_ICDF(22795), AOM_ICDF(28494),
- AOM_ICDF(32768), },
- {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(20480), AOM_ICDF(26624),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(24145), AOM_ICDF(26301), AOM_ICDF(30181), AOM_ICDF(31475),
- AOM_ICDF(32768), },
- {AOM_ICDF(15565), AOM_ICDF(20480), AOM_ICDF(27853), AOM_ICDF(30310),
- AOM_ICDF(32768), },
- {AOM_ICDF(8192), AOM_ICDF(14336), AOM_ICDF(20480), AOM_ICDF(26624),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(27434), AOM_ICDF(28450), AOM_ICDF(30990), AOM_ICDF(31752),
- AOM_ICDF(32768), },
- {AOM_ICDF(14947), AOM_ICDF(21845), AOM_ICDF(29319), AOM_ICDF(31043),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(31130), AOM_ICDF(31676), AOM_ICDF(32180), AOM_ICDF(32474),
- AOM_ICDF(32768), },
- {AOM_ICDF(18289), AOM_ICDF(22099), AOM_ICDF(28196), AOM_ICDF(30482),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- { // Inter
- { // Band 0
- {AOM_ICDF(29436), AOM_ICDF(29775), AOM_ICDF(31685), AOM_ICDF(32029),
- AOM_ICDF(32425), AOM_ICDF(32768), },
- {AOM_ICDF(10536), AOM_ICDF(11074), AOM_ICDF(27753), AOM_ICDF(28385),
- AOM_ICDF(31293), AOM_ICDF(32768), },
- {AOM_ICDF(3010), AOM_ICDF(3521), AOM_ICDF(22603), AOM_ICDF(23227),
- AOM_ICDF(30440), AOM_ICDF(32768), },
- },
- { // Band 1
- {AOM_ICDF(17576), AOM_ICDF(29491), AOM_ICDF(30981), AOM_ICDF(31874),
- AOM_ICDF(32768), },
- {AOM_ICDF(10426), AOM_ICDF(29044), AOM_ICDF(31725), AOM_ICDF(32321),
- AOM_ICDF(32768), },
- {AOM_ICDF(15766), AOM_ICDF(28286), AOM_ICDF(31377), AOM_ICDF(32304),
- AOM_ICDF(32768), },
- {AOM_ICDF(19661), AOM_ICDF(26985), AOM_ICDF(30069), AOM_ICDF(31611),
- AOM_ICDF(32768), },
- {AOM_ICDF(16035), AOM_ICDF(23007), AOM_ICDF(28585), AOM_ICDF(30676),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 2
- {AOM_ICDF(23073), AOM_ICDF(30053), AOM_ICDF(31605), AOM_ICDF(32186),
- AOM_ICDF(32768), },
- {AOM_ICDF(12858), AOM_ICDF(24887), AOM_ICDF(30279), AOM_ICDF(31524),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 3
- {AOM_ICDF(24030), AOM_ICDF(26839), AOM_ICDF(30896), AOM_ICDF(31832),
- AOM_ICDF(32768), },
- {AOM_ICDF(17644), AOM_ICDF(23526), AOM_ICDF(27727), AOM_ICDF(30247),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 4
- {AOM_ICDF(28019), AOM_ICDF(30156), AOM_ICDF(31343), AOM_ICDF(32056),
- AOM_ICDF(32768), },
- {AOM_ICDF(14980), AOM_ICDF(22469), AOM_ICDF(27151), AOM_ICDF(29959),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- { // Band 5
- {AOM_ICDF(30549), AOM_ICDF(31511), AOM_ICDF(32176), AOM_ICDF(32472),
- AOM_ICDF(32768), },
- {AOM_ICDF(15019), AOM_ICDF(20480), AOM_ICDF(24576), AOM_ICDF(28672),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
- AOM_ICDF(32768), },
- },
- },
- },
- },
-};
-/* clang-format on */
+static const aom_cdf_prob av1_default_coeff_base_multi_cdfs
+ [TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS]
+ [CDF_SIZE(NUM_BASE_LEVELS + 2)] =
+ { { { { { AOM_CDF4(4034, 8930, 12727) },
+ { AOM_CDF4(18082, 29741, 31877) },
+ { AOM_CDF4(12596, 26124, 30493) },
+ { AOM_CDF4(9446, 21118, 27005) },
+ { AOM_CDF4(6308, 15141, 21279) },
+ { AOM_CDF4(2463, 6357, 9783) },
+ { AOM_CDF4(20667, 30546, 31929) },
+ { AOM_CDF4(13043, 26123, 30134) },
+ { AOM_CDF4(8151, 18757, 24778) },
+ { AOM_CDF4(5255, 12839, 18632) },
+ { AOM_CDF4(2820, 7206, 11161) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(15736, 27553, 30604) },
+ { AOM_CDF4(11210, 23794, 28787) },
+ { AOM_CDF4(5947, 13874, 19701) },
+ { AOM_CDF4(4215, 9323, 13891) },
+ { AOM_CDF4(2833, 6462, 10059) },
+ { AOM_CDF4(19605, 30393, 31582) },
+ { AOM_CDF4(13523, 26252, 30248) },
+ { AOM_CDF4(8446, 18622, 24512) },
+ { AOM_CDF4(3818, 10343, 15974) },
+ { AOM_CDF4(1481, 4117, 6796) },
+ { AOM_CDF4(22649, 31302, 32190) },
+ { AOM_CDF4(14829, 27127, 30449) },
+ { AOM_CDF4(8313, 17702, 23304) },
+ { AOM_CDF4(3022, 8301, 12786) },
+ { AOM_CDF4(1536, 4412, 7184) },
+ { AOM_CDF4(22354, 29774, 31372) },
+ { AOM_CDF4(14723, 25472, 29214) },
+ { AOM_CDF4(6673, 13745, 18662) },
+ { AOM_CDF4(2068, 5766, 9322) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(6302, 16444, 21761) },
+ { AOM_CDF4(23040, 31538, 32475) },
+ { AOM_CDF4(15196, 28452, 31496) },
+ { AOM_CDF4(10020, 22946, 28514) },
+ { AOM_CDF4(6533, 16862, 23501) },
+ { AOM_CDF4(3538, 9816, 15076) },
+ { AOM_CDF4(24444, 31875, 32525) },
+ { AOM_CDF4(15881, 28924, 31635) },
+ { AOM_CDF4(9922, 22873, 28466) },
+ { AOM_CDF4(6527, 16966, 23691) },
+ { AOM_CDF4(4114, 11303, 17220) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(20201, 30770, 32209) },
+ { AOM_CDF4(14754, 28071, 31258) },
+ { AOM_CDF4(8378, 20186, 26517) },
+ { AOM_CDF4(5916, 15299, 21978) },
+ { AOM_CDF4(4268, 11583, 17901) },
+ { AOM_CDF4(24361, 32025, 32581) },
+ { AOM_CDF4(18673, 30105, 31943) },
+ { AOM_CDF4(10196, 22244, 27576) },
+ { AOM_CDF4(5495, 14349, 20417) },
+ { AOM_CDF4(2676, 7415, 11498) },
+ { AOM_CDF4(24678, 31958, 32585) },
+ { AOM_CDF4(18629, 29906, 31831) },
+ { AOM_CDF4(9364, 20724, 26315) },
+ { AOM_CDF4(4641, 12318, 18094) },
+ { AOM_CDF4(2758, 7387, 11579) },
+ { AOM_CDF4(25433, 31842, 32469) },
+ { AOM_CDF4(18795, 29289, 31411) },
+ { AOM_CDF4(7644, 17584, 23592) },
+ { AOM_CDF4(3408, 9014, 15047) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(4536, 10072, 14001) },
+ { AOM_CDF4(25459, 31416, 32206) },
+ { AOM_CDF4(16605, 28048, 30818) },
+ { AOM_CDF4(11008, 22857, 27719) },
+ { AOM_CDF4(6915, 16268, 22315) },
+ { AOM_CDF4(2625, 6812, 10537) },
+ { AOM_CDF4(24257, 31788, 32499) },
+ { AOM_CDF4(16880, 29454, 31879) },
+ { AOM_CDF4(11958, 25054, 29778) },
+ { AOM_CDF4(7916, 18718, 25084) },
+ { AOM_CDF4(3383, 8777, 13446) },
+ { AOM_CDF4(22720, 31603, 32393) },
+ { AOM_CDF4(14960, 28125, 31335) },
+ { AOM_CDF4(9731, 22210, 27928) },
+ { AOM_CDF4(6304, 15832, 22277) },
+ { AOM_CDF4(2910, 7818, 12166) },
+ { AOM_CDF4(20375, 30627, 32131) },
+ { AOM_CDF4(13904, 27284, 30887) },
+ { AOM_CDF4(9368, 21558, 27144) },
+ { AOM_CDF4(5937, 14966, 21119) },
+ { AOM_CDF4(2667, 7225, 11319) },
+ { AOM_CDF4(23970, 31470, 32378) },
+ { AOM_CDF4(17173, 29734, 32018) },
+ { AOM_CDF4(12795, 25441, 29965) },
+ { AOM_CDF4(8981, 19680, 25893) },
+ { AOM_CDF4(4728, 11372, 16902) },
+ { AOM_CDF4(24287, 31797, 32439) },
+ { AOM_CDF4(16703, 29145, 31696) },
+ { AOM_CDF4(10833, 23554, 28725) },
+ { AOM_CDF4(6468, 16566, 23057) },
+ { AOM_CDF4(2415, 6562, 10278) },
+ { AOM_CDF4(26610, 32395, 32659) },
+ { AOM_CDF4(18590, 30498, 32117) },
+ { AOM_CDF4(12420, 25756, 29950) },
+ { AOM_CDF4(7639, 18746, 24710) },
+ { AOM_CDF4(3001, 8086, 12347) },
+ { AOM_CDF4(25076, 32064, 32580) },
+ { AOM_CDF4(17946, 30128, 32028) },
+ { AOM_CDF4(12024, 24985, 29378) },
+ { AOM_CDF4(7517, 18390, 24304) },
+ { AOM_CDF4(3243, 8781, 13331) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(6037, 16771, 21957) },
+ { AOM_CDF4(24774, 31704, 32426) },
+ { AOM_CDF4(16830, 28589, 31056) },
+ { AOM_CDF4(10602, 22828, 27760) },
+ { AOM_CDF4(6733, 16829, 23071) },
+ { AOM_CDF4(3250, 8914, 13556) },
+ { AOM_CDF4(25582, 32220, 32668) },
+ { AOM_CDF4(18659, 30342, 32223) },
+ { AOM_CDF4(12546, 26149, 30515) },
+ { AOM_CDF4(8420, 20451, 26801) },
+ { AOM_CDF4(4636, 12420, 18344) },
+ { AOM_CDF4(27581, 32362, 32639) },
+ { AOM_CDF4(18987, 30083, 31978) },
+ { AOM_CDF4(11327, 24248, 29084) },
+ { AOM_CDF4(7264, 17719, 24120) },
+ { AOM_CDF4(3995, 10768, 16169) },
+ { AOM_CDF4(25893, 31831, 32487) },
+ { AOM_CDF4(16577, 28587, 31379) },
+ { AOM_CDF4(10189, 22748, 28182) },
+ { AOM_CDF4(6832, 17094, 23556) },
+ { AOM_CDF4(3708, 10110, 15334) },
+ { AOM_CDF4(25904, 32282, 32656) },
+ { AOM_CDF4(19721, 30792, 32276) },
+ { AOM_CDF4(12819, 26243, 30411) },
+ { AOM_CDF4(8572, 20614, 26891) },
+ { AOM_CDF4(5364, 14059, 20467) },
+ { AOM_CDF4(26580, 32438, 32677) },
+ { AOM_CDF4(20852, 31225, 32340) },
+ { AOM_CDF4(12435, 25700, 29967) },
+ { AOM_CDF4(8691, 20825, 26976) },
+ { AOM_CDF4(4446, 12209, 17269) },
+ { AOM_CDF4(27350, 32429, 32696) },
+ { AOM_CDF4(21372, 30977, 32272) },
+ { AOM_CDF4(12673, 25270, 29853) },
+ { AOM_CDF4(9208, 20925, 26640) },
+ { AOM_CDF4(5018, 13351, 18732) },
+ { AOM_CDF4(27351, 32479, 32713) },
+ { AOM_CDF4(21398, 31209, 32387) },
+ { AOM_CDF4(12162, 25047, 29842) },
+ { AOM_CDF4(7896, 18691, 25319) },
+ { AOM_CDF4(4670, 12882, 18881) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(5487, 10460, 13708) },
+ { AOM_CDF4(21597, 28303, 30674) },
+ { AOM_CDF4(11037, 21953, 26476) },
+ { AOM_CDF4(8147, 17962, 22952) },
+ { AOM_CDF4(5242, 13061, 18532) },
+ { AOM_CDF4(1889, 5208, 8182) },
+ { AOM_CDF4(26774, 32133, 32590) },
+ { AOM_CDF4(17844, 29564, 31767) },
+ { AOM_CDF4(11690, 24438, 29171) },
+ { AOM_CDF4(7542, 18215, 24459) },
+ { AOM_CDF4(2993, 8050, 12319) },
+ { AOM_CDF4(28023, 32328, 32591) },
+ { AOM_CDF4(18651, 30126, 31954) },
+ { AOM_CDF4(12164, 25146, 29589) },
+ { AOM_CDF4(7762, 18530, 24771) },
+ { AOM_CDF4(3492, 9183, 13920) },
+ { AOM_CDF4(27591, 32008, 32491) },
+ { AOM_CDF4(17149, 28853, 31510) },
+ { AOM_CDF4(11485, 24003, 28860) },
+ { AOM_CDF4(7697, 18086, 24210) },
+ { AOM_CDF4(3075, 7999, 12218) },
+ { AOM_CDF4(28268, 32482, 32654) },
+ { AOM_CDF4(19631, 31051, 32404) },
+ { AOM_CDF4(13860, 27260, 31020) },
+ { AOM_CDF4(9605, 21613, 27594) },
+ { AOM_CDF4(4876, 12162, 17908) },
+ { AOM_CDF4(27248, 32316, 32576) },
+ { AOM_CDF4(18955, 30457, 32075) },
+ { AOM_CDF4(11824, 23997, 28795) },
+ { AOM_CDF4(7346, 18196, 24647) },
+ { AOM_CDF4(3403, 9247, 14111) },
+ { AOM_CDF4(29711, 32655, 32735) },
+ { AOM_CDF4(21169, 31394, 32417) },
+ { AOM_CDF4(13487, 27198, 30957) },
+ { AOM_CDF4(8828, 21683, 27614) },
+ { AOM_CDF4(4270, 11451, 17038) },
+ { AOM_CDF4(28708, 32578, 32731) },
+ { AOM_CDF4(20120, 31241, 32482) },
+ { AOM_CDF4(13692, 27550, 31321) },
+ { AOM_CDF4(9418, 22514, 28439) },
+ { AOM_CDF4(4999, 13283, 19462) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(5673, 14302, 19711) },
+ { AOM_CDF4(26251, 30701, 31834) },
+ { AOM_CDF4(12782, 23783, 27803) },
+ { AOM_CDF4(9127, 20657, 25808) },
+ { AOM_CDF4(6368, 16208, 21462) },
+ { AOM_CDF4(2465, 7177, 10822) },
+ { AOM_CDF4(29961, 32563, 32719) },
+ { AOM_CDF4(18318, 29891, 31949) },
+ { AOM_CDF4(11361, 24514, 29357) },
+ { AOM_CDF4(7900, 19603, 25607) },
+ { AOM_CDF4(4002, 10590, 15546) },
+ { AOM_CDF4(29637, 32310, 32595) },
+ { AOM_CDF4(18296, 29913, 31809) },
+ { AOM_CDF4(10144, 21515, 26871) },
+ { AOM_CDF4(5358, 14322, 20394) },
+ { AOM_CDF4(3067, 8362, 13346) },
+ { AOM_CDF4(28652, 32470, 32676) },
+ { AOM_CDF4(17538, 30771, 32209) },
+ { AOM_CDF4(13924, 26882, 30494) },
+ { AOM_CDF4(10496, 22837, 27869) },
+ { AOM_CDF4(7236, 16396, 21621) },
+ { AOM_CDF4(30743, 32687, 32746) },
+ { AOM_CDF4(23006, 31676, 32489) },
+ { AOM_CDF4(14494, 27828, 31120) },
+ { AOM_CDF4(10174, 22801, 28352) },
+ { AOM_CDF4(6242, 15281, 21043) },
+ { AOM_CDF4(25817, 32243, 32720) },
+ { AOM_CDF4(18618, 31367, 32325) },
+ { AOM_CDF4(13997, 28318, 31878) },
+ { AOM_CDF4(12255, 26534, 31383) },
+ { AOM_CDF4(9561, 21588, 28450) },
+ { AOM_CDF4(28188, 32635, 32724) },
+ { AOM_CDF4(22060, 32365, 32728) },
+ { AOM_CDF4(18102, 30690, 32528) },
+ { AOM_CDF4(14196, 28864, 31999) },
+ { AOM_CDF4(12262, 25792, 30865) },
+ { AOM_CDF4(24176, 32109, 32628) },
+ { AOM_CDF4(18280, 29681, 31963) },
+ { AOM_CDF4(10205, 23703, 29664) },
+ { AOM_CDF4(7889, 20025, 27676) },
+ { AOM_CDF4(6060, 16743, 23970) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(5141, 7096, 8260) },
+ { AOM_CDF4(27186, 29022, 29789) },
+ { AOM_CDF4(6668, 12568, 15682) },
+ { AOM_CDF4(2172, 6181, 8638) },
+ { AOM_CDF4(1126, 3379, 4531) },
+ { AOM_CDF4(443, 1361, 2254) },
+ { AOM_CDF4(26083, 31153, 32436) },
+ { AOM_CDF4(13486, 24603, 28483) },
+ { AOM_CDF4(6508, 14840, 19910) },
+ { AOM_CDF4(3386, 8800, 13286) },
+ { AOM_CDF4(1530, 4322, 7054) },
+ { AOM_CDF4(29639, 32080, 32548) },
+ { AOM_CDF4(15897, 27552, 30290) },
+ { AOM_CDF4(8588, 20047, 25383) },
+ { AOM_CDF4(4889, 13339, 19269) },
+ { AOM_CDF4(2240, 6871, 10498) },
+ { AOM_CDF4(28165, 32197, 32517) },
+ { AOM_CDF4(20735, 30427, 31568) },
+ { AOM_CDF4(14325, 24671, 27692) },
+ { AOM_CDF4(5119, 12554, 17805) },
+ { AOM_CDF4(1810, 5441, 8261) },
+ { AOM_CDF4(31212, 32724, 32748) },
+ { AOM_CDF4(23352, 31766, 32545) },
+ { AOM_CDF4(14669, 27570, 31059) },
+ { AOM_CDF4(8492, 20894, 27272) },
+ { AOM_CDF4(3644, 10194, 15204) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(2461, 7013, 9371) },
+ { AOM_CDF4(24749, 29600, 30986) },
+ { AOM_CDF4(9466, 19037, 22417) },
+ { AOM_CDF4(3584, 9280, 14400) },
+ { AOM_CDF4(1505, 3929, 5433) },
+ { AOM_CDF4(677, 1500, 2736) },
+ { AOM_CDF4(23987, 30702, 32117) },
+ { AOM_CDF4(13554, 24571, 29263) },
+ { AOM_CDF4(6211, 14556, 21155) },
+ { AOM_CDF4(3135, 10972, 15625) },
+ { AOM_CDF4(2435, 7127, 11427) },
+ { AOM_CDF4(31300, 32532, 32550) },
+ { AOM_CDF4(14757, 30365, 31954) },
+ { AOM_CDF4(4405, 11612, 18553) },
+ { AOM_CDF4(580, 4132, 7322) },
+ { AOM_CDF4(1695, 10169, 14124) },
+ { AOM_CDF4(30008, 32282, 32591) },
+ { AOM_CDF4(19244, 30108, 31748) },
+ { AOM_CDF4(11180, 24158, 29555) },
+ { AOM_CDF4(5650, 14972, 19209) },
+ { AOM_CDF4(2114, 5109, 8456) },
+ { AOM_CDF4(31856, 32716, 32748) },
+ { AOM_CDF4(23012, 31664, 32572) },
+ { AOM_CDF4(13694, 26656, 30636) },
+ { AOM_CDF4(8142, 19508, 26093) },
+ { AOM_CDF4(4253, 10955, 16724) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(601, 983, 1311) },
+ { AOM_CDF4(18725, 23406, 28087) },
+ { AOM_CDF4(5461, 8192, 10923) },
+ { AOM_CDF4(3781, 15124, 21425) },
+ { AOM_CDF4(2587, 7761, 12072) },
+ { AOM_CDF4(106, 458, 810) },
+ { AOM_CDF4(22282, 29710, 31894) },
+ { AOM_CDF4(8508, 20926, 25984) },
+ { AOM_CDF4(3726, 12713, 18083) },
+ { AOM_CDF4(1620, 7112, 10893) },
+ { AOM_CDF4(729, 2236, 3495) },
+ { AOM_CDF4(30163, 32474, 32684) },
+ { AOM_CDF4(18304, 30464, 32000) },
+ { AOM_CDF4(11443, 26526, 29647) },
+ { AOM_CDF4(6007, 15292, 21299) },
+ { AOM_CDF4(2234, 6703, 8937) },
+ { AOM_CDF4(30954, 32177, 32571) },
+ { AOM_CDF4(17363, 29562, 31076) },
+ { AOM_CDF4(9686, 22464, 27410) },
+ { AOM_CDF4(8192, 16384, 21390) },
+ { AOM_CDF4(1755, 8046, 11264) },
+ { AOM_CDF4(31168, 32734, 32748) },
+ { AOM_CDF4(22486, 31441, 32471) },
+ { AOM_CDF4(12833, 25627, 29738) },
+ { AOM_CDF4(6980, 17379, 23122) },
+ { AOM_CDF4(3111, 8887, 13479) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } } },
+ { { { { AOM_CDF4(6041, 11854, 15927) },
+ { AOM_CDF4(20326, 30905, 32251) },
+ { AOM_CDF4(14164, 26831, 30725) },
+ { AOM_CDF4(9760, 20647, 26585) },
+ { AOM_CDF4(6416, 14953, 21219) },
+ { AOM_CDF4(2966, 7151, 10891) },
+ { AOM_CDF4(23567, 31374, 32254) },
+ { AOM_CDF4(14978, 27416, 30946) },
+ { AOM_CDF4(9434, 20225, 26254) },
+ { AOM_CDF4(6658, 14558, 20535) },
+ { AOM_CDF4(3916, 8677, 12989) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(18088, 29545, 31587) },
+ { AOM_CDF4(13062, 25843, 30073) },
+ { AOM_CDF4(8940, 16827, 22251) },
+ { AOM_CDF4(7654, 13220, 17973) },
+ { AOM_CDF4(5733, 10316, 14456) },
+ { AOM_CDF4(22879, 31388, 32114) },
+ { AOM_CDF4(15215, 27993, 30955) },
+ { AOM_CDF4(9397, 19445, 24978) },
+ { AOM_CDF4(3442, 9813, 15344) },
+ { AOM_CDF4(1368, 3936, 6532) },
+ { AOM_CDF4(25494, 32033, 32406) },
+ { AOM_CDF4(16772, 27963, 30718) },
+ { AOM_CDF4(9419, 18165, 23260) },
+ { AOM_CDF4(2677, 7501, 11797) },
+ { AOM_CDF4(1516, 4344, 7170) },
+ { AOM_CDF4(26556, 31454, 32101) },
+ { AOM_CDF4(17128, 27035, 30108) },
+ { AOM_CDF4(8324, 15344, 20249) },
+ { AOM_CDF4(1903, 5696, 9469) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(8455, 19003, 24368) },
+ { AOM_CDF4(23563, 32021, 32604) },
+ { AOM_CDF4(16237, 29446, 31935) },
+ { AOM_CDF4(10724, 23999, 29358) },
+ { AOM_CDF4(6725, 17528, 24416) },
+ { AOM_CDF4(3927, 10927, 16825) },
+ { AOM_CDF4(26313, 32288, 32634) },
+ { AOM_CDF4(17430, 30095, 32095) },
+ { AOM_CDF4(11116, 24606, 29679) },
+ { AOM_CDF4(7195, 18384, 25269) },
+ { AOM_CDF4(4726, 12852, 19315) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(22822, 31648, 32483) },
+ { AOM_CDF4(16724, 29633, 31929) },
+ { AOM_CDF4(10261, 23033, 28725) },
+ { AOM_CDF4(7029, 17840, 24528) },
+ { AOM_CDF4(4867, 13886, 21502) },
+ { AOM_CDF4(25298, 31892, 32491) },
+ { AOM_CDF4(17809, 29330, 31512) },
+ { AOM_CDF4(9668, 21329, 26579) },
+ { AOM_CDF4(4774, 12956, 18976) },
+ { AOM_CDF4(2322, 7030, 11540) },
+ { AOM_CDF4(25472, 31920, 32543) },
+ { AOM_CDF4(17957, 29387, 31632) },
+ { AOM_CDF4(9196, 20593, 26400) },
+ { AOM_CDF4(4680, 12705, 19202) },
+ { AOM_CDF4(2917, 8456, 13436) },
+ { AOM_CDF4(26471, 32059, 32574) },
+ { AOM_CDF4(18458, 29783, 31909) },
+ { AOM_CDF4(8400, 19464, 25956) },
+ { AOM_CDF4(3812, 10973, 17206) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(6779, 13743, 17678) },
+ { AOM_CDF4(24806, 31797, 32457) },
+ { AOM_CDF4(17616, 29047, 31372) },
+ { AOM_CDF4(11063, 23175, 28003) },
+ { AOM_CDF4(6521, 16110, 22324) },
+ { AOM_CDF4(2764, 7504, 11654) },
+ { AOM_CDF4(25266, 32367, 32637) },
+ { AOM_CDF4(19054, 30553, 32175) },
+ { AOM_CDF4(12139, 25212, 29807) },
+ { AOM_CDF4(7311, 18162, 24704) },
+ { AOM_CDF4(3397, 9164, 14074) },
+ { AOM_CDF4(25988, 32208, 32522) },
+ { AOM_CDF4(16253, 28912, 31526) },
+ { AOM_CDF4(9151, 21387, 27372) },
+ { AOM_CDF4(5688, 14915, 21496) },
+ { AOM_CDF4(2717, 7627, 12004) },
+ { AOM_CDF4(23144, 31855, 32443) },
+ { AOM_CDF4(16070, 28491, 31325) },
+ { AOM_CDF4(8702, 20467, 26517) },
+ { AOM_CDF4(5243, 13956, 20367) },
+ { AOM_CDF4(2621, 7335, 11567) },
+ { AOM_CDF4(26636, 32340, 32630) },
+ { AOM_CDF4(19990, 31050, 32341) },
+ { AOM_CDF4(13243, 26105, 30315) },
+ { AOM_CDF4(8588, 19521, 25918) },
+ { AOM_CDF4(4717, 11585, 17304) },
+ { AOM_CDF4(25844, 32292, 32582) },
+ { AOM_CDF4(19090, 30635, 32097) },
+ { AOM_CDF4(11963, 24546, 28939) },
+ { AOM_CDF4(6218, 16087, 22354) },
+ { AOM_CDF4(2340, 6608, 10426) },
+ { AOM_CDF4(28046, 32576, 32694) },
+ { AOM_CDF4(21178, 31313, 32296) },
+ { AOM_CDF4(13486, 26184, 29870) },
+ { AOM_CDF4(7149, 17871, 23723) },
+ { AOM_CDF4(2833, 7958, 12259) },
+ { AOM_CDF4(27710, 32528, 32686) },
+ { AOM_CDF4(20674, 31076, 32268) },
+ { AOM_CDF4(12413, 24955, 29243) },
+ { AOM_CDF4(6676, 16927, 23097) },
+ { AOM_CDF4(2966, 8333, 12919) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(8639, 19339, 24429) },
+ { AOM_CDF4(24404, 31837, 32525) },
+ { AOM_CDF4(16997, 29425, 31784) },
+ { AOM_CDF4(11253, 24234, 29149) },
+ { AOM_CDF4(6751, 17394, 24028) },
+ { AOM_CDF4(3490, 9830, 15191) },
+ { AOM_CDF4(26283, 32471, 32714) },
+ { AOM_CDF4(19599, 31168, 32442) },
+ { AOM_CDF4(13146, 26954, 30893) },
+ { AOM_CDF4(8214, 20588, 26890) },
+ { AOM_CDF4(4699, 13081, 19300) },
+ { AOM_CDF4(28212, 32458, 32669) },
+ { AOM_CDF4(18594, 30316, 32100) },
+ { AOM_CDF4(11219, 24408, 29234) },
+ { AOM_CDF4(6865, 17656, 24149) },
+ { AOM_CDF4(3678, 10362, 16006) },
+ { AOM_CDF4(25825, 32136, 32616) },
+ { AOM_CDF4(17313, 29853, 32021) },
+ { AOM_CDF4(11197, 24471, 29472) },
+ { AOM_CDF4(6947, 17781, 24405) },
+ { AOM_CDF4(3768, 10660, 16261) },
+ { AOM_CDF4(27352, 32500, 32706) },
+ { AOM_CDF4(20850, 31468, 32469) },
+ { AOM_CDF4(14021, 27707, 31133) },
+ { AOM_CDF4(8964, 21748, 27838) },
+ { AOM_CDF4(5437, 14665, 21187) },
+ { AOM_CDF4(26304, 32492, 32698) },
+ { AOM_CDF4(20409, 31380, 32385) },
+ { AOM_CDF4(13682, 27222, 30632) },
+ { AOM_CDF4(8974, 21236, 26685) },
+ { AOM_CDF4(4234, 11665, 16934) },
+ { AOM_CDF4(26273, 32357, 32711) },
+ { AOM_CDF4(20672, 31242, 32441) },
+ { AOM_CDF4(14172, 27254, 30902) },
+ { AOM_CDF4(9870, 21898, 27275) },
+ { AOM_CDF4(5164, 13506, 19270) },
+ { AOM_CDF4(26725, 32459, 32728) },
+ { AOM_CDF4(20991, 31442, 32527) },
+ { AOM_CDF4(13071, 26434, 30811) },
+ { AOM_CDF4(8184, 20090, 26742) },
+ { AOM_CDF4(4803, 13255, 19895) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(7555, 14942, 18501) },
+ { AOM_CDF4(24410, 31178, 32287) },
+ { AOM_CDF4(14394, 26738, 30253) },
+ { AOM_CDF4(8413, 19554, 25195) },
+ { AOM_CDF4(4766, 12924, 18785) },
+ { AOM_CDF4(2029, 5806, 9207) },
+ { AOM_CDF4(26776, 32364, 32663) },
+ { AOM_CDF4(18732, 29967, 31931) },
+ { AOM_CDF4(11005, 23786, 28852) },
+ { AOM_CDF4(6466, 16909, 23510) },
+ { AOM_CDF4(3044, 8638, 13419) },
+ { AOM_CDF4(29208, 32582, 32704) },
+ { AOM_CDF4(20068, 30857, 32208) },
+ { AOM_CDF4(12003, 25085, 29595) },
+ { AOM_CDF4(6947, 17750, 24189) },
+ { AOM_CDF4(3245, 9103, 14007) },
+ { AOM_CDF4(27359, 32465, 32669) },
+ { AOM_CDF4(19421, 30614, 32174) },
+ { AOM_CDF4(11915, 25010, 29579) },
+ { AOM_CDF4(6950, 17676, 24074) },
+ { AOM_CDF4(3007, 8473, 13096) },
+ { AOM_CDF4(29002, 32676, 32735) },
+ { AOM_CDF4(22102, 31849, 32576) },
+ { AOM_CDF4(14408, 28009, 31405) },
+ { AOM_CDF4(9027, 21679, 27931) },
+ { AOM_CDF4(4694, 12678, 18748) },
+ { AOM_CDF4(28216, 32528, 32682) },
+ { AOM_CDF4(20849, 31264, 32318) },
+ { AOM_CDF4(12756, 25815, 29751) },
+ { AOM_CDF4(7565, 18801, 24923) },
+ { AOM_CDF4(3509, 9533, 14477) },
+ { AOM_CDF4(30133, 32687, 32739) },
+ { AOM_CDF4(23063, 31910, 32515) },
+ { AOM_CDF4(14588, 28051, 31132) },
+ { AOM_CDF4(9085, 21649, 27457) },
+ { AOM_CDF4(4261, 11654, 17264) },
+ { AOM_CDF4(29518, 32691, 32748) },
+ { AOM_CDF4(22451, 31959, 32613) },
+ { AOM_CDF4(14864, 28722, 31700) },
+ { AOM_CDF4(9695, 22964, 28716) },
+ { AOM_CDF4(4932, 13358, 19502) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(6465, 16958, 21688) },
+ { AOM_CDF4(25199, 31514, 32360) },
+ { AOM_CDF4(14774, 27149, 30607) },
+ { AOM_CDF4(9257, 21438, 26972) },
+ { AOM_CDF4(5723, 15183, 21882) },
+ { AOM_CDF4(3150, 8879, 13731) },
+ { AOM_CDF4(26989, 32262, 32682) },
+ { AOM_CDF4(17396, 29937, 32085) },
+ { AOM_CDF4(11387, 24901, 29784) },
+ { AOM_CDF4(7289, 18821, 25548) },
+ { AOM_CDF4(3734, 10577, 16086) },
+ { AOM_CDF4(29728, 32501, 32695) },
+ { AOM_CDF4(17431, 29701, 31903) },
+ { AOM_CDF4(9921, 22826, 28300) },
+ { AOM_CDF4(5896, 15434, 22068) },
+ { AOM_CDF4(3430, 9646, 14757) },
+ { AOM_CDF4(28614, 32511, 32705) },
+ { AOM_CDF4(19364, 30638, 32263) },
+ { AOM_CDF4(13129, 26254, 30402) },
+ { AOM_CDF4(8754, 20484, 26440) },
+ { AOM_CDF4(4378, 11607, 17110) },
+ { AOM_CDF4(30292, 32671, 32744) },
+ { AOM_CDF4(21780, 31603, 32501) },
+ { AOM_CDF4(14314, 27829, 31291) },
+ { AOM_CDF4(9611, 22327, 28263) },
+ { AOM_CDF4(4890, 13087, 19065) },
+ { AOM_CDF4(25862, 32567, 32733) },
+ { AOM_CDF4(20794, 32050, 32567) },
+ { AOM_CDF4(17243, 30625, 32254) },
+ { AOM_CDF4(13283, 27628, 31474) },
+ { AOM_CDF4(9669, 22532, 28918) },
+ { AOM_CDF4(27435, 32697, 32748) },
+ { AOM_CDF4(24922, 32390, 32714) },
+ { AOM_CDF4(21449, 31504, 32536) },
+ { AOM_CDF4(16392, 29729, 31832) },
+ { AOM_CDF4(11692, 24884, 29076) },
+ { AOM_CDF4(24193, 32290, 32735) },
+ { AOM_CDF4(18909, 31104, 32563) },
+ { AOM_CDF4(12236, 26841, 31403) },
+ { AOM_CDF4(8171, 21840, 29082) },
+ { AOM_CDF4(7224, 17280, 25275) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(3078, 6839, 9890) },
+ { AOM_CDF4(13837, 20450, 24479) },
+ { AOM_CDF4(5914, 14222, 19328) },
+ { AOM_CDF4(3866, 10267, 14762) },
+ { AOM_CDF4(2612, 7208, 11042) },
+ { AOM_CDF4(1067, 2991, 4776) },
+ { AOM_CDF4(25817, 31646, 32529) },
+ { AOM_CDF4(13708, 26338, 30385) },
+ { AOM_CDF4(7328, 18585, 24870) },
+ { AOM_CDF4(4691, 13080, 19276) },
+ { AOM_CDF4(1825, 5253, 8352) },
+ { AOM_CDF4(29386, 32315, 32624) },
+ { AOM_CDF4(17160, 29001, 31360) },
+ { AOM_CDF4(9602, 21862, 27396) },
+ { AOM_CDF4(5915, 15772, 22148) },
+ { AOM_CDF4(2786, 7779, 12047) },
+ { AOM_CDF4(29246, 32450, 32663) },
+ { AOM_CDF4(18696, 29929, 31818) },
+ { AOM_CDF4(10510, 23369, 28560) },
+ { AOM_CDF4(6229, 16499, 23125) },
+ { AOM_CDF4(2608, 7448, 11705) },
+ { AOM_CDF4(30753, 32710, 32748) },
+ { AOM_CDF4(21638, 31487, 32503) },
+ { AOM_CDF4(12937, 26854, 30870) },
+ { AOM_CDF4(8182, 20596, 26970) },
+ { AOM_CDF4(3637, 10269, 15497) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(5244, 12150, 16906) },
+ { AOM_CDF4(20486, 26858, 29701) },
+ { AOM_CDF4(7756, 18317, 23735) },
+ { AOM_CDF4(3452, 9256, 13146) },
+ { AOM_CDF4(2020, 5206, 8229) },
+ { AOM_CDF4(1801, 4993, 7903) },
+ { AOM_CDF4(27051, 31858, 32531) },
+ { AOM_CDF4(15988, 27531, 30619) },
+ { AOM_CDF4(9188, 21484, 26719) },
+ { AOM_CDF4(6273, 17186, 23800) },
+ { AOM_CDF4(3108, 9355, 14764) },
+ { AOM_CDF4(31076, 32520, 32680) },
+ { AOM_CDF4(18119, 30037, 31850) },
+ { AOM_CDF4(10244, 22969, 27472) },
+ { AOM_CDF4(4692, 14077, 19273) },
+ { AOM_CDF4(3694, 11677, 17556) },
+ { AOM_CDF4(30060, 32581, 32720) },
+ { AOM_CDF4(21011, 30775, 32120) },
+ { AOM_CDF4(11931, 24820, 29289) },
+ { AOM_CDF4(7119, 17662, 24356) },
+ { AOM_CDF4(3833, 10706, 16304) },
+ { AOM_CDF4(31954, 32731, 32748) },
+ { AOM_CDF4(23913, 31724, 32489) },
+ { AOM_CDF4(15520, 28060, 31286) },
+ { AOM_CDF4(11517, 23008, 28571) },
+ { AOM_CDF4(6193, 14508, 20629) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(1035, 2807, 4156) },
+ { AOM_CDF4(13162, 18138, 20939) },
+ { AOM_CDF4(2696, 6633, 8755) },
+ { AOM_CDF4(1373, 4161, 6853) },
+ { AOM_CDF4(1099, 2746, 4716) },
+ { AOM_CDF4(340, 1021, 1599) },
+ { AOM_CDF4(22826, 30419, 32135) },
+ { AOM_CDF4(10395, 21762, 26942) },
+ { AOM_CDF4(4726, 12407, 17361) },
+ { AOM_CDF4(2447, 7080, 10593) },
+ { AOM_CDF4(1227, 3717, 6011) },
+ { AOM_CDF4(28156, 31424, 31934) },
+ { AOM_CDF4(16915, 27754, 30373) },
+ { AOM_CDF4(9148, 20990, 26431) },
+ { AOM_CDF4(5950, 15515, 21148) },
+ { AOM_CDF4(2492, 7327, 11526) },
+ { AOM_CDF4(30602, 32477, 32670) },
+ { AOM_CDF4(20026, 29955, 31568) },
+ { AOM_CDF4(11220, 23628, 28105) },
+ { AOM_CDF4(6652, 17019, 22973) },
+ { AOM_CDF4(3064, 8536, 13043) },
+ { AOM_CDF4(31769, 32724, 32748) },
+ { AOM_CDF4(22230, 30887, 32373) },
+ { AOM_CDF4(12234, 25079, 29731) },
+ { AOM_CDF4(7326, 18816, 25353) },
+ { AOM_CDF4(3933, 10907, 16616) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } } },
+ { { { { AOM_CDF4(8896, 16227, 20630) },
+ { AOM_CDF4(23629, 31782, 32527) },
+ { AOM_CDF4(15173, 27755, 31321) },
+ { AOM_CDF4(10158, 21233, 27382) },
+ { AOM_CDF4(6420, 14857, 21558) },
+ { AOM_CDF4(3269, 8155, 12646) },
+ { AOM_CDF4(24835, 32009, 32496) },
+ { AOM_CDF4(16509, 28421, 31579) },
+ { AOM_CDF4(10957, 21514, 27418) },
+ { AOM_CDF4(7881, 15930, 22096) },
+ { AOM_CDF4(5388, 10960, 15918) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(20745, 30773, 32093) },
+ { AOM_CDF4(15200, 27221, 30861) },
+ { AOM_CDF4(13032, 20873, 25667) },
+ { AOM_CDF4(12285, 18663, 23494) },
+ { AOM_CDF4(11563, 17481, 21489) },
+ { AOM_CDF4(26260, 31982, 32320) },
+ { AOM_CDF4(15397, 28083, 31100) },
+ { AOM_CDF4(9742, 19217, 24824) },
+ { AOM_CDF4(3261, 9629, 15362) },
+ { AOM_CDF4(1480, 4322, 7499) },
+ { AOM_CDF4(27599, 32256, 32460) },
+ { AOM_CDF4(16857, 27659, 30774) },
+ { AOM_CDF4(9551, 18290, 23748) },
+ { AOM_CDF4(3052, 8933, 14103) },
+ { AOM_CDF4(2021, 5910, 9787) },
+ { AOM_CDF4(29005, 32015, 32392) },
+ { AOM_CDF4(17677, 27694, 30863) },
+ { AOM_CDF4(9204, 17356, 23219) },
+ { AOM_CDF4(2403, 7516, 12814) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(10808, 22056, 26896) },
+ { AOM_CDF4(25739, 32313, 32676) },
+ { AOM_CDF4(17288, 30203, 32221) },
+ { AOM_CDF4(11359, 24878, 29896) },
+ { AOM_CDF4(6949, 17767, 24893) },
+ { AOM_CDF4(4287, 11796, 18071) },
+ { AOM_CDF4(27880, 32521, 32705) },
+ { AOM_CDF4(19038, 31004, 32414) },
+ { AOM_CDF4(12564, 26345, 30768) },
+ { AOM_CDF4(8269, 19947, 26779) },
+ { AOM_CDF4(5674, 14657, 21674) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(25742, 32319, 32671) },
+ { AOM_CDF4(19557, 31164, 32454) },
+ { AOM_CDF4(13381, 26381, 30755) },
+ { AOM_CDF4(10101, 21466, 26722) },
+ { AOM_CDF4(9209, 19650, 26825) },
+ { AOM_CDF4(27107, 31917, 32432) },
+ { AOM_CDF4(18056, 28893, 31203) },
+ { AOM_CDF4(10200, 21434, 26764) },
+ { AOM_CDF4(4660, 12913, 19502) },
+ { AOM_CDF4(2368, 6930, 12504) },
+ { AOM_CDF4(26960, 32158, 32613) },
+ { AOM_CDF4(18628, 30005, 32031) },
+ { AOM_CDF4(10233, 22442, 28232) },
+ { AOM_CDF4(5471, 14630, 21516) },
+ { AOM_CDF4(3235, 10767, 17109) },
+ { AOM_CDF4(27696, 32440, 32692) },
+ { AOM_CDF4(20032, 31167, 32438) },
+ { AOM_CDF4(8700, 21341, 28442) },
+ { AOM_CDF4(5662, 14831, 21795) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(9704, 17294, 21132) },
+ { AOM_CDF4(26762, 32278, 32633) },
+ { AOM_CDF4(18382, 29620, 31819) },
+ { AOM_CDF4(10891, 23475, 28723) },
+ { AOM_CDF4(6358, 16583, 23309) },
+ { AOM_CDF4(3248, 9118, 14141) },
+ { AOM_CDF4(27204, 32573, 32699) },
+ { AOM_CDF4(19818, 30824, 32329) },
+ { AOM_CDF4(11772, 25120, 30041) },
+ { AOM_CDF4(6995, 18033, 25039) },
+ { AOM_CDF4(3752, 10442, 16098) },
+ { AOM_CDF4(27222, 32256, 32559) },
+ { AOM_CDF4(15356, 28399, 31475) },
+ { AOM_CDF4(8821, 20635, 27057) },
+ { AOM_CDF4(5511, 14404, 21239) },
+ { AOM_CDF4(2935, 8222, 13051) },
+ { AOM_CDF4(24875, 32120, 32529) },
+ { AOM_CDF4(15233, 28265, 31445) },
+ { AOM_CDF4(8605, 20570, 26932) },
+ { AOM_CDF4(5431, 14413, 21196) },
+ { AOM_CDF4(2994, 8341, 13223) },
+ { AOM_CDF4(28201, 32604, 32700) },
+ { AOM_CDF4(21041, 31446, 32456) },
+ { AOM_CDF4(13221, 26213, 30475) },
+ { AOM_CDF4(8255, 19385, 26037) },
+ { AOM_CDF4(4930, 12585, 18830) },
+ { AOM_CDF4(28768, 32448, 32627) },
+ { AOM_CDF4(19705, 30561, 32021) },
+ { AOM_CDF4(11572, 23589, 28220) },
+ { AOM_CDF4(5532, 15034, 21446) },
+ { AOM_CDF4(2460, 7150, 11456) },
+ { AOM_CDF4(29874, 32619, 32699) },
+ { AOM_CDF4(21621, 31071, 32201) },
+ { AOM_CDF4(12511, 24747, 28992) },
+ { AOM_CDF4(6281, 16395, 22748) },
+ { AOM_CDF4(3246, 9278, 14497) },
+ { AOM_CDF4(29715, 32625, 32712) },
+ { AOM_CDF4(20958, 31011, 32283) },
+ { AOM_CDF4(11233, 23671, 28806) },
+ { AOM_CDF4(6012, 16128, 22868) },
+ { AOM_CDF4(3427, 9851, 15414) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(11016, 22111, 26794) },
+ { AOM_CDF4(25946, 32357, 32677) },
+ { AOM_CDF4(17890, 30452, 32252) },
+ { AOM_CDF4(11678, 25142, 29816) },
+ { AOM_CDF4(6720, 17534, 24584) },
+ { AOM_CDF4(4230, 11665, 17820) },
+ { AOM_CDF4(28400, 32623, 32747) },
+ { AOM_CDF4(21164, 31668, 32575) },
+ { AOM_CDF4(13572, 27388, 31182) },
+ { AOM_CDF4(8234, 20750, 27358) },
+ { AOM_CDF4(5065, 14055, 20897) },
+ { AOM_CDF4(28981, 32547, 32705) },
+ { AOM_CDF4(18681, 30543, 32239) },
+ { AOM_CDF4(10919, 24075, 29286) },
+ { AOM_CDF4(6431, 17199, 24077) },
+ { AOM_CDF4(3819, 10464, 16618) },
+ { AOM_CDF4(26870, 32467, 32693) },
+ { AOM_CDF4(19041, 30831, 32347) },
+ { AOM_CDF4(11794, 25211, 30016) },
+ { AOM_CDF4(6888, 18019, 24970) },
+ { AOM_CDF4(4370, 12363, 18992) },
+ { AOM_CDF4(29578, 32670, 32744) },
+ { AOM_CDF4(23159, 32007, 32613) },
+ { AOM_CDF4(15315, 28669, 31676) },
+ { AOM_CDF4(9298, 22607, 28782) },
+ { AOM_CDF4(6144, 15913, 22968) },
+ { AOM_CDF4(28110, 32499, 32669) },
+ { AOM_CDF4(21574, 30937, 32015) },
+ { AOM_CDF4(12759, 24818, 28727) },
+ { AOM_CDF4(6545, 16761, 23042) },
+ { AOM_CDF4(3649, 10597, 16833) },
+ { AOM_CDF4(28163, 32552, 32728) },
+ { AOM_CDF4(22101, 31469, 32464) },
+ { AOM_CDF4(13160, 25472, 30143) },
+ { AOM_CDF4(7303, 18684, 25468) },
+ { AOM_CDF4(5241, 13975, 20955) },
+ { AOM_CDF4(28400, 32631, 32744) },
+ { AOM_CDF4(22104, 31793, 32603) },
+ { AOM_CDF4(13557, 26571, 30846) },
+ { AOM_CDF4(7749, 19861, 26675) },
+ { AOM_CDF4(4873, 14030, 21234) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(9800, 17635, 21073) },
+ { AOM_CDF4(26153, 31885, 32527) },
+ { AOM_CDF4(15038, 27852, 31006) },
+ { AOM_CDF4(8718, 20564, 26486) },
+ { AOM_CDF4(5128, 14076, 20514) },
+ { AOM_CDF4(2636, 7566, 11925) },
+ { AOM_CDF4(27551, 32504, 32701) },
+ { AOM_CDF4(18310, 30054, 32100) },
+ { AOM_CDF4(10211, 23420, 29082) },
+ { AOM_CDF4(6222, 16876, 23916) },
+ { AOM_CDF4(3462, 9954, 15498) },
+ { AOM_CDF4(29991, 32633, 32721) },
+ { AOM_CDF4(19883, 30751, 32201) },
+ { AOM_CDF4(11141, 24184, 29285) },
+ { AOM_CDF4(6420, 16940, 23774) },
+ { AOM_CDF4(3392, 9753, 15118) },
+ { AOM_CDF4(28465, 32616, 32712) },
+ { AOM_CDF4(19850, 30702, 32244) },
+ { AOM_CDF4(10983, 24024, 29223) },
+ { AOM_CDF4(6294, 16770, 23582) },
+ { AOM_CDF4(3244, 9283, 14509) },
+ { AOM_CDF4(30023, 32717, 32748) },
+ { AOM_CDF4(22940, 32032, 32626) },
+ { AOM_CDF4(14282, 27928, 31473) },
+ { AOM_CDF4(8562, 21327, 27914) },
+ { AOM_CDF4(4846, 13393, 19919) },
+ { AOM_CDF4(29981, 32590, 32695) },
+ { AOM_CDF4(20465, 30963, 32166) },
+ { AOM_CDF4(11479, 23579, 28195) },
+ { AOM_CDF4(5916, 15648, 22073) },
+ { AOM_CDF4(3031, 8605, 13398) },
+ { AOM_CDF4(31146, 32691, 32739) },
+ { AOM_CDF4(23106, 31724, 32444) },
+ { AOM_CDF4(13783, 26738, 30439) },
+ { AOM_CDF4(7852, 19468, 25807) },
+ { AOM_CDF4(3860, 11124, 16853) },
+ { AOM_CDF4(31014, 32724, 32748) },
+ { AOM_CDF4(23629, 32109, 32628) },
+ { AOM_CDF4(14747, 28115, 31403) },
+ { AOM_CDF4(8545, 21242, 27478) },
+ { AOM_CDF4(4574, 12781, 19067) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(9185, 19694, 24688) },
+ { AOM_CDF4(26081, 31985, 32621) },
+ { AOM_CDF4(16015, 29000, 31787) },
+ { AOM_CDF4(10542, 23690, 29206) },
+ { AOM_CDF4(6732, 17945, 24677) },
+ { AOM_CDF4(3916, 11039, 16722) },
+ { AOM_CDF4(28224, 32566, 32744) },
+ { AOM_CDF4(19100, 31138, 32485) },
+ { AOM_CDF4(12528, 26620, 30879) },
+ { AOM_CDF4(7741, 20277, 26885) },
+ { AOM_CDF4(4566, 12845, 18990) },
+ { AOM_CDF4(29933, 32593, 32718) },
+ { AOM_CDF4(17670, 30333, 32155) },
+ { AOM_CDF4(10385, 23600, 28909) },
+ { AOM_CDF4(6243, 16236, 22407) },
+ { AOM_CDF4(3976, 10389, 16017) },
+ { AOM_CDF4(28377, 32561, 32738) },
+ { AOM_CDF4(19366, 31175, 32482) },
+ { AOM_CDF4(13327, 27175, 31094) },
+ { AOM_CDF4(8258, 20769, 27143) },
+ { AOM_CDF4(4703, 13198, 19527) },
+ { AOM_CDF4(31086, 32706, 32748) },
+ { AOM_CDF4(22853, 31902, 32583) },
+ { AOM_CDF4(14759, 28186, 31419) },
+ { AOM_CDF4(9284, 22382, 28348) },
+ { AOM_CDF4(5585, 15192, 21868) },
+ { AOM_CDF4(28291, 32652, 32746) },
+ { AOM_CDF4(19849, 32107, 32571) },
+ { AOM_CDF4(14834, 26818, 29214) },
+ { AOM_CDF4(10306, 22594, 28672) },
+ { AOM_CDF4(6615, 17384, 23384) },
+ { AOM_CDF4(28947, 32604, 32745) },
+ { AOM_CDF4(25625, 32289, 32646) },
+ { AOM_CDF4(18758, 28672, 31403) },
+ { AOM_CDF4(10017, 23430, 28523) },
+ { AOM_CDF4(6862, 15269, 22131) },
+ { AOM_CDF4(23933, 32509, 32739) },
+ { AOM_CDF4(19927, 31495, 32631) },
+ { AOM_CDF4(11903, 26023, 30621) },
+ { AOM_CDF4(7026, 20094, 27252) },
+ { AOM_CDF4(5998, 18106, 24437) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(4456, 11274, 15533) },
+ { AOM_CDF4(21219, 29079, 31616) },
+ { AOM_CDF4(11173, 23774, 28567) },
+ { AOM_CDF4(7282, 18293, 24263) },
+ { AOM_CDF4(4890, 13286, 19115) },
+ { AOM_CDF4(1890, 5508, 8659) },
+ { AOM_CDF4(26651, 32136, 32647) },
+ { AOM_CDF4(14630, 28254, 31455) },
+ { AOM_CDF4(8716, 21287, 27395) },
+ { AOM_CDF4(5615, 15331, 22008) },
+ { AOM_CDF4(2675, 7700, 12150) },
+ { AOM_CDF4(29954, 32526, 32690) },
+ { AOM_CDF4(16126, 28982, 31633) },
+ { AOM_CDF4(9030, 21361, 27352) },
+ { AOM_CDF4(5411, 14793, 21271) },
+ { AOM_CDF4(2943, 8422, 13163) },
+ { AOM_CDF4(29539, 32601, 32730) },
+ { AOM_CDF4(18125, 30385, 32201) },
+ { AOM_CDF4(10422, 24090, 29468) },
+ { AOM_CDF4(6468, 17487, 24438) },
+ { AOM_CDF4(2970, 8653, 13531) },
+ { AOM_CDF4(30912, 32715, 32748) },
+ { AOM_CDF4(20666, 31373, 32497) },
+ { AOM_CDF4(12509, 26640, 30917) },
+ { AOM_CDF4(8058, 20629, 27290) },
+ { AOM_CDF4(4231, 12006, 18052) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(10202, 20633, 25484) },
+ { AOM_CDF4(27336, 31445, 32352) },
+ { AOM_CDF4(12420, 24384, 28552) },
+ { AOM_CDF4(7648, 18115, 23856) },
+ { AOM_CDF4(5662, 14341, 19902) },
+ { AOM_CDF4(3611, 10328, 15390) },
+ { AOM_CDF4(30945, 32616, 32736) },
+ { AOM_CDF4(18682, 30505, 32253) },
+ { AOM_CDF4(11513, 25336, 30203) },
+ { AOM_CDF4(7449, 19452, 26148) },
+ { AOM_CDF4(4482, 13051, 18886) },
+ { AOM_CDF4(32022, 32690, 32747) },
+ { AOM_CDF4(18578, 30501, 32146) },
+ { AOM_CDF4(11249, 23368, 28631) },
+ { AOM_CDF4(5645, 16958, 22158) },
+ { AOM_CDF4(5009, 11444, 16637) },
+ { AOM_CDF4(31357, 32710, 32748) },
+ { AOM_CDF4(21552, 31494, 32504) },
+ { AOM_CDF4(13891, 27677, 31340) },
+ { AOM_CDF4(9051, 22098, 28172) },
+ { AOM_CDF4(5190, 13377, 19486) },
+ { AOM_CDF4(32364, 32740, 32748) },
+ { AOM_CDF4(24839, 31907, 32551) },
+ { AOM_CDF4(17160, 28779, 31696) },
+ { AOM_CDF4(12452, 24137, 29602) },
+ { AOM_CDF4(6165, 15389, 22477) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(2575, 7281, 11077) },
+ { AOM_CDF4(14002, 20866, 25402) },
+ { AOM_CDF4(6343, 15056, 19658) },
+ { AOM_CDF4(4474, 11858, 17041) },
+ { AOM_CDF4(2865, 8299, 12534) },
+ { AOM_CDF4(1344, 3949, 6391) },
+ { AOM_CDF4(24720, 31239, 32459) },
+ { AOM_CDF4(12585, 25356, 29968) },
+ { AOM_CDF4(7181, 18246, 24444) },
+ { AOM_CDF4(5025, 13667, 19885) },
+ { AOM_CDF4(2521, 7304, 11605) },
+ { AOM_CDF4(29908, 32252, 32584) },
+ { AOM_CDF4(17421, 29156, 31575) },
+ { AOM_CDF4(9889, 22188, 27782) },
+ { AOM_CDF4(5878, 15647, 22123) },
+ { AOM_CDF4(2814, 8665, 13323) },
+ { AOM_CDF4(30183, 32568, 32713) },
+ { AOM_CDF4(18528, 30195, 32049) },
+ { AOM_CDF4(10982, 24606, 29657) },
+ { AOM_CDF4(6957, 18165, 25231) },
+ { AOM_CDF4(3508, 10118, 15468) },
+ { AOM_CDF4(31761, 32736, 32748) },
+ { AOM_CDF4(21041, 31328, 32546) },
+ { AOM_CDF4(12568, 26732, 31166) },
+ { AOM_CDF4(8052, 20720, 27733) },
+ { AOM_CDF4(4336, 12192, 18396) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } } },
+ { { { { AOM_CDF4(7062, 16472, 22319) },
+ { AOM_CDF4(24538, 32261, 32674) },
+ { AOM_CDF4(13675, 28041, 31779) },
+ { AOM_CDF4(8590, 20674, 27631) },
+ { AOM_CDF4(5685, 14675, 22013) },
+ { AOM_CDF4(3655, 9898, 15731) },
+ { AOM_CDF4(26493, 32418, 32658) },
+ { AOM_CDF4(16376, 29342, 32090) },
+ { AOM_CDF4(10594, 22649, 28970) },
+ { AOM_CDF4(8176, 17170, 24303) },
+ { AOM_CDF4(5605, 12694, 19139) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(23888, 31902, 32542) },
+ { AOM_CDF4(18612, 29687, 31987) },
+ { AOM_CDF4(16245, 24852, 29249) },
+ { AOM_CDF4(15765, 22608, 27559) },
+ { AOM_CDF4(19895, 24699, 27510) },
+ { AOM_CDF4(28401, 32212, 32457) },
+ { AOM_CDF4(15274, 27825, 30980) },
+ { AOM_CDF4(9364, 18128, 24332) },
+ { AOM_CDF4(2283, 8193, 15082) },
+ { AOM_CDF4(1228, 3972, 7881) },
+ { AOM_CDF4(29455, 32469, 32620) },
+ { AOM_CDF4(17981, 28245, 31388) },
+ { AOM_CDF4(10921, 20098, 26240) },
+ { AOM_CDF4(3743, 11829, 18657) },
+ { AOM_CDF4(2374, 9593, 15715) },
+ { AOM_CDF4(31068, 32466, 32635) },
+ { AOM_CDF4(20321, 29572, 31971) },
+ { AOM_CDF4(10771, 20255, 27119) },
+ { AOM_CDF4(2795, 10410, 17361) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(9320, 22102, 27840) },
+ { AOM_CDF4(27057, 32464, 32724) },
+ { AOM_CDF4(16331, 30268, 32309) },
+ { AOM_CDF4(10319, 23935, 29720) },
+ { AOM_CDF4(6189, 16448, 24106) },
+ { AOM_CDF4(3589, 10884, 18808) },
+ { AOM_CDF4(29026, 32624, 32748) },
+ { AOM_CDF4(19226, 31507, 32587) },
+ { AOM_CDF4(12692, 26921, 31203) },
+ { AOM_CDF4(7049, 19532, 27635) },
+ { AOM_CDF4(7727, 15669, 23252) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(28056, 32625, 32748) },
+ { AOM_CDF4(22383, 32075, 32669) },
+ { AOM_CDF4(15417, 27098, 31749) },
+ { AOM_CDF4(18127, 26493, 27190) },
+ { AOM_CDF4(5461, 16384, 21845) },
+ { AOM_CDF4(27982, 32091, 32584) },
+ { AOM_CDF4(19045, 29868, 31972) },
+ { AOM_CDF4(10397, 22266, 27932) },
+ { AOM_CDF4(5990, 13697, 21500) },
+ { AOM_CDF4(1792, 6912, 15104) },
+ { AOM_CDF4(28198, 32501, 32718) },
+ { AOM_CDF4(21534, 31521, 32569) },
+ { AOM_CDF4(11109, 25217, 30017) },
+ { AOM_CDF4(5671, 15124, 26151) },
+ { AOM_CDF4(4681, 14043, 18725) },
+ { AOM_CDF4(28688, 32580, 32741) },
+ { AOM_CDF4(22576, 32079, 32661) },
+ { AOM_CDF4(10627, 22141, 28340) },
+ { AOM_CDF4(9362, 14043, 28087) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(7754, 16948, 22142) },
+ { AOM_CDF4(25670, 32330, 32691) },
+ { AOM_CDF4(15663, 29225, 31994) },
+ { AOM_CDF4(9878, 23288, 29158) },
+ { AOM_CDF4(6419, 17088, 24336) },
+ { AOM_CDF4(3859, 11003, 17039) },
+ { AOM_CDF4(27562, 32595, 32725) },
+ { AOM_CDF4(17575, 30588, 32399) },
+ { AOM_CDF4(10819, 24838, 30309) },
+ { AOM_CDF4(7124, 18686, 25916) },
+ { AOM_CDF4(4479, 12688, 19340) },
+ { AOM_CDF4(28385, 32476, 32673) },
+ { AOM_CDF4(15306, 29005, 31938) },
+ { AOM_CDF4(8937, 21615, 28322) },
+ { AOM_CDF4(5982, 15603, 22786) },
+ { AOM_CDF4(3620, 10267, 16136) },
+ { AOM_CDF4(27280, 32464, 32667) },
+ { AOM_CDF4(15607, 29160, 32004) },
+ { AOM_CDF4(9091, 22135, 28740) },
+ { AOM_CDF4(6232, 16632, 24020) },
+ { AOM_CDF4(4047, 11377, 17672) },
+ { AOM_CDF4(29220, 32630, 32718) },
+ { AOM_CDF4(19650, 31220, 32462) },
+ { AOM_CDF4(13050, 26312, 30827) },
+ { AOM_CDF4(9228, 20870, 27468) },
+ { AOM_CDF4(6146, 15149, 21971) },
+ { AOM_CDF4(30169, 32481, 32623) },
+ { AOM_CDF4(17212, 29311, 31554) },
+ { AOM_CDF4(9911, 21311, 26882) },
+ { AOM_CDF4(4487, 13314, 20372) },
+ { AOM_CDF4(2570, 7772, 12889) },
+ { AOM_CDF4(30924, 32613, 32708) },
+ { AOM_CDF4(19490, 30206, 32107) },
+ { AOM_CDF4(11232, 23998, 29276) },
+ { AOM_CDF4(6769, 17955, 25035) },
+ { AOM_CDF4(4398, 12623, 19214) },
+ { AOM_CDF4(30609, 32627, 32722) },
+ { AOM_CDF4(19370, 30582, 32287) },
+ { AOM_CDF4(10457, 23619, 29409) },
+ { AOM_CDF4(6443, 17637, 24834) },
+ { AOM_CDF4(4645, 13236, 20106) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(8626, 20271, 26216) },
+ { AOM_CDF4(26707, 32406, 32711) },
+ { AOM_CDF4(16999, 30329, 32286) },
+ { AOM_CDF4(11445, 25123, 30286) },
+ { AOM_CDF4(6411, 18828, 25601) },
+ { AOM_CDF4(6801, 12458, 20248) },
+ { AOM_CDF4(29918, 32682, 32748) },
+ { AOM_CDF4(20649, 31739, 32618) },
+ { AOM_CDF4(12879, 27773, 31581) },
+ { AOM_CDF4(7896, 21751, 28244) },
+ { AOM_CDF4(5260, 14870, 23698) },
+ { AOM_CDF4(29252, 32593, 32731) },
+ { AOM_CDF4(17072, 30460, 32294) },
+ { AOM_CDF4(10653, 24143, 29365) },
+ { AOM_CDF4(6536, 17490, 23983) },
+ { AOM_CDF4(4929, 13170, 20085) },
+ { AOM_CDF4(28137, 32518, 32715) },
+ { AOM_CDF4(18171, 30784, 32407) },
+ { AOM_CDF4(11437, 25436, 30459) },
+ { AOM_CDF4(7252, 18534, 26176) },
+ { AOM_CDF4(4126, 13353, 20978) },
+ { AOM_CDF4(31162, 32726, 32748) },
+ { AOM_CDF4(23017, 32222, 32701) },
+ { AOM_CDF4(15629, 29233, 32046) },
+ { AOM_CDF4(9387, 22621, 29480) },
+ { AOM_CDF4(6922, 17616, 25010) },
+ { AOM_CDF4(28838, 32265, 32614) },
+ { AOM_CDF4(19701, 30206, 31920) },
+ { AOM_CDF4(11214, 22410, 27933) },
+ { AOM_CDF4(5320, 14177, 23034) },
+ { AOM_CDF4(5049, 12881, 17827) },
+ { AOM_CDF4(27484, 32471, 32734) },
+ { AOM_CDF4(21076, 31526, 32561) },
+ { AOM_CDF4(12707, 26303, 31211) },
+ { AOM_CDF4(8169, 21722, 28219) },
+ { AOM_CDF4(6045, 19406, 27042) },
+ { AOM_CDF4(27753, 32572, 32745) },
+ { AOM_CDF4(20832, 31878, 32653) },
+ { AOM_CDF4(13250, 27356, 31674) },
+ { AOM_CDF4(7718, 21508, 29858) },
+ { AOM_CDF4(7209, 18350, 25559) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(7876, 16901, 21741) },
+ { AOM_CDF4(24001, 31898, 32625) },
+ { AOM_CDF4(14529, 27959, 31451) },
+ { AOM_CDF4(8273, 20818, 27258) },
+ { AOM_CDF4(5278, 14673, 21510) },
+ { AOM_CDF4(2983, 8843, 14039) },
+ { AOM_CDF4(28016, 32574, 32732) },
+ { AOM_CDF4(17471, 30306, 32301) },
+ { AOM_CDF4(10224, 24063, 29728) },
+ { AOM_CDF4(6602, 17954, 25052) },
+ { AOM_CDF4(4002, 11585, 17759) },
+ { AOM_CDF4(30190, 32634, 32739) },
+ { AOM_CDF4(17497, 30282, 32270) },
+ { AOM_CDF4(10229, 23729, 29538) },
+ { AOM_CDF4(6344, 17211, 24440) },
+ { AOM_CDF4(3849, 11189, 17108) },
+ { AOM_CDF4(28570, 32583, 32726) },
+ { AOM_CDF4(17521, 30161, 32238) },
+ { AOM_CDF4(10153, 23565, 29378) },
+ { AOM_CDF4(6455, 17341, 24443) },
+ { AOM_CDF4(3907, 11042, 17024) },
+ { AOM_CDF4(30689, 32715, 32748) },
+ { AOM_CDF4(21546, 31840, 32610) },
+ { AOM_CDF4(13547, 27581, 31459) },
+ { AOM_CDF4(8912, 21757, 28309) },
+ { AOM_CDF4(5548, 15080, 22046) },
+ { AOM_CDF4(30783, 32540, 32685) },
+ { AOM_CDF4(17540, 29528, 31668) },
+ { AOM_CDF4(10160, 21468, 26783) },
+ { AOM_CDF4(4724, 13393, 20054) },
+ { AOM_CDF4(2702, 8174, 13102) },
+ { AOM_CDF4(31648, 32686, 32742) },
+ { AOM_CDF4(20954, 31094, 32337) },
+ { AOM_CDF4(12420, 25698, 30179) },
+ { AOM_CDF4(7304, 19320, 26248) },
+ { AOM_CDF4(4366, 12261, 18864) },
+ { AOM_CDF4(31581, 32723, 32748) },
+ { AOM_CDF4(21373, 31586, 32525) },
+ { AOM_CDF4(12744, 26625, 30885) },
+ { AOM_CDF4(7431, 20322, 26950) },
+ { AOM_CDF4(4692, 13323, 20111) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(7833, 18369, 24095) },
+ { AOM_CDF4(26650, 32273, 32702) },
+ { AOM_CDF4(16371, 29961, 32191) },
+ { AOM_CDF4(11055, 24082, 29629) },
+ { AOM_CDF4(6892, 18644, 25400) },
+ { AOM_CDF4(5006, 13057, 19240) },
+ { AOM_CDF4(29834, 32666, 32748) },
+ { AOM_CDF4(19577, 31335, 32570) },
+ { AOM_CDF4(12253, 26509, 31122) },
+ { AOM_CDF4(7991, 20772, 27711) },
+ { AOM_CDF4(5677, 15910, 23059) },
+ { AOM_CDF4(30109, 32532, 32720) },
+ { AOM_CDF4(16747, 30166, 32252) },
+ { AOM_CDF4(10134, 23542, 29184) },
+ { AOM_CDF4(5791, 16176, 23556) },
+ { AOM_CDF4(4362, 10414, 17284) },
+ { AOM_CDF4(29492, 32626, 32748) },
+ { AOM_CDF4(19894, 31402, 32525) },
+ { AOM_CDF4(12942, 27071, 30869) },
+ { AOM_CDF4(8346, 21216, 27405) },
+ { AOM_CDF4(6572, 17087, 23859) },
+ { AOM_CDF4(32035, 32735, 32748) },
+ { AOM_CDF4(22957, 31838, 32618) },
+ { AOM_CDF4(14724, 28572, 31772) },
+ { AOM_CDF4(10364, 23999, 29553) },
+ { AOM_CDF4(7004, 18433, 25655) },
+ { AOM_CDF4(27528, 32277, 32681) },
+ { AOM_CDF4(16959, 31171, 32096) },
+ { AOM_CDF4(10486, 23593, 27962) },
+ { AOM_CDF4(8192, 16384, 23211) },
+ { AOM_CDF4(8937, 17873, 20852) },
+ { AOM_CDF4(27715, 32002, 32615) },
+ { AOM_CDF4(15073, 29491, 31676) },
+ { AOM_CDF4(11264, 24576, 28672) },
+ { AOM_CDF4(2341, 18725, 23406) },
+ { AOM_CDF4(7282, 18204, 25486) },
+ { AOM_CDF4(28547, 32213, 32657) },
+ { AOM_CDF4(20788, 29773, 32239) },
+ { AOM_CDF4(6780, 21469, 30508) },
+ { AOM_CDF4(5958, 14895, 23831) },
+ { AOM_CDF4(16384, 21845, 27307) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(5992, 14304, 19765) },
+ { AOM_CDF4(22612, 31238, 32456) },
+ { AOM_CDF4(13456, 27162, 31087) },
+ { AOM_CDF4(8001, 20062, 26504) },
+ { AOM_CDF4(5168, 14105, 20764) },
+ { AOM_CDF4(2632, 7771, 12385) },
+ { AOM_CDF4(27034, 32344, 32709) },
+ { AOM_CDF4(15850, 29415, 31997) },
+ { AOM_CDF4(9494, 22776, 28841) },
+ { AOM_CDF4(6151, 16830, 23969) },
+ { AOM_CDF4(3461, 10039, 15722) },
+ { AOM_CDF4(30134, 32569, 32731) },
+ { AOM_CDF4(15638, 29422, 31945) },
+ { AOM_CDF4(9150, 21865, 28218) },
+ { AOM_CDF4(5647, 15719, 22676) },
+ { AOM_CDF4(3402, 9772, 15477) },
+ { AOM_CDF4(28530, 32586, 32735) },
+ { AOM_CDF4(17139, 30298, 32292) },
+ { AOM_CDF4(10200, 24039, 29685) },
+ { AOM_CDF4(6419, 17674, 24786) },
+ { AOM_CDF4(3544, 10225, 15824) },
+ { AOM_CDF4(31333, 32726, 32748) },
+ { AOM_CDF4(20618, 31487, 32544) },
+ { AOM_CDF4(12901, 27217, 31232) },
+ { AOM_CDF4(8624, 21734, 28171) },
+ { AOM_CDF4(5104, 14191, 20748) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(11206, 21090, 26561) },
+ { AOM_CDF4(28759, 32279, 32671) },
+ { AOM_CDF4(14171, 27952, 31569) },
+ { AOM_CDF4(9743, 22907, 29141) },
+ { AOM_CDF4(6871, 17886, 24868) },
+ { AOM_CDF4(4960, 13152, 19315) },
+ { AOM_CDF4(31077, 32661, 32748) },
+ { AOM_CDF4(19400, 31195, 32515) },
+ { AOM_CDF4(12752, 26858, 31040) },
+ { AOM_CDF4(8370, 22098, 28591) },
+ { AOM_CDF4(5457, 15373, 22298) },
+ { AOM_CDF4(31697, 32706, 32748) },
+ { AOM_CDF4(17860, 30657, 32333) },
+ { AOM_CDF4(12510, 24812, 29261) },
+ { AOM_CDF4(6180, 19124, 24722) },
+ { AOM_CDF4(5041, 13548, 17959) },
+ { AOM_CDF4(31552, 32716, 32748) },
+ { AOM_CDF4(21908, 31769, 32623) },
+ { AOM_CDF4(14470, 28201, 31565) },
+ { AOM_CDF4(9493, 22982, 28608) },
+ { AOM_CDF4(6858, 17240, 24137) },
+ { AOM_CDF4(32543, 32752, 32756) },
+ { AOM_CDF4(24286, 32097, 32666) },
+ { AOM_CDF4(15958, 29217, 32024) },
+ { AOM_CDF4(10207, 24234, 29958) },
+ { AOM_CDF4(6929, 18305, 25652) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } },
+ { { { AOM_CDF4(4137, 10847, 15682) },
+ { AOM_CDF4(17824, 27001, 30058) },
+ { AOM_CDF4(10204, 22796, 28291) },
+ { AOM_CDF4(6076, 15935, 22125) },
+ { AOM_CDF4(3852, 10937, 16816) },
+ { AOM_CDF4(2252, 6324, 10131) },
+ { AOM_CDF4(25840, 32016, 32662) },
+ { AOM_CDF4(15109, 28268, 31531) },
+ { AOM_CDF4(9385, 22231, 28340) },
+ { AOM_CDF4(6082, 16672, 23479) },
+ { AOM_CDF4(3318, 9427, 14681) },
+ { AOM_CDF4(30594, 32574, 32718) },
+ { AOM_CDF4(16836, 29552, 31859) },
+ { AOM_CDF4(9556, 22542, 28356) },
+ { AOM_CDF4(6305, 16725, 23540) },
+ { AOM_CDF4(3376, 9895, 15184) },
+ { AOM_CDF4(29383, 32617, 32745) },
+ { AOM_CDF4(18891, 30809, 32401) },
+ { AOM_CDF4(11688, 25942, 30687) },
+ { AOM_CDF4(7468, 19469, 26651) },
+ { AOM_CDF4(3909, 11358, 17012) },
+ { AOM_CDF4(31564, 32736, 32748) },
+ { AOM_CDF4(20906, 31611, 32600) },
+ { AOM_CDF4(13191, 27621, 31537) },
+ { AOM_CDF4(8768, 22029, 28676) },
+ { AOM_CDF4(5079, 14109, 20906) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } },
+ { { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) },
+ { AOM_CDF4(8192, 16384, 24576) } } } } };
-typedef coeff_cdf_model coeff_cdf_table[TX_SIZES][PLANE_TYPES];
-static const coeff_cdf_table *av1_default_qctx_coef_cdfs[TOKEN_CDF_Q_CTXS] = {
- &av1_default_coef_head_cdfs_q0, &av1_default_coef_head_cdfs_q1,
- &av1_default_coef_head_cdfs_q2, &av1_default_coef_head_cdfs_q3,
-};
+static const aom_cdf_prob av1_default_coeff_base_eob_multi_cdfs
+ [TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS_EOB][CDF_SIZE(
+ NUM_BASE_LEVELS + 1)] = { { { { { AOM_CDF3(17837, 29055) },
+ { AOM_CDF3(29600, 31446) },
+ { AOM_CDF3(30844, 31878) },
+ { AOM_CDF3(24926, 28948) } },
+ { { AOM_CDF3(21365, 30026) },
+ { AOM_CDF3(30512, 32423) },
+ { AOM_CDF3(31658, 32621) },
+ { AOM_CDF3(29630, 31881) } } },
+ { { { AOM_CDF3(5717, 26477) },
+ { AOM_CDF3(30491, 31703) },
+ { AOM_CDF3(31550, 32158) },
+ { AOM_CDF3(29648, 31491) } },
+ { { AOM_CDF3(12608, 27820) },
+ { AOM_CDF3(30680, 32225) },
+ { AOM_CDF3(30809, 32335) },
+ { AOM_CDF3(31299, 32423) } } },
+ { { { AOM_CDF3(1786, 12612) },
+ { AOM_CDF3(30663, 31625) },
+ { AOM_CDF3(32339, 32468) },
+ { AOM_CDF3(31148, 31833) } },
+ { { AOM_CDF3(18857, 23865) },
+ { AOM_CDF3(31428, 32428) },
+ { AOM_CDF3(31744, 32373) },
+ { AOM_CDF3(31775, 32526) } } },
+ { { { AOM_CDF3(1787, 2532) },
+ { AOM_CDF3(30832, 31662) },
+ { AOM_CDF3(31824, 32682) },
+ { AOM_CDF3(32133, 32569) } },
+ { { AOM_CDF3(13751, 22235) },
+ { AOM_CDF3(32089, 32409) },
+ { AOM_CDF3(27084, 27920) },
+ { AOM_CDF3(29291, 32594) } } },
+ { { { AOM_CDF3(1725, 3449) },
+ { AOM_CDF3(31102, 31935) },
+ { AOM_CDF3(32457, 32613) },
+ { AOM_CDF3(32412, 32649) } },
+ { { AOM_CDF3(10923, 21845) },
+ { AOM_CDF3(10923, 21845) },
+ { AOM_CDF3(10923, 21845) },
+ { AOM_CDF3(10923, 21845) } } } },
+ { { { { AOM_CDF3(17560, 29888) },
+ { AOM_CDF3(29671, 31549) },
+ { AOM_CDF3(31007, 32056) },
+ { AOM_CDF3(27286, 30006) } },
+ { { AOM_CDF3(26594, 31212) },
+ { AOM_CDF3(31208, 32582) },
+ { AOM_CDF3(31835, 32637) },
+ { AOM_CDF3(30595, 32206) } } },
+ { { { AOM_CDF3(15239, 29932) },
+ { AOM_CDF3(31315, 32095) },
+ { AOM_CDF3(32130, 32434) },
+ { AOM_CDF3(30864, 31996) } },
+ { { AOM_CDF3(26279, 30968) },
+ { AOM_CDF3(31142, 32495) },
+ { AOM_CDF3(31713, 32540) },
+ { AOM_CDF3(31929, 32594) } } },
+ { { { AOM_CDF3(2644, 25198) },
+ { AOM_CDF3(32038, 32451) },
+ { AOM_CDF3(32639, 32695) },
+ { AOM_CDF3(32166, 32518) } },
+ { { AOM_CDF3(17187, 27668) },
+ { AOM_CDF3(31714, 32550) },
+ { AOM_CDF3(32283, 32678) },
+ { AOM_CDF3(31930, 32563) } } },
+ { { { AOM_CDF3(1044, 2257) },
+ { AOM_CDF3(30755, 31923) },
+ { AOM_CDF3(32208, 32693) },
+ { AOM_CDF3(32244, 32615) } },
+ { { AOM_CDF3(21317, 26207) },
+ { AOM_CDF3(29133, 30868) },
+ { AOM_CDF3(29311, 31231) },
+ { AOM_CDF3(29657, 31087) } } },
+ { { { AOM_CDF3(478, 1834) },
+ { AOM_CDF3(31005, 31987) },
+ { AOM_CDF3(32317, 32724) },
+ { AOM_CDF3(30865, 32648) } },
+ { { AOM_CDF3(10923, 21845) },
+ { AOM_CDF3(10923, 21845) },
+ { AOM_CDF3(10923, 21845) },
+ { AOM_CDF3(10923, 21845) } } } },
+ { { { { AOM_CDF3(20092, 30774) },
+ { AOM_CDF3(30695, 32020) },
+ { AOM_CDF3(31131, 32103) },
+ { AOM_CDF3(28666, 30870) } },
+ { { AOM_CDF3(27258, 31095) },
+ { AOM_CDF3(31804, 32623) },
+ { AOM_CDF3(31763, 32528) },
+ { AOM_CDF3(31438, 32506) } } },
+ { { { AOM_CDF3(18049, 30489) },
+ { AOM_CDF3(31706, 32286) },
+ { AOM_CDF3(32163, 32473) },
+ { AOM_CDF3(31550, 32184) } },
+ { { AOM_CDF3(27116, 30842) },
+ { AOM_CDF3(31971, 32598) },
+ { AOM_CDF3(32088, 32576) },
+ { AOM_CDF3(32067, 32664) } } },
+ { { { AOM_CDF3(12854, 29093) },
+ { AOM_CDF3(32272, 32558) },
+ { AOM_CDF3(32667, 32729) },
+ { AOM_CDF3(32306, 32585) } },
+ { { AOM_CDF3(25476, 30366) },
+ { AOM_CDF3(32169, 32687) },
+ { AOM_CDF3(32479, 32689) },
+ { AOM_CDF3(31673, 32634) } } },
+ { { { AOM_CDF3(2809, 19301) },
+ { AOM_CDF3(32205, 32622) },
+ { AOM_CDF3(32338, 32730) },
+ { AOM_CDF3(31786, 32616) } },
+ { { AOM_CDF3(22737, 29105) },
+ { AOM_CDF3(30810, 32362) },
+ { AOM_CDF3(30014, 32627) },
+ { AOM_CDF3(30528, 32574) } } },
+ { { { AOM_CDF3(935, 3382) },
+ { AOM_CDF3(30789, 31909) },
+ { AOM_CDF3(32466, 32756) },
+ { AOM_CDF3(30860, 32513) } },
+ { { AOM_CDF3(10923, 21845) },
+ { AOM_CDF3(10923, 21845) },
+ { AOM_CDF3(10923, 21845) },
+ { AOM_CDF3(10923, 21845) } } } },
+ { { { { AOM_CDF3(22497, 31198) },
+ { AOM_CDF3(31715, 32495) },
+ { AOM_CDF3(31606, 32337) },
+ { AOM_CDF3(30388, 31990) } },
+ { { AOM_CDF3(27877, 31584) },
+ { AOM_CDF3(32170, 32728) },
+ { AOM_CDF3(32155, 32688) },
+ { AOM_CDF3(32219, 32702) } } },
+ { { { AOM_CDF3(21457, 31043) },
+ { AOM_CDF3(31951, 32483) },
+ { AOM_CDF3(32153, 32562) },
+ { AOM_CDF3(31473, 32215) } },
+ { { AOM_CDF3(27558, 31151) },
+ { AOM_CDF3(32020, 32640) },
+ { AOM_CDF3(32097, 32575) },
+ { AOM_CDF3(32242, 32719) } } },
+ { { { AOM_CDF3(19980, 30591) },
+ { AOM_CDF3(32219, 32597) },
+ { AOM_CDF3(32581, 32706) },
+ { AOM_CDF3(31803, 32287) } },
+ { { AOM_CDF3(26473, 30507) },
+ { AOM_CDF3(32431, 32723) },
+ { AOM_CDF3(32196, 32611) },
+ { AOM_CDF3(31588, 32528) } } },
+ { { { AOM_CDF3(24647, 30463) },
+ { AOM_CDF3(32412, 32695) },
+ { AOM_CDF3(32468, 32720) },
+ { AOM_CDF3(31269, 32523) } },
+ { { AOM_CDF3(28482, 31505) },
+ { AOM_CDF3(32152, 32701) },
+ { AOM_CDF3(31732, 32598) },
+ { AOM_CDF3(31767, 32712) } } },
+ { { { AOM_CDF3(12358, 24977) },
+ { AOM_CDF3(31331, 32385) },
+ { AOM_CDF3(32634, 32756) },
+ { AOM_CDF3(30411, 32548) } },
+ { { AOM_CDF3(10923, 21845) },
+ { AOM_CDF3(10923, 21845) },
+ { AOM_CDF3(10923, 21845) },
+ { AOM_CDF3(10923, 21845) } } } } };
diff --git a/third_party/aom/av1/common/txb_common.c b/third_party/aom/av1/common/txb_common.c
index c5b91e991..c96d37cca 100644
--- a/third_party/aom/av1/common/txb_common.c
+++ b/third_party/aom/av1/common/txb_common.c
@@ -12,17 +12,17 @@
#include "av1/common/onyxc_int.h"
#include "av1/common/txb_common.h"
-const int16_t av1_coeff_band_4x4[16] = { 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 10, 11, 12, 13, 14, 15 };
+const int8_t av1_coeff_band_4x4[16] = { 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15 };
-const int16_t av1_coeff_band_8x8[64] = {
+const int8_t av1_coeff_band_8x8[64] = {
0, 1, 2, 2, 3, 3, 4, 4, 5, 6, 2, 2, 3, 3, 4, 4,
7, 7, 8, 8, 9, 9, 10, 10, 7, 7, 8, 8, 9, 9, 10, 10,
11, 11, 12, 12, 13, 13, 14, 14, 11, 11, 12, 12, 13, 13, 14, 14,
15, 15, 16, 16, 17, 17, 18, 18, 15, 15, 16, 16, 17, 17, 18, 18,
};
-const int16_t av1_coeff_band_16x16[256] = {
+const int8_t av1_coeff_band_16x16[256] = {
0, 1, 4, 4, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 2, 3, 4,
4, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 5, 5, 6, 6, 7, 7,
7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 5, 5, 6, 6, 7, 7, 7, 7, 8,
@@ -39,7 +39,7 @@ const int16_t av1_coeff_band_16x16[256] = {
19, 20, 20, 20, 20, 21, 21, 21, 21,
};
-const int16_t av1_coeff_band_32x32[1024] = {
+const int8_t av1_coeff_band_32x32[1024] = {
0, 1, 4, 4, 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 2, 3, 4, 4, 7, 7,
7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12,
@@ -96,223 +96,372 @@ const int16_t av1_coeff_band_32x32[1024] = {
22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
};
-#if LV_MAP_PROB
-void av1_init_txb_probs(FRAME_CONTEXT *fc) {
- TX_SIZE tx_size;
- int plane, ctx, level;
+// The ctx offset table when TX is TX_CLASS_2D.
+// TX col and row indices are clamped to 4
- // Update probability models for transform block skip flag
- for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
- for (ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx) {
- fc->txb_skip_cdf[tx_size][ctx][0] =
- AOM_ICDF(128 * (aom_cdf_prob)fc->txb_skip[tx_size][ctx]);
- fc->txb_skip_cdf[tx_size][ctx][1] = AOM_ICDF(32768);
- fc->txb_skip_cdf[tx_size][ctx][2] = 0;
- }
- }
+const int8_t av1_nz_map_ctx_offset_4x4[16] = {
+ 0, 1, 6, 6, 1, 6, 6, 21, 6, 6, 21, 21, 6, 21, 21, 21,
+};
- for (plane = 0; plane < PLANE_TYPES; ++plane) {
- for (ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx) {
- fc->dc_sign_cdf[plane][ctx][0] =
- AOM_ICDF(128 * (aom_cdf_prob)fc->dc_sign[plane][ctx]);
- fc->dc_sign_cdf[plane][ctx][1] = AOM_ICDF(32768);
- fc->dc_sign_cdf[plane][ctx][2] = 0;
- }
- }
+const int8_t av1_nz_map_ctx_offset_8x8[64] = {
+ 0, 1, 6, 6, 21, 21, 21, 21, 1, 6, 6, 21, 21, 21, 21, 21,
+ 6, 6, 21, 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+};
- // Update probability models for non-zero coefficient map and eob flag.
- for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
- for (plane = 0; plane < PLANE_TYPES; ++plane) {
- for (level = 0; level < NUM_BASE_LEVELS; ++level) {
- for (ctx = 0; ctx < COEFF_BASE_CONTEXTS; ++ctx) {
- fc->coeff_base_cdf[tx_size][plane][level][ctx][0] = AOM_ICDF(
- 128 * (aom_cdf_prob)fc->coeff_base[tx_size][plane][level][ctx]);
- fc->coeff_base_cdf[tx_size][plane][level][ctx][1] = AOM_ICDF(32768);
- fc->coeff_base_cdf[tx_size][plane][level][ctx][2] = 0;
- }
- }
- }
- }
+const int8_t av1_nz_map_ctx_offset_16x16[256] = {
+ 0, 1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 1, 6, 6,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 6, 6, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21,
+};
- for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
- for (plane = 0; plane < PLANE_TYPES; ++plane) {
- for (ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
- fc->nz_map_cdf[tx_size][plane][ctx][0] =
- AOM_ICDF(128 * (aom_cdf_prob)fc->nz_map[tx_size][plane][ctx]);
- fc->nz_map_cdf[tx_size][plane][ctx][1] = AOM_ICDF(32768);
- fc->nz_map_cdf[tx_size][plane][ctx][2] = 0;
- }
+const int8_t av1_nz_map_ctx_offset_32x32[1024] = {
+ 0, 1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 1, 6, 6, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+};
- for (ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx) {
- fc->eob_flag_cdf[tx_size][plane][ctx][0] =
- AOM_ICDF(128 * (aom_cdf_prob)fc->eob_flag[tx_size][plane][ctx]);
- fc->eob_flag_cdf[tx_size][plane][ctx][1] = AOM_ICDF(32768);
- fc->eob_flag_cdf[tx_size][plane][ctx][2] = 0;
- }
- }
- }
+const int8_t av1_nz_map_ctx_offset_8x4[32] = {
+ 0, 16, 6, 6, 21, 21, 21, 21, 16, 16, 6, 21, 21, 21, 21, 21,
+ 16, 16, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21,
+};
- for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
- for (plane = 0; plane < PLANE_TYPES; ++plane) {
- for (ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
- fc->coeff_lps_cdf[tx_size][plane][ctx][0] =
- AOM_ICDF(128 * (aom_cdf_prob)fc->coeff_lps[tx_size][plane][ctx]);
- fc->coeff_lps_cdf[tx_size][plane][ctx][1] = AOM_ICDF(32768);
- fc->coeff_lps_cdf[tx_size][plane][ctx][2] = 0;
- }
-#if BR_NODE
- for (int br = 0; br < BASE_RANGE_SETS; ++br) {
- for (ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
- fc->coeff_br_cdf[tx_size][plane][br][ctx][0] = AOM_ICDF(
- 128 * (aom_cdf_prob)fc->coeff_br[tx_size][plane][br][ctx]);
- fc->coeff_br_cdf[tx_size][plane][br][ctx][1] = AOM_ICDF(32768);
- fc->coeff_br_cdf[tx_size][plane][br][ctx][2] = 0;
- }
- }
-#endif // BR_NODE
- }
- }
-#if CONFIG_CTX1D
- for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
- for (plane = 0; plane < PLANE_TYPES; ++plane) {
- for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class) {
- fc->eob_mode_cdf[tx_size][plane][tx_class][0] = AOM_ICDF(
- 128 * (aom_cdf_prob)fc->eob_mode[tx_size][plane][tx_class]);
- fc->eob_mode_cdf[tx_size][plane][tx_class][1] = AOM_ICDF(32768);
- fc->eob_mode_cdf[tx_size][plane][tx_class][2] = 0;
- }
- }
- }
- for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
- for (plane = 0; plane < PLANE_TYPES; ++plane) {
- for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class) {
- for (ctx = 0; ctx < EMPTY_LINE_CONTEXTS; ++ctx) {
- fc->empty_line_cdf[tx_size][plane][tx_class][ctx][0] = AOM_ICDF(
- 128 *
- (aom_cdf_prob)fc->empty_line[tx_size][plane][tx_class][ctx]);
- fc->empty_line_cdf[tx_size][plane][tx_class][ctx][1] =
- AOM_ICDF(32768);
- fc->empty_line_cdf[tx_size][plane][tx_class][ctx][2] = 0;
- }
- }
- }
- }
- for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
- for (plane = 0; plane < PLANE_TYPES; ++plane) {
- for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class) {
- for (ctx = 0; ctx < HV_EOB_CONTEXTS; ++ctx) {
- fc->hv_eob_cdf[tx_size][plane][tx_class][ctx][0] = AOM_ICDF(
- 128 * (aom_cdf_prob)fc->hv_eob[tx_size][plane][tx_class][ctx]);
- fc->hv_eob_cdf[tx_size][plane][tx_class][ctx][1] = AOM_ICDF(32768);
- fc->hv_eob_cdf[tx_size][plane][tx_class][ctx][2] = 0;
- }
- }
- }
- }
-#endif // CONFIG_CTX1D
-}
-#endif // LV_MAP_PROB
+const int8_t av1_nz_map_ctx_offset_8x16[128] = {
+ 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21,
+ 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+};
-void av1_adapt_txb_probs(AV1_COMMON *cm, unsigned int count_sat,
- unsigned int update_factor) {
- FRAME_CONTEXT *fc = cm->fc;
- const FRAME_CONTEXT *pre_fc = cm->pre_fc;
- const FRAME_COUNTS *counts = &cm->counts;
- TX_SIZE tx_size;
- int plane, ctx, level;
+const int8_t av1_nz_map_ctx_offset_16x8[128] = {
+ 0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 6,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+};
- // Update probability models for transform block skip flag
- for (tx_size = 0; tx_size < TX_SIZES; ++tx_size)
- for (ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx)
- fc->txb_skip[tx_size][ctx] = mode_mv_merge_probs(
- pre_fc->txb_skip[tx_size][ctx], counts->txb_skip[tx_size][ctx]);
+const int8_t av1_nz_map_ctx_offset_16x32[512] = {
+ 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+};
- for (plane = 0; plane < PLANE_TYPES; ++plane)
- for (ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx)
- fc->dc_sign[plane][ctx] = mode_mv_merge_probs(
- pre_fc->dc_sign[plane][ctx], counts->dc_sign[plane][ctx]);
+const int8_t av1_nz_map_ctx_offset_32x16[512] = {
+ 0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 6, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+};
- // Update probability models for non-zero coefficient map and eob flag.
- for (tx_size = 0; tx_size < TX_SIZES; ++tx_size)
- for (plane = 0; plane < PLANE_TYPES; ++plane)
- for (level = 0; level < NUM_BASE_LEVELS; ++level)
- for (ctx = 0; ctx < COEFF_BASE_CONTEXTS; ++ctx)
- fc->coeff_base[tx_size][plane][level][ctx] =
- merge_probs(pre_fc->coeff_base[tx_size][plane][level][ctx],
- counts->coeff_base[tx_size][plane][level][ctx],
- count_sat, update_factor);
+const int8_t av1_nz_map_ctx_offset_32x64[1024] = {
+ 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+};
- for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
- for (plane = 0; plane < PLANE_TYPES; ++plane) {
- for (ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
- fc->nz_map[tx_size][plane][ctx] = merge_probs(
- pre_fc->nz_map[tx_size][plane][ctx],
- counts->nz_map[tx_size][plane][ctx], count_sat, update_factor);
- }
+const int8_t av1_nz_map_ctx_offset_64x32[1024] = {
+ 0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 6, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16,
+ 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+};
- for (ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx) {
- fc->eob_flag[tx_size][plane][ctx] = merge_probs(
- pre_fc->eob_flag[tx_size][plane][ctx],
- counts->eob_flag[tx_size][plane][ctx], count_sat, update_factor);
- }
- }
- }
+const int8_t av1_nz_map_ctx_offset_4x16[64] = {
+ 0, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, 21, 6, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+};
- for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
- for (plane = 0; plane < PLANE_TYPES; ++plane) {
- for (ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
- fc->coeff_lps[tx_size][plane][ctx] = merge_probs(
- pre_fc->coeff_lps[tx_size][plane][ctx],
- counts->coeff_lps[tx_size][plane][ctx], count_sat, update_factor);
- }
-#if BR_NODE
- for (int br = 0; br < BASE_RANGE_SETS; ++br) {
- for (ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
- fc->coeff_br[tx_size][plane][br][ctx] =
- merge_probs(pre_fc->coeff_br[tx_size][plane][br][ctx],
- counts->coeff_br[tx_size][plane][br][ctx], count_sat,
- update_factor);
- }
- }
-#endif // BR_NODE
- }
- }
-#if CONFIG_CTX1D
- for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
- for (plane = 0; plane < PLANE_TYPES; ++plane)
- for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class)
- fc->eob_mode[tx_size][plane][tx_class] =
- merge_probs(pre_fc->eob_mode[tx_size][plane][tx_class],
- counts->eob_mode[tx_size][plane][tx_class], count_sat,
- update_factor);
- }
- for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
- for (plane = 0; plane < PLANE_TYPES; ++plane)
- for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class)
- for (ctx = 0; ctx < EMPTY_LINE_CONTEXTS; ++ctx)
- fc->empty_line[tx_size][plane][tx_class][ctx] =
- merge_probs(pre_fc->empty_line[tx_size][plane][tx_class][ctx],
- counts->empty_line[tx_size][plane][tx_class][ctx],
- count_sat, update_factor);
- }
- for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
- for (plane = 0; plane < PLANE_TYPES; ++plane)
- for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class)
- for (ctx = 0; ctx < HV_EOB_CONTEXTS; ++ctx)
- fc->hv_eob[tx_size][plane][tx_class][ctx] =
- merge_probs(pre_fc->hv_eob[tx_size][plane][tx_class][ctx],
- counts->hv_eob[tx_size][plane][tx_class][ctx],
- count_sat, update_factor);
- }
-#endif
-}
+const int8_t av1_nz_map_ctx_offset_16x4[64] = {
+ 0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 16, 16, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+};
+
+const int8_t av1_nz_map_ctx_offset_8x32[256] = {
+ 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21,
+ 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21,
+};
+
+const int8_t av1_nz_map_ctx_offset_32x8[256] = {
+ 0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 6, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21,
+};
+
+const int8_t *av1_nz_map_ctx_offset[19] = {
+ av1_nz_map_ctx_offset_4x4, // TX_4x4
+ av1_nz_map_ctx_offset_8x8, // TX_8x8
+ av1_nz_map_ctx_offset_16x16, // TX_16x16
+ av1_nz_map_ctx_offset_32x32, // TX_32x32
+ av1_nz_map_ctx_offset_32x32, // TX_64x64
+ av1_nz_map_ctx_offset_4x16, // TX_4x8
+ av1_nz_map_ctx_offset_8x4, // TX_8x4
+ av1_nz_map_ctx_offset_8x32, // TX_8x16
+ av1_nz_map_ctx_offset_16x8, // TX_16x8
+ av1_nz_map_ctx_offset_16x32, // TX_16x32
+ av1_nz_map_ctx_offset_32x16, // TX_32x16
+ av1_nz_map_ctx_offset_32x64, // TX_32x64
+ av1_nz_map_ctx_offset_64x32, // TX_64x32
+ av1_nz_map_ctx_offset_4x16, // TX_4x16
+ av1_nz_map_ctx_offset_16x4, // TX_16x4
+ av1_nz_map_ctx_offset_8x32, // TX_8x32
+ av1_nz_map_ctx_offset_32x8, // TX_32x8
+ av1_nz_map_ctx_offset_16x32, // TX_16x64
+ av1_nz_map_ctx_offset_64x32, // TX_64x16
+};
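Editor's note: the TX_CLASS_2D tables above all encode one positional rule, which this change also documents in a comment inside get_nz_map_ctx_from_stats() further down. A minimal generator sketch follows (not part of the commit; the helper name is hypothetical), matching the tables as listed.

// Fill a TX_CLASS_2D offset table for a block of `width` x `height`
// coefficients, mirroring the commented algorithm in
// get_nz_map_ctx_from_stats().
static void build_nz_map_ctx_offset_2d(int8_t *table, int width, int height) {
  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      int offset;
      if (row == 0 && col == 0)
        offset = 0;   // DC position
      else if (width < height && row < 2)
        offset = 11;  // tall blocks: top two rows
      else if (width > height && col < 2)
        offset = 16;  // wide blocks: left two columns
      else if (row + col < 2)
        offset = 1;   // immediately next to DC
      else if (row + col < 4)
        offset = 6;
      else
        offset = 21;
      table[row * width + col] = (int8_t)offset;
    }
  }
}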
void av1_init_lv_map(AV1_COMMON *cm) {
LV_MAP_CTX_TABLE *coeff_ctx_table = &cm->coeff_ctx_table;
for (int row = 0; row < 2; ++row) {
for (int col = 0; col < 2; ++col) {
- for (int sig_mag = 0; sig_mag < 2; ++sig_mag) {
+ for (int sig_mag = 0; sig_mag < 3; ++sig_mag) {
for (int count = 0; count < BASE_CONTEXT_POSITION_NUM + 1; ++count) {
+ if (row == 0 && col == 0 && count > 5) continue;
+ if ((row == 0 || col == 0) && count > 8) continue;
+
coeff_ctx_table->base_ctx_table[row][col][sig_mag][count] =
get_base_ctx_from_count_mag(row, col, count, sig_mag);
}
@@ -320,3 +469,7 @@ void av1_init_lv_map(AV1_COMMON *cm) {
}
}
}
+
+const int16_t k_eob_group_start[12] = { 0, 1, 2, 3, 5, 9,
+ 17, 33, 65, 129, 257, 513 };
+const int16_t k_eob_offset_bits[12] = { 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
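Editor's note: each end-of-block group i starts at k_eob_group_start[i] and is signalled with k_eob_offset_bits[i] extra bits, so consecutive starts differ by exactly that power of two. A small self-check sketch (not part of the commit, assuming assert.h is available in the translation unit):

static void check_eob_groups(void) {
  for (int i = 0; i + 1 < 12; ++i) {
    assert(k_eob_group_start[i + 1] - k_eob_group_start[i] ==
           (1 << k_eob_offset_bits[i]));
  }
}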
diff --git a/third_party/aom/av1/common/txb_common.h b/third_party/aom/av1/common/txb_common.h
index 3bf8f8c61..cdac90d9e 100644
--- a/third_party/aom/av1/common/txb_common.h
+++ b/third_party/aom/av1/common/txb_common.h
@@ -12,72 +12,133 @@
#ifndef AV1_COMMON_TXB_COMMON_H_
#define AV1_COMMON_TXB_COMMON_H_
-#define REDUCE_CONTEXT_DEPENDENCY 0
-#define MIN_SCAN_IDX_REDUCE_CONTEXT_DEPENDENCY 0
+extern const int16_t k_eob_group_start[12];
+extern const int16_t k_eob_offset_bits[12];
-extern const int16_t av1_coeff_band_4x4[16];
+extern const int8_t av1_coeff_band_4x4[16];
-extern const int16_t av1_coeff_band_8x8[64];
+extern const int8_t av1_coeff_band_8x8[64];
-extern const int16_t av1_coeff_band_16x16[256];
+extern const int8_t av1_coeff_band_16x16[256];
-extern const int16_t av1_coeff_band_32x32[1024];
+extern const int8_t av1_coeff_band_32x32[1024];
+
+extern const int8_t *av1_nz_map_ctx_offset[TX_SIZES_ALL];
typedef struct txb_ctx {
int txb_skip_ctx;
int dc_sign_ctx;
} TXB_CTX;
-static INLINE TX_SIZE get_txsize_context(TX_SIZE tx_size) {
- return txsize_sqr_up_map[tx_size];
-}
+static const int base_level_count_to_index[13] = {
+ 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
+};
-static int base_ref_offset[BASE_CONTEXT_POSITION_NUM][2] = {
+// Note: TX_PAD_2D depends on this offset table.
+static const int base_ref_offset[BASE_CONTEXT_POSITION_NUM][2] = {
/* clang-format off*/
{ -2, 0 }, { -1, -1 }, { -1, 0 }, { -1, 1 }, { 0, -2 }, { 0, -1 }, { 0, 1 },
{ 0, 2 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 2, 0 }
/* clang-format on*/
};
-static INLINE int get_level_count(const tran_low_t *tcoeffs, int bwl,
- int height, int row, int col, int level,
- int (*nb_offset)[2], int nb_num) {
- int count = 0;
- for (int idx = 0; idx < nb_num; ++idx) {
- const int ref_row = row + nb_offset[idx][0];
- const int ref_col = col + nb_offset[idx][1];
- if (ref_row < 0 || ref_col < 0 || ref_row >= height ||
- ref_col >= (1 << bwl))
- continue;
- const int pos = (ref_row << bwl) + ref_col;
- tran_low_t abs_coeff = abs(tcoeffs[pos]);
- count += abs_coeff > level;
+#define CONTEXT_MAG_POSITION_NUM 3
+static const int mag_ref_offset_with_txclass[3][CONTEXT_MAG_POSITION_NUM][2] = {
+ { { 0, 1 }, { 1, 0 }, { 1, 1 } },
+ { { 0, 1 }, { 1, 0 }, { 0, 2 } },
+ { { 0, 1 }, { 1, 0 }, { 2, 0 } }
+};
+static const int mag_ref_offset[CONTEXT_MAG_POSITION_NUM][2] = {
+ { 0, 1 }, { 1, 0 }, { 1, 1 }
+};
+
+static const TX_CLASS tx_type_to_class[TX_TYPES] = {
+ TX_CLASS_2D, // DCT_DCT
+ TX_CLASS_2D, // ADST_DCT
+ TX_CLASS_2D, // DCT_ADST
+ TX_CLASS_2D, // ADST_ADST
+ TX_CLASS_2D, // FLIPADST_DCT
+ TX_CLASS_2D, // DCT_FLIPADST
+ TX_CLASS_2D, // FLIPADST_FLIPADST
+ TX_CLASS_2D, // ADST_FLIPADST
+ TX_CLASS_2D, // FLIPADST_ADST
+ TX_CLASS_2D, // IDTX
+ TX_CLASS_VERT, // V_DCT
+ TX_CLASS_HORIZ, // H_DCT
+ TX_CLASS_VERT, // V_ADST
+ TX_CLASS_HORIZ, // H_ADST
+ TX_CLASS_VERT, // V_FLIPADST
+ TX_CLASS_HORIZ, // H_FLIPADST
+};
+
+static const int8_t eob_to_pos_small[33] = {
+ 0, 1, 2, // 0-2
+ 3, 3, // 3-4
+ 4, 4, 4, 4, // 5-8
+ 5, 5, 5, 5, 5, 5, 5, 5, // 9-16
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 // 17-32
+};
+
+static const int8_t eob_to_pos_large[17] = {
+ 6, // placeholder
+ 7, // 33-64
+ 8, 8, // 65-128
+ 9, 9, 9, 9, // 129-256
+ 10, 10, 10, 10, 10, 10, 10, 10, // 257-512
+ 11 // 513-
+};
+
+static INLINE int get_eob_pos_token(const int eob, int *const extra) {
+ int t;
+
+ if (eob < 33) {
+ t = eob_to_pos_small[eob];
+ } else {
+ const int e = AOMMIN((eob - 1) >> 5, 16);
+ t = eob_to_pos_large[e];
}
- return count;
+
+ *extra = eob - k_eob_group_start[t];
+
+ return t;
}
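Editor's illustration (not part of the commit): an eob of 50 falls in the group starting at 33, so the token is 7 and the remaining offset of 17 fits in the group's 5 extra bits.

static void eob_token_example(void) {
  int extra;
  assert(get_eob_pos_token(50, &extra) == 7);  // k_eob_group_start[7] == 33
  assert(extra == 17 && extra < (1 << k_eob_offset_bits[7]));
  assert(get_eob_pos_token(1, &extra) == 1 && extra == 0);  // 1-wide group
}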
-static INLINE void get_mag(int *mag, const tran_low_t *tcoeffs, int bwl,
- int height, int row, int col, int (*nb_offset)[2],
- int nb_num) {
- mag[0] = 0;
- mag[1] = 0;
- for (int idx = 0; idx < nb_num; ++idx) {
- const int ref_row = row + nb_offset[idx][0];
- const int ref_col = col + nb_offset[idx][1];
- if (ref_row < 0 || ref_col < 0 || ref_row >= height ||
- ref_col >= (1 << bwl))
- continue;
- const int pos = (ref_row << bwl) + ref_col;
- tran_low_t abs_coeff = abs(tcoeffs[pos]);
- if (nb_offset[idx][0] >= 0 && nb_offset[idx][1] >= 0) {
- if (abs_coeff > mag[0]) {
- mag[0] = abs_coeff;
- mag[1] = 1;
- } else if (abs_coeff == mag[0]) {
- ++mag[1];
- }
- }
- }
+static INLINE int av1_get_eob_pos_ctx(const TX_TYPE tx_type,
+ const int eob_token) {
+ static const int8_t tx_type_to_offset[TX_TYPES] = {
+ -1, // DCT_DCT
+ -1, // ADST_DCT
+ -1, // DCT_ADST
+ -1, // ADST_ADST
+ -1, // FLIPADST_DCT
+ -1, // DCT_FLIPADST
+ -1, // FLIPADST_FLIPADST
+ -1, // ADST_FLIPADST
+ -1, // FLIPADST_ADST
+ -1, // IDTX
+ 10, // V_DCT
+ 10, // H_DCT
+ 10, // V_ADST
+ 10, // H_ADST
+ 10, // V_FLIPADST
+ 10, // H_FLIPADST
+ };
+ return eob_token + tx_type_to_offset[tx_type];
+}
+
+static INLINE int get_txb_bwl(TX_SIZE tx_size) {
+ tx_size = av1_get_adjusted_tx_size(tx_size);
+ return tx_size_wide_log2[tx_size];
+}
+
+static INLINE int get_txb_wide(TX_SIZE tx_size) {
+ tx_size = av1_get_adjusted_tx_size(tx_size);
+ return tx_size_wide[tx_size];
+}
+
+static INLINE int get_txb_high(TX_SIZE tx_size) {
+ tx_size = av1_get_adjusted_tx_size(tx_size);
+ return tx_size_high[tx_size];
}
static INLINE void get_base_count_mag(int *mag, int *count,
@@ -110,67 +171,124 @@ static INLINE void get_base_count_mag(int *mag, int *count,
}
}
-static INLINE int get_level_count_mag(int *mag, const tran_low_t *tcoeffs,
- int bwl, int height, int row, int col,
- int level, int (*nb_offset)[2],
- int nb_num) {
- const int stride = 1 << bwl;
+static INLINE uint8_t *set_levels(uint8_t *const levels_buf, const int width) {
+ return levels_buf + TX_PAD_TOP * (width + TX_PAD_HOR);
+}
+
+static INLINE int get_padded_idx(const int idx, const int bwl) {
+ return idx + ((idx >> bwl) << TX_PAD_HOR_LOG2);
+}
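Editor's note: the level buffer keeps TX_PAD_HOR spare columns per row (set_levels() also skips TX_PAD_TOP padded rows), so get_padded_idx() only has to add TX_PAD_HOR for each completed row of the raster index. A small illustration, assuming TX_PAD_HOR == 1 << TX_PAD_HOR_LOG2 as defined elsewhere in the codec:

static void padded_idx_example(void) {
  const int bwl = 3;  // 8-wide block
  const int stride = (1 << bwl) + TX_PAD_HOR;
  // Raster index 10 is row 1, col 2, i.e. one padded stride plus 2.
  assert(get_padded_idx(10, bwl) == 1 * stride + 2);
}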
+
+static INLINE int get_level_count(const uint8_t *const levels, const int stride,
+ const int row, const int col, const int level,
+ const int (*nb_offset)[2], const int nb_num) {
int count = 0;
- *mag = 0;
+
for (int idx = 0; idx < nb_num; ++idx) {
const int ref_row = row + nb_offset[idx][0];
const int ref_col = col + nb_offset[idx][1];
- if (ref_row < 0 || ref_col < 0 || ref_row >= height || ref_col >= stride)
- continue;
- const int pos = (ref_row << bwl) + ref_col;
- tran_low_t abs_coeff = abs(tcoeffs[pos]);
- count += abs_coeff > level;
- if (nb_offset[idx][0] >= 0 && nb_offset[idx][1] >= 0)
- *mag = AOMMAX(*mag, abs_coeff);
+ const int pos = ref_row * stride + ref_col;
+ count += levels[pos] > level;
}
return count;
}
+static INLINE void get_level_mag(const uint8_t *const levels, const int stride,
+ const int row, const int col, int *const mag) {
+ for (int idx = 0; idx < CONTEXT_MAG_POSITION_NUM; ++idx) {
+ const int ref_row = row + mag_ref_offset[idx][0];
+ const int ref_col = col + mag_ref_offset[idx][1];
+ const int pos = ref_row * stride + ref_col;
+ mag[idx] = levels[pos];
+ }
+}
+
static INLINE int get_base_ctx_from_count_mag(int row, int col, int count,
int sig_mag) {
- const int ctx = (count + 1) >> 1;
+ const int ctx = base_level_count_to_index[count];
int ctx_idx = -1;
+
if (row == 0 && col == 0) {
- ctx_idx = (ctx << 1) + sig_mag;
- // TODO(angiebird): turn this on once the optimization is finalized
- // assert(ctx_idx < 8);
+ if (sig_mag >= 2) return ctx_idx = 0;
+ if (sig_mag == 1) {
+ if (count >= 2)
+ ctx_idx = 1;
+ else
+ ctx_idx = 2;
+
+ return ctx_idx;
+ }
+
+ ctx_idx = 3 + ctx;
+ assert(ctx_idx <= 6);
+ return ctx_idx;
} else if (row == 0) {
- ctx_idx = 8 + (ctx << 1) + sig_mag;
- // TODO(angiebird): turn this on once the optimization is finalized
- // assert(ctx_idx < 18);
+ if (sig_mag >= 2) return ctx_idx = 6;
+ if (sig_mag == 1) {
+ if (count >= 2)
+ ctx_idx = 7;
+ else
+ ctx_idx = 8;
+ return ctx_idx;
+ }
+
+ ctx_idx = 9 + ctx;
+ assert(ctx_idx <= 11);
+ return ctx_idx;
} else if (col == 0) {
- ctx_idx = 8 + 10 + (ctx << 1) + sig_mag;
+ if (sig_mag >= 2) return ctx_idx = 12;
+ if (sig_mag == 1) {
+ if (count >= 2)
+ ctx_idx = 13;
+ else
+ ctx_idx = 14;
+
+ return ctx_idx;
+ }
+
+ ctx_idx = 15 + ctx;
+ assert(ctx_idx <= 17);
// TODO(angiebird): turn this on once the optimization is finalized
// assert(ctx_idx < 28);
} else {
- ctx_idx = 8 + 10 + 10 + (ctx << 1) + sig_mag;
- assert(ctx_idx < COEFF_BASE_CONTEXTS);
+ if (sig_mag >= 2) return ctx_idx = 18;
+ if (sig_mag == 1) {
+ if (count >= 2)
+ ctx_idx = 19;
+ else
+ ctx_idx = 20;
+ return ctx_idx;
+ }
+
+ ctx_idx = 21 + ctx;
+
+ assert(ctx_idx <= 24);
}
return ctx_idx;
}
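Editor's illustration (not part of the commit): at the DC position with sig_mag == 0 and a neighbour count of 3, the context is 3 + base_level_count_to_index[3] == 4.

static void base_ctx_example(void) {
  assert(get_base_ctx_from_count_mag(0, 0, 3, 0) == 4);
}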
-static INLINE int get_base_ctx(const tran_low_t *tcoeffs,
- int c, // raster order
- const int bwl, const int height,
- const int level) {
+static INLINE int get_base_ctx(const uint8_t *const levels,
+ const int c, // raster order
+ const int bwl, const int level_minus_1,
+ const int count) {
const int row = c >> bwl;
const int col = c - (row << bwl);
- const int level_minus_1 = level - 1;
- int mag;
- int count =
- get_level_count_mag(&mag, tcoeffs, bwl, height, row, col, level_minus_1,
- base_ref_offset, BASE_CONTEXT_POSITION_NUM);
- int ctx_idx = get_base_ctx_from_count_mag(row, col, count, mag > level);
+ const int stride = (1 << bwl) + TX_PAD_HOR;
+ int mag_count = 0;
+ int nb_mag[3] = { 0 };
+
+ get_level_mag(levels, stride, row, col, nb_mag);
+
+ for (int idx = 0; idx < 3; ++idx)
+ mag_count += nb_mag[idx] > (level_minus_1 + 1);
+ const int ctx_idx =
+ get_base_ctx_from_count_mag(row, col, count, AOMMIN(2, mag_count));
return ctx_idx;
}
#define BR_CONTEXT_POSITION_NUM 8 // Base range coefficient context
-static int br_ref_offset[BR_CONTEXT_POSITION_NUM][2] = {
+// Note: TX_PAD_2D depends on this offset table.
+static const int br_ref_offset[BR_CONTEXT_POSITION_NUM][2] = {
/* clang-format off*/
{ -1, -1 }, { -1, 0 }, { -1, 1 }, { 0, -1 },
{ 0, 1 }, { 1, -1 }, { 1, 0 }, { 1, 1 },
@@ -181,18 +299,8 @@ static const int br_level_map[9] = {
0, 0, 1, 1, 2, 2, 3, 3, 3,
};
-static const int coeff_to_br_index[COEFF_BASE_RANGE] = {
- 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
-};
-
-static const int br_index_to_coeff[BASE_RANGE_SETS] = {
- 0, 2, 6,
-};
-
-static const int br_extra_bits[BASE_RANGE_SETS] = {
- 1, 2, 3,
-};
-
+// Note: If BR_MAG_OFFSET changes, the calculation of offset in
+// get_br_ctx_from_count_mag() must be updated.
#define BR_MAG_OFFSET 1
// TODO(angiebird): optimize this function by using a table to map from
// count/mag to ctx
@@ -223,369 +331,356 @@ static INLINE int get_br_count_mag(int *mag, const tran_low_t *tcoeffs, int bwl,
return count;
}
-static INLINE int get_br_ctx_from_count_mag(int row, int col, int count,
- int mag) {
- int offset = 0;
- if (mag <= BR_MAG_OFFSET)
- offset = 0;
- else if (mag <= 3)
- offset = 1;
- else if (mag <= 5)
- offset = 2;
- else
- offset = 3;
-
- int ctx = br_level_map[count];
- ctx += offset * BR_TMP_OFFSET;
-
+static INLINE int get_br_ctx_from_count_mag(const int row, const int col,
+ const int count, const int mag) {
// DC: 0 - 1
- if (row == 0 && col == 0) return ctx;
-
// Top row: 2 - 4
- if (row == 0) return 2 + ctx;
-
// Left column: 5 - 7
- if (col == 0) return 5 + ctx;
-
// others: 8 - 11
- return 8 + ctx;
+ static const int offset_pos[2][2] = { { 8, 5 }, { 2, 0 } };
+ const int mag_clamp = AOMMIN(mag, 6);
+ const int offset = mag_clamp >> 1;
+ const int ctx =
+ br_level_map[count] + offset * BR_TMP_OFFSET + offset_pos[!row][!col];
+ return ctx;
}
-static INLINE int get_br_ctx(const tran_low_t *tcoeffs,
- const int c, // raster order
- const int bwl, const int height) {
+static INLINE int get_br_ctx_2d(const uint8_t *const levels,
+ const int c, // raster order
+ const int bwl) {
+ assert(c > 0);
const int row = c >> bwl;
const int col = c - (row << bwl);
- const int level_minus_1 = NUM_BASE_LEVELS;
- int mag;
- const int count =
- get_level_count_mag(&mag, tcoeffs, bwl, height, row, col, level_minus_1,
- br_ref_offset, BR_CONTEXT_POSITION_NUM);
- const int ctx = get_br_ctx_from_count_mag(row, col, count, mag);
- return ctx;
+ const int stride = (1 << bwl) + TX_PAD_HOR;
+ const int pos = row * stride + col;
+ int mag = AOMMIN(levels[pos + 1], MAX_BASE_BR_RANGE) +
+ AOMMIN(levels[pos + stride], MAX_BASE_BR_RANGE) +
+ AOMMIN(levels[pos + 1 + stride], MAX_BASE_BR_RANGE);
+ mag = AOMMIN((mag + 1) >> 1, 6);
+ //((row | col) < 2) is equivalent to ((row < 2) && (col < 2))
+ if ((row | col) < 2) return mag + 7;
+ return mag + 14;
}
-#define SIG_REF_OFFSET_NUM 7
-static int sig_ref_offset[SIG_REF_OFFSET_NUM][2] = {
- { -2, -1 }, { -2, 0 }, { -1, -2 }, { -1, -1 },
- { -1, 0 }, { 0, -2 }, { 0, -1 },
-};
-
-#if REDUCE_CONTEXT_DEPENDENCY
-static INLINE int get_nz_count(const tran_low_t *tcoeffs, int bwl, int height,
- int row, int col, int prev_row, int prev_col) {
- int count = 0;
- for (int idx = 0; idx < SIG_REF_OFFSET_NUM; ++idx) {
- const int ref_row = row + sig_ref_offset[idx][0];
- const int ref_col = col + sig_ref_offset[idx][1];
- if (ref_row < 0 || ref_col < 0 || ref_row >= height ||
- ref_col >= (1 << bwl) || (prev_row == ref_row && prev_col == ref_col))
- continue;
- const int nb_pos = (ref_row << bwl) + ref_col;
- count += (tcoeffs[nb_pos] != 0);
- }
- return count;
-}
-#else
-static INLINE int get_nz_count(const tran_low_t *tcoeffs, int bwl, int height,
- int row, int col) {
- int count = 0;
- for (int idx = 0; idx < SIG_REF_OFFSET_NUM; ++idx) {
- const int ref_row = row + sig_ref_offset[idx][0];
- const int ref_col = col + sig_ref_offset[idx][1];
- if (ref_row < 0 || ref_col < 0 || ref_row >= height ||
- ref_col >= (1 << bwl))
- continue;
- const int nb_pos = (ref_row << bwl) + ref_col;
- count += (tcoeffs[nb_pos] != 0);
- }
- return count;
-}
-#endif
-
-static INLINE TX_CLASS get_tx_class(TX_TYPE tx_type) {
- switch (tx_type) {
-#if CONFIG_EXT_TX
- case V_DCT:
- case V_ADST:
- case V_FLIPADST: return TX_CLASS_VERT;
- case H_DCT:
- case H_ADST:
- case H_FLIPADST: return TX_CLASS_HORIZ;
-#endif
- default: return TX_CLASS_2D;
+static AOM_FORCE_INLINE int get_br_ctx(const uint8_t *const levels,
+ const int c, // raster order
+ const int bwl, const TX_CLASS tx_class) {
+ const int row = c >> bwl;
+ const int col = c - (row << bwl);
+ const int stride = (1 << bwl) + TX_PAD_HOR;
+ const int pos = row * stride + col;
+ int mag = levels[pos + 1];
+ mag += levels[pos + stride];
+ switch (tx_class) {
+ case TX_CLASS_2D:
+ mag += levels[pos + stride + 1];
+ mag = AOMMIN((mag + 1) >> 1, 6);
+ if (c == 0) return mag;
+ if ((row < 2) && (col < 2)) return mag + 7;
+ break;
+ case TX_CLASS_HORIZ:
+ mag += levels[pos + 2];
+ mag = AOMMIN((mag + 1) >> 1, 6);
+ if (c == 0) return mag;
+ if (col == 0) return mag + 7;
+ break;
+ case TX_CLASS_VERT:
+ mag += levels[pos + (stride << 1)];
+ mag = AOMMIN((mag + 1) >> 1, 6);
+ if (c == 0) return mag;
+ if (row == 0) return mag + 7;
+ break;
+ default: break;
}
-}
-// TODO(angiebird): optimize this function by generate a table that maps from
-// count to ctx
-static INLINE int get_nz_map_ctx_from_count(int count,
- int coeff_idx, // raster order
- int bwl, TX_TYPE tx_type) {
- (void)tx_type;
- const int row = coeff_idx >> bwl;
- const int col = coeff_idx - (row << bwl);
- int ctx = 0;
-#if CONFIG_EXT_TX
- int tx_class = get_tx_class(tx_type);
- int offset;
- if (tx_class == TX_CLASS_2D)
- offset = 0;
- else if (tx_class == TX_CLASS_VERT)
- offset = SIG_COEF_CONTEXTS_2D;
- else
- offset = SIG_COEF_CONTEXTS_2D + SIG_COEF_CONTEXTS_1D;
-#else
- int offset = 0;
-#endif
-
- if (row == 0 && col == 0) return offset + 0;
-
- if (row == 0 && col == 1) return offset + 1 + count;
-
- if (row == 1 && col == 0) return offset + 3 + count;
-
- if (row == 1 && col == 1) {
- ctx = (count + 1) >> 1;
-
- assert(5 + ctx <= 7);
-
- return offset + 5 + ctx;
- }
+ return mag + 14;
+}
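Editor's usage sketch (not part of the commit), assuming the padded level layout described above: the right, lower and lower-right neighbours of coefficient 5 in a 4-wide block hold levels 2, 3 and 1, so mag = (6 + 1) >> 1 = 3, and because the position lies in the top-left 2x2 region the result is 3 + 7.

static void br_ctx_example(void) {
  const int bwl = 2;  // 4-wide block
  const int stride = (1 << bwl) + TX_PAD_HOR;
  uint8_t levels[64] = { 0 };      // comfortably larger than what is read here
  const int pos = 1 * stride + 1;  // coefficient c == 5 -> row 1, col 1
  levels[pos + 1] = 2;             // right neighbour
  levels[pos + stride] = 3;        // lower neighbour
  levels[pos + stride + 1] = 1;    // lower-right neighbour
  assert(get_br_ctx(levels, 5, bwl, TX_CLASS_2D) == 10);
}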
- if (row == 0) {
- ctx = (count + 1) >> 1;
+#define SIG_REF_OFFSET_NUM 5
- assert(ctx < 2);
- return offset + 8 + ctx;
- }
+// Note: TX_PAD_2D depends on these offset tables.
+static const int sig_ref_offset[SIG_REF_OFFSET_NUM][2] = {
+ { 0, 1 }, { 1, 0 }, { 1, 1 }, { 0, 2 }, { 2, 0 }
+ // , { 1, 2 }, { 2, 1 },
+};
- if (col == 0) {
- ctx = (count + 1) >> 1;
+static const int sig_ref_offset_vert[SIG_REF_OFFSET_NUM][2] = {
+ { 1, 0 }, { 2, 0 }, { 0, 1 }, { 3, 0 }, { 4, 0 }
+ // , { 1, 1 }, { 2, 1 },
+};
- assert(ctx < 2);
- return offset + 10 + ctx;
- }
+static const int sig_ref_offset_horiz[SIG_REF_OFFSET_NUM][2] = {
+ { 0, 1 }, { 0, 2 }, { 1, 0 }, { 0, 3 }, { 0, 4 }
+ // , { 1, 1 }, { 1, 2 },
+};
- ctx = count >> 1;
+#define SIG_REF_DIFF_OFFSET_NUM 3
- assert(12 + ctx < 16);
+static const int sig_ref_diff_offset[SIG_REF_DIFF_OFFSET_NUM][2] = {
+ { 1, 1 }, { 0, 2 }, { 2, 0 }
+};
- return offset + 12 + ctx;
-}
+static const int sig_ref_diff_offset_vert[SIG_REF_DIFF_OFFSET_NUM][2] = {
+ { 2, 0 }, { 3, 0 }, { 4, 0 }
+};
-static INLINE int get_nz_map_ctx(const tran_low_t *tcoeffs, const int scan_idx,
- const int16_t *scan, const int bwl,
- const int height, TX_TYPE tx_type) {
- const int coeff_idx = scan[scan_idx];
- const int row = coeff_idx >> bwl;
- const int col = coeff_idx - (row << bwl);
-#if REDUCE_CONTEXT_DEPENDENCY
- int prev_coeff_idx;
- int prev_row;
- int prev_col;
- if (scan_idx > MIN_SCAN_IDX_REDUCE_CONTEXT_DEPENDENCY) {
- prev_coeff_idx = scan[scan_idx - 1]; // raster order
- prev_row = prev_coeff_idx >> bwl;
- prev_col = prev_coeff_idx - (prev_row << bwl);
- } else {
- prev_coeff_idx = -1;
- prev_row = -1;
- prev_col = -1;
- }
- int count = get_nz_count(tcoeffs, bwl, height, row, col, prev_row, prev_col);
-#else
- int count = get_nz_count(tcoeffs, bwl, height, row, col);
-#endif
- return get_nz_map_ctx_from_count(count, coeff_idx, bwl, tx_type);
-}
+static const int sig_ref_diff_offset_horiz[SIG_REF_DIFF_OFFSET_NUM][2] = {
+ { 0, 2 }, { 0, 3 }, { 0, 4 }
+};
-static INLINE int get_eob_ctx(const tran_low_t *tcoeffs,
- const int coeff_idx, // raster order
- const TX_SIZE txs_ctx, TX_TYPE tx_type) {
- (void)tcoeffs;
- int offset = 0;
-#if CONFIG_CTX1D
- TX_CLASS tx_class = get_tx_class(tx_type);
- if (tx_class == TX_CLASS_VERT)
- offset = EOB_COEF_CONTEXTS_2D;
- else if (tx_class == TX_CLASS_HORIZ)
- offset = EOB_COEF_CONTEXTS_2D + EOB_COEF_CONTEXTS_1D;
-#else
- (void)tx_type;
-#endif
-
- if (txs_ctx == TX_4X4) return offset + av1_coeff_band_4x4[coeff_idx];
- if (txs_ctx == TX_8X8) return offset + av1_coeff_band_8x8[coeff_idx];
- if (txs_ctx == TX_16X16) return offset + av1_coeff_band_16x16[coeff_idx];
- if (txs_ctx == TX_32X32) return offset + av1_coeff_band_32x32[coeff_idx];
-
- assert(0);
- return 0;
-}
+static const uint8_t clip_max3[256] = {
+ 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
+};
-static INLINE void set_dc_sign(int *cul_level, tran_low_t v) {
- if (v < 0)
- *cul_level |= 1 << COEFF_CONTEXT_BITS;
- else if (v > 0)
- *cul_level += 2 << COEFF_CONTEXT_BITS;
-}
+static AOM_FORCE_INLINE int get_nz_mag(const uint8_t *const levels,
+ const int bwl, const TX_CLASS tx_class) {
+ int mag;
-static INLINE int get_dc_sign_ctx(int dc_sign) {
- int dc_sign_ctx = 0;
- if (dc_sign < 0)
- dc_sign_ctx = 1;
- else if (dc_sign > 0)
- dc_sign_ctx = 2;
+ // Note: AOMMIN(level, 3) is not needed by the decoder, since level < 3 there.
+ mag = clip_max3[levels[1]]; // { 0, 1 }
+ mag += clip_max3[levels[(1 << bwl) + TX_PAD_HOR]]; // { 1, 0 }
+
+ if (tx_class == TX_CLASS_2D) {
+ mag += clip_max3[levels[(1 << bwl) + TX_PAD_HOR + 1]]; // { 1, 1 }
+ mag += clip_max3[levels[2]]; // { 0, 2 }
+ mag += clip_max3[levels[(2 << bwl) + (2 << TX_PAD_HOR_LOG2)]]; // { 2, 0 }
+ } else if (tx_class == TX_CLASS_VERT) {
+ mag += clip_max3[levels[(2 << bwl) + (2 << TX_PAD_HOR_LOG2)]]; // { 2, 0 }
+ mag += clip_max3[levels[(3 << bwl) + (3 << TX_PAD_HOR_LOG2)]]; // { 3, 0 }
+ mag += clip_max3[levels[(4 << bwl) + (4 << TX_PAD_HOR_LOG2)]]; // { 4, 0 }
+ } else {
+ mag += clip_max3[levels[2]]; // { 0, 2 }
+ mag += clip_max3[levels[3]]; // { 0, 3 }
+ mag += clip_max3[levels[4]]; // { 0, 4 }
+ }
- return dc_sign_ctx;
+ return mag;
}
-static INLINE void get_txb_ctx(BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- int plane, const ENTROPY_CONTEXT *a,
- const ENTROPY_CONTEXT *l, TXB_CTX *txb_ctx) {
- const int txb_w_unit = tx_size_wide_unit[tx_size];
- const int txb_h_unit = tx_size_high_unit[tx_size];
- int ctx_offset = (plane == 0) ? 0 : 7;
-
- if (plane_bsize > txsize_to_bsize[tx_size]) ctx_offset += 3;
-
- int dc_sign = 0;
- for (int k = 0; k < txb_w_unit; ++k) {
- int sign = ((uint8_t)a[k]) >> COEFF_CONTEXT_BITS;
- if (sign == 1)
- --dc_sign;
- else if (sign == 2)
- ++dc_sign;
- else if (sign != 0)
- assert(0);
- }
-
- for (int k = 0; k < txb_h_unit; ++k) {
- int sign = ((uint8_t)l[k]) >> COEFF_CONTEXT_BITS;
- if (sign == 1)
- --dc_sign;
- else if (sign == 2)
- ++dc_sign;
- else if (sign != 0)
- assert(0);
+static INLINE int get_nz_count(const uint8_t *const levels, const int bwl,
+ const TX_CLASS tx_class) {
+ int count;
+
+ count = (levels[1] != 0); // { 0, 1 }
+ count += (levels[(1 << bwl) + TX_PAD_HOR] != 0); // { 1, 0 }
+
+ for (int idx = 0; idx < SIG_REF_DIFF_OFFSET_NUM; ++idx) {
+ const int row_offset =
+ ((tx_class == TX_CLASS_2D) ? sig_ref_diff_offset[idx][0]
+ : ((tx_class == TX_CLASS_VERT)
+ ? sig_ref_diff_offset_vert[idx][0]
+ : sig_ref_diff_offset_horiz[idx][0]));
+ const int col_offset =
+ ((tx_class == TX_CLASS_2D) ? sig_ref_diff_offset[idx][1]
+ : ((tx_class == TX_CLASS_VERT)
+ ? sig_ref_diff_offset_vert[idx][1]
+ : sig_ref_diff_offset_horiz[idx][1]));
+ const int nb_pos =
+ (row_offset << bwl) + (row_offset << TX_PAD_HOR_LOG2) + col_offset;
+ count += (levels[nb_pos] != 0);
}
+ return count;
+}
- txb_ctx->dc_sign_ctx = get_dc_sign_ctx(dc_sign);
-
- if (plane == 0) {
- int top = 0;
- int left = 0;
+#define NZ_MAP_CTX_0 SIG_COEF_CONTEXTS_2D
+#define NZ_MAP_CTX_5 (NZ_MAP_CTX_0 + 5)
+#define NZ_MAP_CTX_10 (NZ_MAP_CTX_0 + 10)
+
+static const int nz_map_ctx_offset_1d[32] = {
+ NZ_MAP_CTX_0, NZ_MAP_CTX_5, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10,
+ NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10,
+ NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10,
+ NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10,
+ NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10,
+ NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10,
+ NZ_MAP_CTX_10, NZ_MAP_CTX_10,
+};
- for (int k = 0; k < txb_w_unit; ++k) {
- top = AOMMAX(top, ((uint8_t)a[k] & COEFF_CONTEXT_MASK));
+static AOM_FORCE_INLINE int get_nz_map_ctx_from_stats(
+ const int stats,
+ const int coeff_idx, // raster order
+ const int bwl, const TX_SIZE tx_size, const TX_CLASS tx_class) {
+ // tx_class == 0(TX_CLASS_2D)
+ if ((tx_class | coeff_idx) == 0) return 0;
+ int ctx = (stats + 1) >> 1;
+ ctx = AOMMIN(ctx, 4);
+ switch (tx_class) {
+ case TX_CLASS_2D: {
+ // This is the algorithm to generate av1_nz_map_ctx_offset[][]
+ // const int width = tx_size_wide[tx_size];
+ // const int height = tx_size_high[tx_size];
+ // if (width < height) {
+ // if (row < 2) return 11 + ctx;
+ // } else if (width > height) {
+ // if (col < 2) return 16 + ctx;
+ // }
+ // if (row + col < 2) return ctx + 1;
+ // if (row + col < 4) return 5 + ctx + 1;
+ // return 21 + ctx;
+ return ctx + av1_nz_map_ctx_offset[tx_size][coeff_idx];
}
-
- for (int k = 0; k < txb_h_unit; ++k) {
- left = AOMMAX(left, ((uint8_t)l[k] & COEFF_CONTEXT_MASK));
+ case TX_CLASS_HORIZ: {
+ const int row = coeff_idx >> bwl;
+ const int col = coeff_idx - (row << bwl);
+ return ctx + nz_map_ctx_offset_1d[col];
+ break;
}
-
- top = AOMMIN(top, 255);
- left = AOMMIN(left, 255);
-
- if (plane_bsize == txsize_to_bsize[tx_size])
- txb_ctx->txb_skip_ctx = 0;
- else if (top == 0 && left == 0)
- txb_ctx->txb_skip_ctx = 1;
- else if (top == 0 || left == 0)
- txb_ctx->txb_skip_ctx = 2 + (AOMMAX(top, left) > 3);
- else if (AOMMAX(top, left) <= 3)
- txb_ctx->txb_skip_ctx = 4;
- else if (AOMMIN(top, left) <= 3)
- txb_ctx->txb_skip_ctx = 5;
- else
- txb_ctx->txb_skip_ctx = 6;
- } else {
- int ctx_base = get_entropy_context(tx_size, a, l);
- txb_ctx->txb_skip_ctx = ctx_offset + ctx_base;
+ case TX_CLASS_VERT: {
+ const int row = coeff_idx >> bwl;
+ return ctx + nz_map_ctx_offset_1d[row];
+ break;
+ }
+ default: break;
}
+ return 0;
}
-#if LV_MAP_PROB
-void av1_init_txb_probs(FRAME_CONTEXT *fc);
-#endif // LV_MAP_PROB
+typedef aom_cdf_prob (*base_cdf_arr)[CDF_SIZE(4)];
+typedef aom_cdf_prob (*br_cdf_arr)[CDF_SIZE(BR_CDF_SIZE)];
-void av1_adapt_txb_probs(AV1_COMMON *cm, unsigned int count_sat,
- unsigned int update_factor);
+static INLINE int get_lower_levels_ctx_eob(int bwl, int height, int scan_idx) {
+ if (scan_idx == 0) return 0;
+ if (scan_idx <= (height << bwl) / 8) return 1;
+ if (scan_idx <= (height << bwl) / 4) return 2;
+ return 3;
+}
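Editor's illustration (not part of the commit): the context for the eob-position coefficient depends only on how deep the scan position sits in the block, e.g. for a 16x16 block (256 coefficients):

static void lower_levels_eob_ctx_example(void) {
  assert(get_lower_levels_ctx_eob(4, 16, 0) == 0);   // DC
  assert(get_lower_levels_ctx_eob(4, 16, 32) == 1);  // within 256 / 8
  assert(get_lower_levels_ctx_eob(4, 16, 64) == 2);  // within 256 / 4
  assert(get_lower_levels_ctx_eob(4, 16, 65) == 3);  // beyond a quarter
}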
-void av1_init_lv_map(AV1_COMMON *cm);
+static INLINE int get_lower_levels_ctx_2d(const uint8_t *levels, int coeff_idx,
+ int bwl, TX_SIZE tx_size) {
+ assert(coeff_idx > 0);
+ int mag;
+ // Note: AOMMIN(level, 3) is not needed by the decoder, since level < 3 there.
+ levels = levels + get_padded_idx(coeff_idx, bwl);
+ mag = AOMMIN(levels[1], 3); // { 0, 1 }
+ mag += AOMMIN(levels[(1 << bwl) + TX_PAD_HOR], 3); // { 1, 0 }
+ mag += AOMMIN(levels[(1 << bwl) + TX_PAD_HOR + 1], 3); // { 1, 1 }
+ mag += AOMMIN(levels[2], 3); // { 0, 2 }
+ mag += AOMMIN(levels[(2 << bwl) + (2 << TX_PAD_HOR_LOG2)], 3); // { 2, 0 }
+
+ const int ctx = AOMMIN((mag + 1) >> 1, 4);
+ return ctx + av1_nz_map_ctx_offset[tx_size][coeff_idx];
+}
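Editor's illustration (not part of the commit): with no non-zero neighbours decoded yet the magnitude term is zero, so the context reduces to the positional offset alone; for the coefficient right of DC in a 4x4 block that is 1. The 64-byte buffer is an editorial assumption that comfortably covers the padded 4x4 region read here.

static void lower_levels_ctx_2d_example(void) {
  uint8_t levels_buf[64] = { 0 };
  uint8_t *const levels = set_levels(levels_buf, 4);  // 4-wide block
  assert(get_lower_levels_ctx_2d(levels, 1, 2, TX_4X4) == 1);
}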
+static AOM_FORCE_INLINE int get_lower_levels_ctx(const uint8_t *levels,
+ int coeff_idx, int bwl,
+ TX_SIZE tx_size,
+ TX_CLASS tx_class) {
+ const int stats =
+ get_nz_mag(levels + get_padded_idx(coeff_idx, bwl), bwl, tx_class);
+ return get_nz_map_ctx_from_stats(stats, coeff_idx, bwl, tx_size, tx_class);
+}
-#if CONFIG_CTX1D
-static INLINE void get_eob_vert(int16_t *eob_ls, const tran_low_t *tcoeff,
- int w, int h) {
- for (int c = 0; c < w; ++c) {
- eob_ls[c] = 0;
- for (int r = h - 1; r >= 0; --r) {
- int coeff_idx = r * w + c;
- if (tcoeff[coeff_idx] != 0) {
- eob_ls[c] = r + 1;
- break;
- }
- }
+static INLINE int get_lower_levels_ctx_general(int is_last, int scan_idx,
+ int bwl, int height,
+ const uint8_t *levels,
+ int coeff_idx, TX_SIZE tx_size,
+ TX_CLASS tx_class) {
+ if (is_last) {
+ if (scan_idx == 0) return 0;
+ if (scan_idx <= (height << bwl) >> 3) return 1;
+ if (scan_idx <= (height << bwl) >> 2) return 2;
+ return 3;
}
+ return get_lower_levels_ctx(levels, coeff_idx, bwl, tx_size, tx_class);
}
-static INLINE void get_eob_horiz(int16_t *eob_ls, const tran_low_t *tcoeff,
- int w, int h) {
- for (int r = 0; r < h; ++r) {
- eob_ls[r] = 0;
- for (int c = w - 1; c >= 0; --c) {
- int coeff_idx = r * w + c;
- if (tcoeff[coeff_idx] != 0) {
- eob_ls[r] = c + 1;
- break;
- }
- }
- }
+static INLINE void set_dc_sign(int *cul_level, int dc_val) {
+ if (dc_val < 0)
+ *cul_level |= 1 << COEFF_CONTEXT_BITS;
+ else if (dc_val > 0)
+ *cul_level += 2 << COEFF_CONTEXT_BITS;
}
-static INLINE int get_empty_line_ctx(int line_idx, int16_t *eob_ls) {
- if (line_idx > 0) {
- int prev_eob = eob_ls[line_idx - 1];
- if (prev_eob == 0) {
- return 1;
- } else if (prev_eob < 3) {
- return 2;
- } else if (prev_eob < 6) {
- return 3;
+static INLINE void get_txb_ctx(const BLOCK_SIZE plane_bsize,
+ const TX_SIZE tx_size, const int plane,
+ const ENTROPY_CONTEXT *const a,
+ const ENTROPY_CONTEXT *const l,
+ TXB_CTX *const txb_ctx) {
+#define MAX_TX_SIZE_UNIT 16
+ static const int8_t signs[3] = { 0, -1, 1 };
+ static const int8_t dc_sign_contexts[4 * MAX_TX_SIZE_UNIT + 1] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+ };
+ const int txb_w_unit = tx_size_wide_unit[tx_size];
+ const int txb_h_unit = tx_size_high_unit[tx_size];
+ int dc_sign = 0;
+ int k = 0;
+
+ do {
+ const unsigned int sign = ((uint8_t)a[k]) >> COEFF_CONTEXT_BITS;
+ assert(sign <= 2);
+ dc_sign += signs[sign];
+ } while (++k < txb_w_unit);
+
+ k = 0;
+ do {
+ const unsigned int sign = ((uint8_t)l[k]) >> COEFF_CONTEXT_BITS;
+ assert(sign <= 2);
+ dc_sign += signs[sign];
+ } while (++k < txb_h_unit);
+
+ txb_ctx->dc_sign_ctx = dc_sign_contexts[dc_sign + 2 * MAX_TX_SIZE_UNIT];
+
+ if (plane == 0) {
+ if (plane_bsize == txsize_to_bsize[tx_size]) {
+ txb_ctx->txb_skip_ctx = 0;
} else {
- return 4;
+ // This is the algorithm used to generate the table skip_contexts[min][max].
+ // if (!max)
+ // txb_skip_ctx = 1;
+ // else if (!min)
+ // txb_skip_ctx = 2 + (max > 3);
+ // else if (max <= 3)
+ // txb_skip_ctx = 4;
+ // else if (min <= 3)
+ // txb_skip_ctx = 5;
+ // else
+ // txb_skip_ctx = 6;
+ static const uint8_t skip_contexts[5][5] = { { 1, 2, 2, 2, 3 },
+ { 1, 4, 4, 4, 5 },
+ { 1, 4, 4, 4, 5 },
+ { 1, 4, 4, 4, 5 },
+ { 1, 4, 4, 4, 6 } };
+ int top = 0;
+ int left = 0;
+
+ k = 0;
+ do {
+ top |= a[k];
+ } while (++k < txb_w_unit);
+ top &= COEFF_CONTEXT_MASK;
+
+ k = 0;
+ do {
+ left |= l[k];
+ } while (++k < txb_h_unit);
+ left &= COEFF_CONTEXT_MASK;
+ const int max = AOMMIN(top | left, 4);
+ const int min = AOMMIN(AOMMIN(top, left), 4);
+
+ txb_ctx->txb_skip_ctx = skip_contexts[min][max];
}
} else {
- return 0;
+ const int ctx_base = get_entropy_context(tx_size, a, l);
+ const int ctx_offset = (num_pels_log2_lookup[plane_bsize] >
+ num_pels_log2_lookup[txsize_to_bsize[tx_size]])
+ ? 10
+ : 7;
+ txb_ctx->txb_skip_ctx = ctx_base + ctx_offset;
}
+#undef MAX_TX_SIZE_UNIT
}
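
The commented algorithm above fully determines the skip_contexts table; a small self-check sketch that regenerates it (not part of the patch):

#include <assert.h>
#include <stdint.h>
static void check_skip_contexts_table(void) {
  static const uint8_t skip_contexts[5][5] = { { 1, 2, 2, 2, 3 },
                                               { 1, 4, 4, 4, 5 },
                                               { 1, 4, 4, 4, 5 },
                                               { 1, 4, 4, 4, 5 },
                                               { 1, 4, 4, 4, 6 } };
  for (int min = 0; min <= 4; ++min) {
    for (int max = min; max <= 4; ++max) {  // only min <= max can occur
      int ctx;
      if (!max)
        ctx = 1;
      else if (!min)
        ctx = 2 + (max > 3);
      else if (max <= 3)
        ctx = 4;
      else if (min <= 3)
        ctx = 5;
      else
        ctx = 6;
      assert(skip_contexts[min][max] == ctx);
    }
  }
}
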
-#define MAX_POS_CTX 8
-static int pos_ctx[MAX_HVTX_SIZE] = {
- 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
- 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
-};
-static INLINE int get_hv_eob_ctx(int line_idx, int pos, int16_t *eob_ls) {
- if (line_idx > 0) {
- int prev_eob = eob_ls[line_idx - 1];
- int diff = pos + 1 - prev_eob;
- int abs_diff = abs(diff);
- int ctx_idx = pos_ctx[abs_diff];
- assert(ctx_idx < MAX_POS_CTX);
- if (diff < 0) {
- ctx_idx += MAX_POS_CTX;
- assert(ctx_idx >= MAX_POS_CTX);
- assert(ctx_idx < 2 * MAX_POS_CTX);
- }
- return ctx_idx;
- } else {
- int ctx_idx = MAX_POS_CTX + MAX_POS_CTX + pos_ctx[pos];
- assert(ctx_idx < HV_EOB_CONTEXTS);
- assert(HV_EOB_CONTEXTS == MAX_POS_CTX * 3);
- return ctx_idx;
- }
-}
-#endif // CONFIG_CTX1D
+void av1_init_lv_map(AV1_COMMON *cm);
#endif // AV1_COMMON_TXB_COMMON_H_
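
One more note on get_txb_ctx() above: the dc_sign_contexts table is just a branchless three-way comparison on the summed sign codes of the above/left units. A sketch of the equivalent mapping (the helper name is mine):

static int dc_sign_ctx_example(int dc_sign_sum) {
  if (dc_sign_sum < 0) return 1;  // neighbours lean towards negative DC
  if (dc_sign_sum > 0) return 2;  // neighbours lean towards positive DC
  return 0;                       // balanced, or no nonzero DC neighbours
}
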
diff --git a/third_party/aom/av1/common/warped_motion.c b/third_party/aom/av1/common/warped_motion.c
index 34374af69..ae6f07657 100644
--- a/third_party/aom/av1/common/warped_motion.c
+++ b/third_party/aom/av1/common/warped_motion.c
@@ -15,7 +15,8 @@
#include <math.h>
#include <assert.h>
-#include "./av1_rtcd.h"
+#include "config/av1_rtcd.h"
+
#include "av1/common/warped_motion.h"
#include "av1/common/scale.h"
@@ -91,78 +92,11 @@ static const int error_measure_lut[512] = {
};
/* clang-format on */
-static ProjectPointsFunc get_project_points_type(TransformationType type) {
- switch (type) {
- case VERTRAPEZOID: return project_points_vertrapezoid;
- case HORTRAPEZOID: return project_points_hortrapezoid;
- case HOMOGRAPHY: return project_points_homography;
- case AFFINE: return project_points_affine;
- case ROTZOOM: return project_points_rotzoom;
- case TRANSLATION: return project_points_translation;
- default: assert(0); return NULL;
- }
-}
-
-void project_points_translation(const int32_t *mat, int *points, int *proj,
- const int n, const int stride_points,
- const int stride_proj, const int subsampling_x,
- const int subsampling_y) {
- int i;
- for (i = 0; i < n; ++i) {
- const int x = *(points++), y = *(points++);
- if (subsampling_x)
- *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
- ((x * (1 << (WARPEDMODEL_PREC_BITS + 1))) + mat[0]),
- WARPEDDIFF_PREC_BITS + 1);
- else
- *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
- ((x * (1 << WARPEDMODEL_PREC_BITS)) + mat[0]), WARPEDDIFF_PREC_BITS);
- if (subsampling_y)
- *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
- ((y * (1 << (WARPEDMODEL_PREC_BITS + 1))) + mat[1]),
- WARPEDDIFF_PREC_BITS + 1);
- else
- *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
- ((y * (1 << WARPEDMODEL_PREC_BITS))) + mat[1], WARPEDDIFF_PREC_BITS);
- points += stride_points - 2;
- proj += stride_proj - 2;
- }
-}
-
-void project_points_rotzoom(const int32_t *mat, int *points, int *proj,
- const int n, const int stride_points,
- const int stride_proj, const int subsampling_x,
- const int subsampling_y) {
- int i;
- for (i = 0; i < n; ++i) {
- const int x = *(points++), y = *(points++);
- if (subsampling_x)
- *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
- mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
- (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
- WARPEDDIFF_PREC_BITS + 1);
- else
- *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
- WARPEDDIFF_PREC_BITS);
- if (subsampling_y)
- *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
- -mat[3] * 2 * x + mat[2] * 2 * y + mat[1] +
- (-mat[3] + mat[2] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
- WARPEDDIFF_PREC_BITS + 1);
- else
- *(proj++) = ROUND_POWER_OF_TWO_SIGNED(-mat[3] * x + mat[2] * y + mat[1],
- WARPEDDIFF_PREC_BITS);
- points += stride_points - 2;
- proj += stride_proj - 2;
- }
-}
-
void project_points_affine(const int32_t *mat, int *points, int *proj,
const int n, const int stride_points,
const int stride_proj, const int subsampling_x,
const int subsampling_y) {
- int i;
- for (i = 0; i < n; ++i) {
+ for (int i = 0; i < n; ++i) {
const int x = *(points++), y = *(points++);
if (subsampling_x)
*(proj++) = ROUND_POWER_OF_TWO_SIGNED(
@@ -185,301 +119,6 @@ void project_points_affine(const int32_t *mat, int *points, int *proj,
}
}
-void project_points_hortrapezoid(const int32_t *mat, int *points, int *proj,
- const int n, const int stride_points,
- const int stride_proj, const int subsampling_x,
- const int subsampling_y) {
- int i;
- int64_t x, y, Z;
- int64_t xp, yp;
- for (i = 0; i < n; ++i) {
- x = *(points++), y = *(points++);
- x = (subsampling_x ? 4 * x + 1 : 2 * x);
- y = (subsampling_y ? 4 * y + 1 : 2 * y);
-
- Z = (mat[7] * y + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
- xp = (mat[2] * x + mat[3] * y + 2 * mat[0]) *
- (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
- WARPEDMODEL_PREC_BITS));
- yp = (mat[5] * y + 2 * mat[1]) *
- (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
- WARPEDMODEL_PREC_BITS));
-
- xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
- yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;
-
- if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
- if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
- *(proj++) = (int)xp;
- *(proj++) = (int)yp;
-
- points += stride_points - 2;
- proj += stride_proj - 2;
- }
-}
-
-void project_points_vertrapezoid(const int32_t *mat, int *points, int *proj,
- const int n, const int stride_points,
- const int stride_proj, const int subsampling_x,
- const int subsampling_y) {
- int i;
- int64_t x, y, Z;
- int64_t xp, yp;
- for (i = 0; i < n; ++i) {
- x = *(points++), y = *(points++);
- x = (subsampling_x ? 4 * x + 1 : 2 * x);
- y = (subsampling_y ? 4 * y + 1 : 2 * y);
-
- Z = (mat[6] * x + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
- xp = (mat[2] * x + 2 * mat[0]) *
- (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
- WARPEDMODEL_PREC_BITS));
- yp = (mat[4] * x + mat[5] * y + 2 * mat[1]) *
- (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
- WARPEDMODEL_PREC_BITS));
-
- xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
- yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;
-
- if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
- if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
- *(proj++) = (int)xp;
- *(proj++) = (int)yp;
-
- points += stride_points - 2;
- proj += stride_proj - 2;
- }
-}
-
-void project_points_homography(const int32_t *mat, int *points, int *proj,
- const int n, const int stride_points,
- const int stride_proj, const int subsampling_x,
- const int subsampling_y) {
- int i;
- int64_t x, y, Z;
- int64_t xp, yp;
- for (i = 0; i < n; ++i) {
- x = *(points++), y = *(points++);
- x = (subsampling_x ? 4 * x + 1 : 2 * x);
- y = (subsampling_y ? 4 * y + 1 : 2 * y);
-
- Z = (mat[6] * x + mat[7] * y + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
- xp = (mat[2] * x + mat[3] * y + 2 * mat[0]) *
- (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
- WARPEDMODEL_PREC_BITS));
- yp = (mat[4] * x + mat[5] * y + 2 * mat[1]) *
- (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
- WARPEDMODEL_PREC_BITS));
-
- xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
- yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;
-
- if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
- if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
- *(proj++) = (int)xp;
- *(proj++) = (int)yp;
-
- points += stride_points - 2;
- proj += stride_proj - 2;
- }
-}
-
-static const int16_t
- filter_ntap[WARPEDPIXEL_PREC_SHIFTS][WARPEDPIXEL_FILTER_TAPS] = {
-#if WARPEDPIXEL_PREC_BITS == 6
- { 0, 0, 128, 0, 0, 0 }, { 0, -1, 128, 2, -1, 0 },
- { 1, -3, 127, 4, -1, 0 }, { 1, -4, 126, 6, -2, 1 },
- { 1, -5, 126, 8, -3, 1 }, { 1, -6, 125, 11, -4, 1 },
- { 1, -7, 124, 13, -4, 1 }, { 2, -8, 123, 15, -5, 1 },
- { 2, -9, 122, 18, -6, 1 }, { 2, -10, 121, 20, -6, 1 },
- { 2, -11, 120, 22, -7, 2 }, { 2, -12, 119, 25, -8, 2 },
- { 3, -13, 117, 27, -8, 2 }, { 3, -13, 116, 29, -9, 2 },
- { 3, -14, 114, 32, -10, 3 }, { 3, -15, 113, 35, -10, 2 },
- { 3, -15, 111, 37, -11, 3 }, { 3, -16, 109, 40, -11, 3 },
- { 3, -16, 108, 42, -12, 3 }, { 4, -17, 106, 45, -13, 3 },
- { 4, -17, 104, 47, -13, 3 }, { 4, -17, 102, 50, -14, 3 },
- { 4, -17, 100, 52, -14, 3 }, { 4, -18, 98, 55, -15, 4 },
- { 4, -18, 96, 58, -15, 3 }, { 4, -18, 94, 60, -16, 4 },
- { 4, -18, 91, 63, -16, 4 }, { 4, -18, 89, 65, -16, 4 },
- { 4, -18, 87, 68, -17, 4 }, { 4, -18, 85, 70, -17, 4 },
- { 4, -18, 82, 73, -17, 4 }, { 4, -18, 80, 75, -17, 4 },
- { 4, -18, 78, 78, -18, 4 }, { 4, -17, 75, 80, -18, 4 },
- { 4, -17, 73, 82, -18, 4 }, { 4, -17, 70, 85, -18, 4 },
- { 4, -17, 68, 87, -18, 4 }, { 4, -16, 65, 89, -18, 4 },
- { 4, -16, 63, 91, -18, 4 }, { 4, -16, 60, 94, -18, 4 },
- { 3, -15, 58, 96, -18, 4 }, { 4, -15, 55, 98, -18, 4 },
- { 3, -14, 52, 100, -17, 4 }, { 3, -14, 50, 102, -17, 4 },
- { 3, -13, 47, 104, -17, 4 }, { 3, -13, 45, 106, -17, 4 },
- { 3, -12, 42, 108, -16, 3 }, { 3, -11, 40, 109, -16, 3 },
- { 3, -11, 37, 111, -15, 3 }, { 2, -10, 35, 113, -15, 3 },
- { 3, -10, 32, 114, -14, 3 }, { 2, -9, 29, 116, -13, 3 },
- { 2, -8, 27, 117, -13, 3 }, { 2, -8, 25, 119, -12, 2 },
- { 2, -7, 22, 120, -11, 2 }, { 1, -6, 20, 121, -10, 2 },
- { 1, -6, 18, 122, -9, 2 }, { 1, -5, 15, 123, -8, 2 },
- { 1, -4, 13, 124, -7, 1 }, { 1, -4, 11, 125, -6, 1 },
- { 1, -3, 8, 126, -5, 1 }, { 1, -2, 6, 126, -4, 1 },
- { 0, -1, 4, 127, -3, 1 }, { 0, -1, 2, 128, -1, 0 },
-#elif WARPEDPIXEL_PREC_BITS == 5
- { 0, 0, 128, 0, 0, 0 }, { 1, -3, 127, 4, -1, 0 },
- { 1, -5, 126, 8, -3, 1 }, { 1, -7, 124, 13, -4, 1 },
- { 2, -9, 122, 18, -6, 1 }, { 2, -11, 120, 22, -7, 2 },
- { 3, -13, 117, 27, -8, 2 }, { 3, -14, 114, 32, -10, 3 },
- { 3, -15, 111, 37, -11, 3 }, { 3, -16, 108, 42, -12, 3 },
- { 4, -17, 104, 47, -13, 3 }, { 4, -17, 100, 52, -14, 3 },
- { 4, -18, 96, 58, -15, 3 }, { 4, -18, 91, 63, -16, 4 },
- { 4, -18, 87, 68, -17, 4 }, { 4, -18, 82, 73, -17, 4 },
- { 4, -18, 78, 78, -18, 4 }, { 4, -17, 73, 82, -18, 4 },
- { 4, -17, 68, 87, -18, 4 }, { 4, -16, 63, 91, -18, 4 },
- { 3, -15, 58, 96, -18, 4 }, { 3, -14, 52, 100, -17, 4 },
- { 3, -13, 47, 104, -17, 4 }, { 3, -12, 42, 108, -16, 3 },
- { 3, -11, 37, 111, -15, 3 }, { 3, -10, 32, 114, -14, 3 },
- { 2, -8, 27, 117, -13, 3 }, { 2, -7, 22, 120, -11, 2 },
- { 1, -6, 18, 122, -9, 2 }, { 1, -4, 13, 124, -7, 1 },
- { 1, -3, 8, 126, -5, 1 }, { 0, -1, 4, 127, -3, 1 },
-#endif // WARPEDPIXEL_PREC_BITS == 6
- };
-
-static int32_t do_ntap_filter(const int32_t *const p, int x) {
- int i;
- int32_t sum = 0;
- for (i = 0; i < WARPEDPIXEL_FILTER_TAPS; ++i) {
- sum += p[i - WARPEDPIXEL_FILTER_TAPS / 2 + 1] * filter_ntap[x][i];
- }
- return sum;
-}
-
-static int32_t do_cubic_filter(const int32_t *const p, int x) {
- if (x == 0) {
- return p[0] * (1 << WARPEDPIXEL_FILTER_BITS);
- } else if (x == (1 << WARPEDPIXEL_PREC_BITS)) {
- return p[1] * (1 << WARPEDPIXEL_FILTER_BITS);
- } else {
- const int64_t v1 = (int64_t)x * x * x * (3 * (p[0] - p[1]) + p[2] - p[-1]);
- const int64_t v2 =
- (int64_t)x * x * (2 * p[-1] - 5 * p[0] + 4 * p[1] - p[2]);
- const int64_t v3 = x * (p[1] - p[-1]);
- const int64_t v4 = 2 * p[0];
- return (int32_t)ROUND_POWER_OF_TWO_SIGNED(
- (v4 * (1 << (3 * WARPEDPIXEL_PREC_BITS))) +
- (v3 * (1 << (2 * WARPEDPIXEL_PREC_BITS))) +
- (v2 * (1 << WARPEDPIXEL_PREC_BITS)) + v1,
- 3 * WARPEDPIXEL_PREC_BITS + 1 - WARPEDPIXEL_FILTER_BITS);
- }
-}
-
-static INLINE void get_subcolumn(int taps, const uint8_t *const ref,
- int32_t *col, int stride, int x, int y_start) {
- int i;
- for (i = 0; i < taps; ++i) {
- col[i] = ref[(i + y_start) * stride + x];
- }
-}
-
-static uint8_t bi_ntap_filter(const uint8_t *const ref, int x, int y,
- int stride) {
- int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
- int k;
- const int i = (int)x >> WARPEDPIXEL_PREC_BITS;
- const int j = (int)y >> WARPEDPIXEL_PREC_BITS;
- for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
- int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
- get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
- i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
- j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
- arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
- y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
- }
- val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
- x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
- val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
- return (uint8_t)clip_pixel(val);
-}
-
-static uint8_t bi_cubic_filter(const uint8_t *const ref, int x, int y,
- int stride) {
- int32_t val, arr[4];
- int k;
- const int i = (int)x >> WARPEDPIXEL_PREC_BITS;
- const int j = (int)y >> WARPEDPIXEL_PREC_BITS;
- for (k = 0; k < 4; ++k) {
- int32_t arr_temp[4];
- get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
- arr[k] =
- do_cubic_filter(arr_temp + 1, y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
- }
- val = do_cubic_filter(arr + 1, x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
- val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
- return (uint8_t)clip_pixel(val);
-}
-
-static uint8_t bi_linear_filter(const uint8_t *const ref, int x, int y,
- int stride) {
- const int ix = x >> WARPEDPIXEL_PREC_BITS;
- const int iy = y >> WARPEDPIXEL_PREC_BITS;
- const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
- const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
- int32_t val;
- val = ROUND_POWER_OF_TWO_SIGNED(
- ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
- (WARPEDPIXEL_PREC_SHIFTS - sx) +
- ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
- ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
- ref[(iy + 1) * stride + ix + 1] * sy * sx,
- WARPEDPIXEL_PREC_BITS * 2);
- return (uint8_t)clip_pixel(val);
-}
-
-static uint8_t warp_interpolate(const uint8_t *const ref, int x, int y,
- int width, int height, int stride) {
- const int ix = x >> WARPEDPIXEL_PREC_BITS;
- const int iy = y >> WARPEDPIXEL_PREC_BITS;
- const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
- const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
- int32_t v;
-
- if (ix < 0 && iy < 0)
- return ref[0];
- else if (ix < 0 && iy >= height - 1)
- return ref[(height - 1) * stride];
- else if (ix >= width - 1 && iy < 0)
- return ref[width - 1];
- else if (ix >= width - 1 && iy >= height - 1)
- return ref[(height - 1) * stride + (width - 1)];
- else if (ix < 0) {
- v = ROUND_POWER_OF_TWO_SIGNED(
- ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
- ref[(iy + 1) * stride] * sy,
- WARPEDPIXEL_PREC_BITS);
- return clip_pixel(v);
- } else if (iy < 0) {
- v = ROUND_POWER_OF_TWO_SIGNED(
- ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
- WARPEDPIXEL_PREC_BITS);
- return clip_pixel(v);
- } else if (ix >= width - 1) {
- v = ROUND_POWER_OF_TWO_SIGNED(
- ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
- ref[(iy + 1) * stride + width - 1] * sy,
- WARPEDPIXEL_PREC_BITS);
- return clip_pixel(v);
- } else if (iy >= height - 1) {
- v = ROUND_POWER_OF_TWO_SIGNED(
- ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
- ref[(height - 1) * stride + ix + 1] * sx,
- WARPEDPIXEL_PREC_BITS);
- return clip_pixel(v);
- } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
- iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
- ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
- iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
- return bi_ntap_filter(ref, x, y, stride);
- } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
- return bi_cubic_filter(ref, x, y, stride);
- } else {
- return bi_linear_filter(ref, x, y, stride);
- }
-}
-
// For warping, we really use a 6-tap filter, but we do blocks of 8 pixels
// at a time. The zoom/rotation/shear in the model are applied to the
// "fractional" position of each pixel, which therefore varies within
@@ -683,15 +322,14 @@ static const uint16_t div_lut[DIV_LUT_NUM + 1] = {
8240, 8224, 8208, 8192,
};
-#if CONFIG_WARPED_MOTION
// Decomposes a divisor D such that 1/D = y/2^shift, where y is returned
// at precision of DIV_LUT_PREC_BITS along with the shift.
static int16_t resolve_divisor_64(uint64_t D, int16_t *shift) {
- int64_t e, f;
+ int64_t f;
*shift = (int16_t)((D >> 32) ? get_msb((unsigned int)(D >> 32)) + 32
: get_msb((unsigned int)D));
// e is obtained from D after resetting the most significant 1 bit.
- e = D - ((uint64_t)1 << *shift);
+ const int64_t e = D - ((uint64_t)1 << *shift);
// Get the most significant DIV_LUT_BITS (8) bits of e into f
if (*shift > DIV_LUT_BITS)
f = ROUND_POWER_OF_TWO_64(e, *shift - DIV_LUT_BITS);
@@ -702,13 +340,12 @@ static int16_t resolve_divisor_64(uint64_t D, int16_t *shift) {
// Use f as lookup into the precomputed table of multipliers
return div_lut[f];
}
-#endif // CONFIG_WARPED_MOTION
static int16_t resolve_divisor_32(uint32_t D, int16_t *shift) {
- int32_t e, f;
+ int32_t f;
*shift = get_msb(D);
// e is obtained from D after resetting the most significant 1 bit.
- e = D - ((uint32_t)1 << *shift);
+ const int32_t e = D - ((uint32_t)1 << *shift);
// Get the most significant DIV_LUT_BITS (8) bits of e into f
if (*shift > DIV_LUT_BITS)
f = ROUND_POWER_OF_TWO(e, *shift - DIV_LUT_BITS);
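
In effect, resolve_divisor_64()/resolve_divisor_32() let an integer division x / D be replaced by a multiply and a shift, x / D ~= (x * y) >> shift, with y carrying DIV_LUT_PREC_BITS of precision. A minimal usage sketch mirroring how get_shear_params() below consumes the result (the helper name is mine):

static int32_t divide_by_multiply_example(int32_t x, uint32_t d) {
  int16_t shift;
  const int16_t y = resolve_divisor_32(d, &shift);
  return (int32_t)ROUND_POWER_OF_TWO_SIGNED_64((int64_t)x * y, shift);
}
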
@@ -743,16 +380,13 @@ int get_shear_params(WarpedMotionParams *wm) {
wm->beta = clamp(mat[3], INT16_MIN, INT16_MAX);
int16_t shift;
int16_t y = resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
- int64_t v;
- v = ((int64_t)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
+ int64_t v = ((int64_t)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
wm->gamma =
clamp((int)ROUND_POWER_OF_TWO_SIGNED_64(v, shift), INT16_MIN, INT16_MAX);
v = ((int64_t)mat[3] * mat[4]) * y;
wm->delta = clamp(mat[5] - (int)ROUND_POWER_OF_TWO_SIGNED_64(v, shift) -
(1 << WARPEDMODEL_PREC_BITS),
INT16_MIN, INT16_MAX);
- if (!is_affine_shear_allowed(wm->alpha, wm->beta, wm->gamma, wm->delta))
- return 0;
wm->alpha = ROUND_POWER_OF_TWO_SIGNED(wm->alpha, WARP_PARAM_REDUCE_BITS) *
(1 << WARP_PARAM_REDUCE_BITS);
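
The alpha/beta/gamma/delta produced here factor the 2x2 part of the warp matrix into a horizontal shear followed by a vertical shear, which is what lets the warp filter run as a row pass and then a column pass. A floating-point sketch of that factorization, ignoring the fixed-point scaling (the helper name is mine):

#include <assert.h>
#include <math.h>
static void check_shear_decomposition(double m2, double m3, double m4,
                                      double m5) {
  const double alpha = m2 - 1.0, beta = m3;
  const double gamma = m4 / m2;  // assumes m2 != 0
  const double delta = m5 - m3 * m4 / m2 - 1.0;
  // [ m2 m3 ]   [   1    0 ] [ 1+alpha   beta  ]
  // [ m4 m5 ] = [ gamma  1 ] [   0     1+delta ]
  assert(fabs((1.0 + alpha) - m2) < 1e-12);
  assert(fabs(beta - m3) < 1e-12);
  assert(fabs(gamma * (1.0 + alpha) - m4) < 1e-12);
  assert(fabs(gamma * beta + (1.0 + delta) - m5) < 1e-12);
}
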
@@ -762,171 +396,24 @@ int get_shear_params(WarpedMotionParams *wm) {
(1 << WARP_PARAM_REDUCE_BITS);
wm->delta = ROUND_POWER_OF_TWO_SIGNED(wm->delta, WARP_PARAM_REDUCE_BITS) *
(1 << WARP_PARAM_REDUCE_BITS);
- return 1;
-}
-
-#if CONFIG_HIGHBITDEPTH
-static INLINE void highbd_get_subcolumn(int taps, const uint16_t *const ref,
- int32_t *col, int stride, int x,
- int y_start) {
- int i;
- for (i = 0; i < taps; ++i) {
- col[i] = ref[(i + y_start) * stride + x];
- }
-}
-
-static uint16_t highbd_bi_ntap_filter(const uint16_t *const ref, int x, int y,
- int stride, int bd) {
- int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
- int k;
- const int i = (int)x >> WARPEDPIXEL_PREC_BITS;
- const int j = (int)y >> WARPEDPIXEL_PREC_BITS;
- for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
- int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
- highbd_get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
- i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
- j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
- arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
- y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
- }
- val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
- x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
- val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
- return (uint16_t)clip_pixel_highbd(val, bd);
-}
-
-static uint16_t highbd_bi_cubic_filter(const uint16_t *const ref, int x, int y,
- int stride, int bd) {
- int32_t val, arr[4];
- int k;
- const int i = (int)x >> WARPEDPIXEL_PREC_BITS;
- const int j = (int)y >> WARPEDPIXEL_PREC_BITS;
- for (k = 0; k < 4; ++k) {
- int32_t arr_temp[4];
- highbd_get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
- arr[k] =
- do_cubic_filter(arr_temp + 1, y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
- }
- val = do_cubic_filter(arr + 1, x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
- val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
- return (uint16_t)clip_pixel_highbd(val, bd);
-}
-static uint16_t highbd_bi_linear_filter(const uint16_t *const ref, int x, int y,
- int stride, int bd) {
- const int ix = x >> WARPEDPIXEL_PREC_BITS;
- const int iy = y >> WARPEDPIXEL_PREC_BITS;
- const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
- const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
- int32_t val;
- val = ROUND_POWER_OF_TWO_SIGNED(
- ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
- (WARPEDPIXEL_PREC_SHIFTS - sx) +
- ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
- ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
- ref[(iy + 1) * stride + ix + 1] * sy * sx,
- WARPEDPIXEL_PREC_BITS * 2);
- return (uint16_t)clip_pixel_highbd(val, bd);
-}
+ if (!is_affine_shear_allowed(wm->alpha, wm->beta, wm->gamma, wm->delta))
+ return 0;
-static uint16_t highbd_warp_interpolate(const uint16_t *const ref, int x, int y,
- int width, int height, int stride,
- int bd) {
- const int ix = x >> WARPEDPIXEL_PREC_BITS;
- const int iy = y >> WARPEDPIXEL_PREC_BITS;
- const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
- const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
- int32_t v;
-
- if (ix < 0 && iy < 0)
- return ref[0];
- else if (ix < 0 && iy > height - 1)
- return ref[(height - 1) * stride];
- else if (ix > width - 1 && iy < 0)
- return ref[width - 1];
- else if (ix > width - 1 && iy > height - 1)
- return ref[(height - 1) * stride + (width - 1)];
- else if (ix < 0) {
- v = ROUND_POWER_OF_TWO_SIGNED(
- ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
- ref[(iy + 1) * stride] * sy,
- WARPEDPIXEL_PREC_BITS);
- return clip_pixel_highbd(v, bd);
- } else if (iy < 0) {
- v = ROUND_POWER_OF_TWO_SIGNED(
- ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
- WARPEDPIXEL_PREC_BITS);
- return clip_pixel_highbd(v, bd);
- } else if (ix > width - 1) {
- v = ROUND_POWER_OF_TWO_SIGNED(
- ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
- ref[(iy + 1) * stride + width - 1] * sy,
- WARPEDPIXEL_PREC_BITS);
- return clip_pixel_highbd(v, bd);
- } else if (iy > height - 1) {
- v = ROUND_POWER_OF_TWO_SIGNED(
- ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
- ref[(height - 1) * stride + ix + 1] * sx,
- WARPEDPIXEL_PREC_BITS);
- return clip_pixel_highbd(v, bd);
- } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
- iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
- ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
- iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
- return highbd_bi_ntap_filter(ref, x, y, stride, bd);
- } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
- return highbd_bi_cubic_filter(ref, x, y, stride, bd);
- } else {
- return highbd_bi_linear_filter(ref, x, y, stride, bd);
- }
+ return 1;
}
static INLINE int highbd_error_measure(int err, int bd) {
const int b = bd - 8;
const int bmask = (1 << b) - 1;
const int v = (1 << b);
- int e1, e2;
err = abs(err);
- e1 = err >> b;
- e2 = err & bmask;
+ const int e1 = err >> b;
+ const int e2 = err & bmask;
return error_measure_lut[255 + e1] * (v - e2) +
error_measure_lut[256 + e1] * e2;
}
-static void highbd_warp_plane_old(const WarpedMotionParams *const wm,
- const uint8_t *const ref8, int width,
- int height, int stride,
- const uint8_t *const pred8, int p_col,
- int p_row, int p_width, int p_height,
- int p_stride, int subsampling_x,
- int subsampling_y, int x_scale, int y_scale,
- int bd, ConvolveParams *conv_params) {
- int i, j;
- ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
- uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
- if (projectpoints == NULL) return;
- for (i = p_row; i < p_row + p_height; ++i) {
- for (j = p_col; j < p_col + p_width; ++j) {
- int in[2], out[2];
- in[0] = j;
- in[1] = i;
- projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
- out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, SCALE_SUBPEL_BITS);
- out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, SCALE_SUBPEL_BITS);
- if (conv_params->do_average)
- pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
- pred[(j - p_col) + (i - p_row) * p_stride] +
- highbd_warp_interpolate(ref, out[0], out[1], width, height,
- stride, bd),
- 1);
- else
- pred[(j - p_col) + (i - p_row) * p_stride] = highbd_warp_interpolate(
- ref, out[0], out[1], width, height, stride, bd);
- }
- }
-}
-
/* Note: For an explanation of the warp algorithm, and some notes on bit widths
for hardware implementations, see the comments above av1_warp_affine_c
*/
@@ -938,37 +425,23 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
ConvolveParams *conv_params, int16_t alpha,
int16_t beta, int16_t gamma, int16_t delta) {
int32_t tmp[15 * 8];
- int i, j, k, l, m;
-#if CONFIG_CONVOLVE_ROUND
- const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
const int reduce_bits_horiz =
- use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
- const int max_bits_horiz =
- use_conv_params
- ? bd + FILTER_BITS + 1 - conv_params->round_0
- : bd + WARPEDPIXEL_FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz =
- use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
- const int offset_bits_vert =
- use_conv_params
- ? bd + 2 * FILTER_BITS - conv_params->round_0
- : bd + 2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS;
- if (use_conv_params) {
- conv_params->do_post_rounding = 1;
- }
- assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
-#else
- const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
- const int max_bits_horiz =
- bd + WARPEDPIXEL_FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
- const int offset_bits_vert =
- bd + 2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS;
-#endif
+ conv_params->round_0 +
+ AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
+ const int reduce_bits_vert = conv_params->is_compound
+ ? conv_params->round_1
+ : 2 * FILTER_BITS - reduce_bits_horiz;
+ const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
+ const int offset_bits_horiz = bd + FILTER_BITS - 1;
+ const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
+ const int round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
(void)max_bits_horiz;
+ assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
- for (i = p_row; i < p_row + p_height; i += 8) {
- for (j = p_col; j < p_col + p_width; j += 8) {
+ for (int i = p_row; i < p_row + p_height; i += 8) {
+ for (int j = p_col; j < p_col + p_width; j += 8) {
// Calculate the center of this 8x8 block,
// project to luma coordinates (if in a subsampled chroma plane),
// apply the affine transformation,
@@ -980,9 +453,9 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
const int32_t x4 = dst_x >> subsampling_x;
const int32_t y4 = dst_y >> subsampling_y;
- int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
+ const int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
- int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
+ const int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
sx4 += alpha * (-4) + beta * (-4);
@@ -992,15 +465,11 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
// Horizontal filter
- for (k = -7; k < 8; ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
+ for (int k = -7; k < 8; ++k) {
+ const int iy = clamp(iy4 + k, 0, height - 1);
int sx = sx4 + beta * (k + 4);
- for (l = -4; l < 4; ++l) {
+ for (int l = -4; l < 4; ++l) {
int ix = ix4 + l - 3;
const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
WARPEDPIXEL_PREC_SHIFTS;
@@ -1008,12 +477,8 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
const int16_t *coeffs = warped_filter[offs];
int32_t sum = 1 << offset_bits_horiz;
- for (m = 0; m < 8; ++m) {
- int sample_x = ix + m;
- if (sample_x < 0)
- sample_x = 0;
- else if (sample_x > width - 1)
- sample_x = width - 1;
+ for (int m = 0; m < 8; ++m) {
+ const int sample_x = clamp(ix + m, 0, width - 1);
sum += ref[iy * stride + sample_x] * coeffs[m];
}
sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz);
@@ -1024,46 +489,50 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
}
// Vertical filter
- for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
+ for (int k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
int sy = sy4 + delta * (k + 4);
- for (l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
+ for (int l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
WARPEDPIXEL_PREC_SHIFTS;
assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
const int16_t *coeffs = warped_filter[offs];
int32_t sum = 1 << offset_bits_vert;
- for (m = 0; m < 8; ++m) {
+ for (int m = 0; m < 8; ++m) {
sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
}
-#if CONFIG_CONVOLVE_ROUND
- if (use_conv_params) {
+
+ if (conv_params->is_compound) {
CONV_BUF_TYPE *p =
&conv_params
->dst[(i - p_row + k + 4) * conv_params->dst_stride +
(j - p_col + l + 4)];
- sum = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
- (1 << (offset_bits_horiz + FILTER_BITS -
- conv_params->round_0 - conv_params->round_1)) -
- (1 << (offset_bits_vert - conv_params->round_1));
- if (conv_params->do_average)
- *p += sum;
- else
+ sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
+ if (conv_params->do_average) {
+ uint16_t *dst16 =
+ &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
+ int32_t tmp32 = *p;
+ if (conv_params->use_jnt_comp_avg) {
+ tmp32 = tmp32 * conv_params->fwd_offset +
+ sum * conv_params->bck_offset;
+ tmp32 = tmp32 >> DIST_PRECISION_BITS;
+ } else {
+ tmp32 += sum;
+ tmp32 = tmp32 >> 1;
+ }
+ tmp32 = tmp32 - (1 << (offset_bits - conv_params->round_1)) -
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ *dst16 =
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp32, round_bits), bd);
+ } else {
*p = sum;
+ }
} else {
-#else
- {
-#endif
uint16_t *p =
&pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
- sum = ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS);
+ sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
assert(0 <= sum && sum < (1 << (bd + 2)));
- uint16_t px =
- clip_pixel_highbd(sum - (1 << (bd - 1)) - (1 << bd), bd);
- if (conv_params->do_average)
- *p = ROUND_POWER_OF_TWO(*p + px, 1);
- else
- *p = px;
+ *p = clip_pixel_highbd(sum - (1 << (bd - 1)) - (1 << bd), bd);
}
sy += gamma;
}
@@ -1076,32 +545,25 @@ static void highbd_warp_plane(WarpedMotionParams *wm, const uint8_t *const ref8,
int width, int height, int stride,
const uint8_t *const pred8, int p_col, int p_row,
int p_width, int p_height, int p_stride,
- int subsampling_x, int subsampling_y, int x_scale,
- int y_scale, int bd,
+ int subsampling_x, int subsampling_y, int bd,
ConvolveParams *conv_params) {
+ assert(wm->wmtype <= AFFINE);
if (wm->wmtype == ROTZOOM) {
wm->wmmat[5] = wm->wmmat[2];
wm->wmmat[4] = -wm->wmmat[3];
}
- if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) &&
- x_scale == SCALE_SUBPEL_SHIFTS && y_scale == SCALE_SUBPEL_SHIFTS) {
- const int32_t *const mat = wm->wmmat;
- const int16_t alpha = wm->alpha;
- const int16_t beta = wm->beta;
- const int16_t gamma = wm->gamma;
- const int16_t delta = wm->delta;
-
- const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
- uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
- p_width, p_height, p_stride, subsampling_x,
- subsampling_y, bd, conv_params, alpha, beta, gamma,
- delta);
- } else {
- highbd_warp_plane_old(wm, ref8, width, height, stride, pred8, p_col, p_row,
- p_width, p_height, p_stride, subsampling_x,
- subsampling_y, x_scale, y_scale, bd, conv_params);
- }
+ const int32_t *const mat = wm->wmmat;
+ const int16_t alpha = wm->alpha;
+ const int16_t beta = wm->beta;
+ const int16_t gamma = wm->gamma;
+ const int16_t delta = wm->delta;
+
+ const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
+ uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+ av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
+ p_width, p_height, p_stride, subsampling_x,
+ subsampling_y, bd, conv_params, alpha, beta, gamma,
+ delta);
}
static int64_t highbd_frame_error(const uint16_t *const ref, int stride,
@@ -1120,25 +582,25 @@ static int64_t highbd_frame_error(const uint16_t *const ref, int stride,
static int64_t highbd_warp_error(
WarpedMotionParams *wm, const uint8_t *const ref8, int width, int height,
int stride, const uint8_t *const dst8, int p_col, int p_row, int p_width,
- int p_height, int p_stride, int subsampling_x, int subsampling_y,
- int x_scale, int y_scale, int bd, int64_t best_error) {
+ int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd,
+ int64_t best_error) {
int64_t gm_sumerr = 0;
- int warp_w, warp_h;
- int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
- int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
+ const int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
+ const int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
uint16_t tmp[WARP_ERROR_BLOCK * WARP_ERROR_BLOCK];
- ConvolveParams conv_params = get_conv_params(0, 0, 0);
+ ConvolveParams conv_params = get_conv_params(0, 0, 0, bd);
+ conv_params.use_jnt_comp_avg = 0;
for (int i = p_row; i < p_row + p_height; i += WARP_ERROR_BLOCK) {
for (int j = p_col; j < p_col + p_width; j += WARP_ERROR_BLOCK) {
// avoid warping extra 8x8 blocks in the padded region of the frame
// when p_width and p_height are not multiples of WARP_ERROR_BLOCK
- warp_w = AOMMIN(error_bsize_w, p_col + p_width - j);
- warp_h = AOMMIN(error_bsize_h, p_row + p_height - i);
+ const int warp_w = AOMMIN(error_bsize_w, p_col + p_width - j);
+ const int warp_h = AOMMIN(error_bsize_h, p_row + p_height - i);
highbd_warp_plane(wm, ref8, width, height, stride,
CONVERT_TO_BYTEPTR(tmp), j, i, warp_w, warp_h,
- WARP_ERROR_BLOCK, subsampling_x, subsampling_y, x_scale,
- y_scale, bd, &conv_params);
+ WARP_ERROR_BLOCK, subsampling_x, subsampling_y, bd,
+ &conv_params);
gm_sumerr += highbd_frame_error(
tmp, WARP_ERROR_BLOCK, CONVERT_TO_SHORTPTR(dst8) + j + i * p_stride,
@@ -1148,41 +610,11 @@ static int64_t highbd_warp_error(
}
return gm_sumerr;
}
-#endif // CONFIG_HIGHBITDEPTH
static INLINE int error_measure(int err) {
return error_measure_lut[255 + err];
}
-static void warp_plane_old(const WarpedMotionParams *const wm,
- const uint8_t *const ref, int width, int height,
- int stride, uint8_t *pred, int p_col, int p_row,
- int p_width, int p_height, int p_stride,
- int subsampling_x, int subsampling_y, int x_scale,
- int y_scale, ConvolveParams *conv_params) {
- int i, j;
- ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
- if (projectpoints == NULL) return;
- for (i = p_row; i < p_row + p_height; ++i) {
- for (j = p_col; j < p_col + p_width; ++j) {
- int in[2], out[2];
- in[0] = j;
- in[1] = i;
- projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
- out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, SCALE_SUBPEL_BITS);
- out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, SCALE_SUBPEL_BITS);
- if (conv_params->do_average)
- pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
- pred[(j - p_col) + (i - p_row) * p_stride] +
- warp_interpolate(ref, out[0], out[1], width, height, stride),
- 1);
- else
- pred[(j - p_col) + (i - p_row) * p_stride] =
- warp_interpolate(ref, out[0], out[1], width, height, stride);
- }
- }
-}
-
/* The warp filter for ROTZOOM and AFFINE models works as follows:
* Split the input into 8x8 blocks
* For each block, project the point (4, 4) within the block, to get the
@@ -1237,10 +669,10 @@ static void warp_plane_old(const WarpedMotionParams *const wm,
This allows the derivation of the appropriate bit widths and offsets for
the various intermediate values: If
- F := WARPEDPIXEL_FILTER_BITS = 7 (or else the above ranges need adjusting)
+ F := FILTER_BITS = 7 (or else the above ranges need adjusting)
So a *single* filter stage maps a k-bit input to a (k + F + 1)-bit
intermediate value.
- H := HORSHEAR_REDUCE_PREC_BITS
+ H := ROUND0_BITS
V := VERSHEAR_REDUCE_PREC_BITS
(and note that we must have H + V = 2*F for the output to have the same
scale as the input)
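
As a worked instance of the bit-width argument above, take bd = 8 and conv_params->round_0 = 3 (the usual ROUND0_BITS value): the horizontal stage produces intermediates of at most bd + FILTER_BITS + 1 - 3 = 13 bits, the vertical accumulation adds another FILTER_BITS + 1 for 21 bits, and the non-compound path then shifts by reduce_bits_vert = 2 * FILTER_BITS - 3 = 11, landing back at bd + 2 = 10 bits, which is exactly what the assert(0 <= sum && sum < (1 << (bd + 2))) in av1_warp_affine_c checks before the offsets are removed.
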
@@ -1275,38 +707,23 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
ConvolveParams *conv_params, int16_t alpha, int16_t beta,
int16_t gamma, int16_t delta) {
int32_t tmp[15 * 8];
- int i, j, k, l, m;
const int bd = 8;
-#if CONFIG_CONVOLVE_ROUND
- const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
- const int reduce_bits_horiz =
- use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
- const int max_bits_horiz =
- use_conv_params
- ? bd + FILTER_BITS + 1 - conv_params->round_0
- : bd + WARPEDPIXEL_FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz =
- use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
- const int offset_bits_vert =
- use_conv_params
- ? bd + 2 * FILTER_BITS - conv_params->round_0
- : bd + 2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS;
- if (use_conv_params) {
- conv_params->do_post_rounding = 1;
- }
- assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
-#else
- const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
- const int max_bits_horiz =
- bd + WARPEDPIXEL_FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
- const int offset_bits_vert =
- bd + 2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS;
-#endif
+ const int reduce_bits_horiz = conv_params->round_0;
+ const int reduce_bits_vert = conv_params->is_compound
+ ? conv_params->round_1
+ : 2 * FILTER_BITS - reduce_bits_horiz;
+ const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
+ const int offset_bits_horiz = bd + FILTER_BITS - 1;
+ const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
+ const int round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
(void)max_bits_horiz;
+ assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
+ assert(IMPLIES(conv_params->do_average, conv_params->is_compound));
- for (i = p_row; i < p_row + p_height; i += 8) {
- for (j = p_col; j < p_col + p_width; j += 8) {
+ for (int i = p_row; i < p_row + p_height; i += 8) {
+ for (int j = p_col; j < p_col + p_width; j += 8) {
// Calculate the center of this 8x8 block,
// project to luma coordinates (if in a subsampled chroma plane),
// apply the affine transformation,
@@ -1330,17 +747,13 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
// Horizontal filter
- for (k = -7; k < 8; ++k) {
+ for (int k = -7; k < 8; ++k) {
// Clamp to top/bottom edge of the frame
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
+ const int iy = clamp(iy4 + k, 0, height - 1);
int sx = sx4 + beta * (k + 4);
- for (l = -4; l < 4; ++l) {
+ for (int l = -4; l < 4; ++l) {
int ix = ix4 + l - 3;
// At this point, sx = sx4 + alpha * l + beta * k
const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
@@ -1349,13 +762,9 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
const int16_t *coeffs = warped_filter[offs];
int32_t sum = 1 << offset_bits_horiz;
- for (m = 0; m < 8; ++m) {
+ for (int m = 0; m < 8; ++m) {
// Clamp to left/right edge of the frame
- int sample_x = ix + m;
- if (sample_x < 0)
- sample_x = 0;
- else if (sample_x > width - 1)
- sample_x = width - 1;
+ const int sample_x = clamp(ix + m, 0, width - 1);
sum += ref[iy * stride + sample_x] * coeffs[m];
}
@@ -1367,9 +776,9 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
}
// Vertical filter
- for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
+ for (int k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
int sy = sy4 + delta * (k + 4);
- for (l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
+ for (int l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
// At this point, sy = sy4 + gamma * l + delta * k
const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
WARPEDPIXEL_PREC_SHIFTS;
@@ -1377,36 +786,40 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
const int16_t *coeffs = warped_filter[offs];
int32_t sum = 1 << offset_bits_vert;
- for (m = 0; m < 8; ++m) {
+ for (int m = 0; m < 8; ++m) {
sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
}
-#if CONFIG_CONVOLVE_ROUND
- if (use_conv_params) {
+
+ if (conv_params->is_compound) {
CONV_BUF_TYPE *p =
&conv_params
->dst[(i - p_row + k + 4) * conv_params->dst_stride +
(j - p_col + l + 4)];
- sum = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
- (1 << (offset_bits_horiz + FILTER_BITS -
- conv_params->round_0 - conv_params->round_1)) -
- (1 << (offset_bits_vert - conv_params->round_1));
- if (conv_params->do_average)
- *p += sum;
- else
+ sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
+ if (conv_params->do_average) {
+ uint8_t *dst8 =
+ &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
+ int32_t tmp32 = *p;
+ if (conv_params->use_jnt_comp_avg) {
+ tmp32 = tmp32 * conv_params->fwd_offset +
+ sum * conv_params->bck_offset;
+ tmp32 = tmp32 >> DIST_PRECISION_BITS;
+ } else {
+ tmp32 += sum;
+ tmp32 = tmp32 >> 1;
+ }
+ tmp32 = tmp32 - (1 << (offset_bits - conv_params->round_1)) -
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ *dst8 = clip_pixel(ROUND_POWER_OF_TWO(tmp32, round_bits));
+ } else {
*p = sum;
+ }
} else {
-#else
- {
-#endif
uint8_t *p =
&pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
- sum = ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS);
+ sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
assert(0 <= sum && sum < (1 << (bd + 2)));
- uint8_t px = clip_pixel(sum - (1 << (bd - 1)) - (1 << bd));
- if (conv_params->do_average)
- *p = ROUND_POWER_OF_TWO(*p + px, 1);
- else
- *p = px;
+ *p = clip_pixel(sum - (1 << (bd - 1)) - (1 << bd));
}
sy += gamma;
}
@@ -1419,27 +832,20 @@ static void warp_plane(WarpedMotionParams *wm, const uint8_t *const ref,
int width, int height, int stride, uint8_t *pred,
int p_col, int p_row, int p_width, int p_height,
int p_stride, int subsampling_x, int subsampling_y,
- int x_scale, int y_scale, ConvolveParams *conv_params) {
+ ConvolveParams *conv_params) {
+ assert(wm->wmtype <= AFFINE);
if (wm->wmtype == ROTZOOM) {
wm->wmmat[5] = wm->wmmat[2];
wm->wmmat[4] = -wm->wmmat[3];
}
- if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) &&
- x_scale == SCALE_SUBPEL_SHIFTS && y_scale == SCALE_SUBPEL_SHIFTS) {
- const int32_t *const mat = wm->wmmat;
- const int16_t alpha = wm->alpha;
- const int16_t beta = wm->beta;
- const int16_t gamma = wm->gamma;
- const int16_t delta = wm->delta;
-
- av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
- p_width, p_height, p_stride, subsampling_x, subsampling_y,
- conv_params, alpha, beta, gamma, delta);
- } else {
- warp_plane_old(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
- p_height, p_stride, subsampling_x, subsampling_y, x_scale,
- y_scale, conv_params);
- }
+ const int32_t *const mat = wm->wmmat;
+ const int16_t alpha = wm->alpha;
+ const int16_t beta = wm->beta;
+ const int16_t gamma = wm->gamma;
+ const int16_t delta = wm->delta;
+ av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row, p_width,
+ p_height, p_stride, subsampling_x, subsampling_y, conv_params,
+ alpha, beta, gamma, delta);
}
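
In the compound (is_compound) branch above, the final combine either averages the stored first-pass prediction with the new warped sum or blends them with the distance weights fwd_offset/bck_offset, then strips the accumulated offsets and rounds back to pixel range. A condensed sketch of that last step for the 8-bit path (illustration only; the helper name is mine, and it assumes fwd_offset + bck_offset == 1 << DIST_PRECISION_BITS):

static uint8_t jnt_average_example(int32_t first_pred, int32_t warped_sum,
                                   const ConvolveParams *conv_params) {
  const int bd = 8;
  const int round_bits =
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
  int32_t tmp32;
  if (conv_params->use_jnt_comp_avg) {
    tmp32 = (first_pred * conv_params->fwd_offset +
             warped_sum * conv_params->bck_offset) >>
            DIST_PRECISION_BITS;             // weighted by temporal distance
  } else {
    tmp32 = (first_pred + warped_sum) >> 1;  // plain average
  }
  tmp32 -= (1 << (offset_bits - conv_params->round_1)) +
           (1 << (offset_bits - conv_params->round_1 - 1));
  return clip_pixel(ROUND_POWER_OF_TWO(tmp32, round_bits));
}
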
static int64_t frame_error(const uint8_t *const ref, int stride,
@@ -1459,14 +865,15 @@ static int64_t warp_error(WarpedMotionParams *wm, const uint8_t *const ref,
int width, int height, int stride,
const uint8_t *const dst, int p_col, int p_row,
int p_width, int p_height, int p_stride,
- int subsampling_x, int subsampling_y, int x_scale,
- int y_scale, int64_t best_error) {
+ int subsampling_x, int subsampling_y,
+ int64_t best_error) {
int64_t gm_sumerr = 0;
int warp_w, warp_h;
int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
uint8_t tmp[WARP_ERROR_BLOCK * WARP_ERROR_BLOCK];
- ConvolveParams conv_params = get_conv_params(0, 0, 0);
+ ConvolveParams conv_params = get_conv_params(0, 0, 0, 8);
+ conv_params.use_jnt_comp_avg = 0;
for (int i = p_row; i < p_row + p_height; i += WARP_ERROR_BLOCK) {
for (int j = p_col; j < p_col + p_width; j += WARP_ERROR_BLOCK) {
@@ -1475,8 +882,7 @@ static int64_t warp_error(WarpedMotionParams *wm, const uint8_t *const ref,
warp_w = AOMMIN(error_bsize_w, p_col + p_width - j);
warp_h = AOMMIN(error_bsize_h, p_row + p_height - i);
warp_plane(wm, ref, width, height, stride, tmp, j, i, warp_w, warp_h,
- WARP_ERROR_BLOCK, subsampling_x, subsampling_y, x_scale,
- y_scale, &conv_params);
+ WARP_ERROR_BLOCK, subsampling_x, subsampling_y, &conv_params);
gm_sumerr += frame_error(tmp, WARP_ERROR_BLOCK, dst + j + i * p_stride,
warp_w, warp_h, p_stride);
@@ -1486,70 +892,49 @@ static int64_t warp_error(WarpedMotionParams *wm, const uint8_t *const ref,
return gm_sumerr;
}
-int64_t av1_frame_error(
-#if CONFIG_HIGHBITDEPTH
- int use_hbd, int bd,
-#endif // CONFIG_HIGHBITDEPTH
- const uint8_t *ref, int stride, uint8_t *dst, int p_width, int p_height,
- int p_stride) {
-#if CONFIG_HIGHBITDEPTH
+int64_t av1_frame_error(int use_hbd, int bd, const uint8_t *ref, int stride,
+ uint8_t *dst, int p_width, int p_height, int p_stride) {
if (use_hbd) {
return highbd_frame_error(CONVERT_TO_SHORTPTR(ref), stride,
CONVERT_TO_SHORTPTR(dst), p_width, p_height,
p_stride, bd);
}
-#endif // CONFIG_HIGHBITDEPTH
return frame_error(ref, stride, dst, p_width, p_height, p_stride);
}
-int64_t av1_warp_error(WarpedMotionParams *wm,
-#if CONFIG_HIGHBITDEPTH
- int use_hbd, int bd,
-#endif // CONFIG_HIGHBITDEPTH
+int64_t av1_warp_error(WarpedMotionParams *wm, int use_hbd, int bd,
const uint8_t *ref, int width, int height, int stride,
uint8_t *dst, int p_col, int p_row, int p_width,
int p_height, int p_stride, int subsampling_x,
- int subsampling_y, int x_scale, int y_scale,
- int64_t best_error) {
+ int subsampling_y, int64_t best_error) {
if (wm->wmtype <= AFFINE)
if (!get_shear_params(wm)) return 1;
-#if CONFIG_HIGHBITDEPTH
if (use_hbd)
return highbd_warp_error(wm, ref, width, height, stride, dst, p_col, p_row,
p_width, p_height, p_stride, subsampling_x,
- subsampling_y, x_scale, y_scale, bd, best_error);
-#endif // CONFIG_HIGHBITDEPTH
+ subsampling_y, bd, best_error);
return warp_error(wm, ref, width, height, stride, dst, p_col, p_row, p_width,
- p_height, p_stride, subsampling_x, subsampling_y, x_scale,
- y_scale, best_error);
+ p_height, p_stride, subsampling_x, subsampling_y,
+ best_error);
}
-void av1_warp_plane(WarpedMotionParams *wm,
-#if CONFIG_HIGHBITDEPTH
- int use_hbd, int bd,
-#endif // CONFIG_HIGHBITDEPTH
+void av1_warp_plane(WarpedMotionParams *wm, int use_hbd, int bd,
const uint8_t *ref, int width, int height, int stride,
uint8_t *pred, int p_col, int p_row, int p_width,
int p_height, int p_stride, int subsampling_x,
- int subsampling_y, int x_scale, int y_scale,
- ConvolveParams *conv_params) {
-#if CONFIG_HIGHBITDEPTH
+ int subsampling_y, ConvolveParams *conv_params) {
if (use_hbd)
highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row,
p_width, p_height, p_stride, subsampling_x, subsampling_y,
- x_scale, y_scale, bd, conv_params);
+ bd, conv_params);
else
-#endif // CONFIG_HIGHBITDEPTH
warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
- p_height, p_stride, subsampling_x, subsampling_y, x_scale,
- y_scale, conv_params);
+ p_height, p_stride, subsampling_x, subsampling_y, conv_params);
}
-#if CONFIG_WARPED_MOTION
-#define LEAST_SQUARES_ORDER 2
-
#define LS_MV_MAX 256 // max mv in 1/8-pel
-#define LS_STEP 2
+// Use LS_STEP = 8 so that 2 fewer bits are needed for A, Bx, By.
+#define LS_STEP 8
// Assuming LS_MV_MAX is < MAX_SB_SIZE * 8,
// the precision needed is:
@@ -1570,13 +955,17 @@ void av1_warp_plane(WarpedMotionParams *wm,
#define LS_MAT_MIN (-(1 << (LS_MAT_BITS - 1)))
#define LS_MAT_MAX ((1 << (LS_MAT_BITS - 1)) - 1)
-#define LS_SUM(a) ((a)*4 + LS_STEP * 2)
-#define LS_SQUARE(a) \
- (((a) * (a)*4 + (a)*4 * LS_STEP + LS_STEP * LS_STEP * 2) >> 2)
-#define LS_PRODUCT1(a, b) \
- (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP) >> 2)
-#define LS_PRODUCT2(a, b) \
- (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP * 2) >> 2)
+// By setting LS_STEP = 8, the 2 least significant bits of every element in
+// A, Bx and By are 0, so we can reduce LS_MAT_RANGE_BITS(2) bits here.
+#define LS_SQUARE(a) \
+ (((a) * (a)*4 + (a)*4 * LS_STEP + LS_STEP * LS_STEP * 2) >> \
+ (2 + LS_MAT_DOWN_BITS))
+#define LS_PRODUCT1(a, b) \
+ (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP) >> \
+ (2 + LS_MAT_DOWN_BITS))
+#define LS_PRODUCT2(a, b) \
+ (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP * 2) >> \
+ (2 + LS_MAT_DOWN_BITS))
#define USE_LIMITED_PREC_MULT 0
@@ -1655,22 +1044,24 @@ static int32_t get_mult_shift_diag(int64_t Px, int16_t iDet, int shift) {
}
#endif // USE_LIMITED_PREC_MULT
-static int find_affine_int(int np, int *pts1, int *pts2, BLOCK_SIZE bsize,
- int mvy, int mvx, WarpedMotionParams *wm, int mi_row,
- int mi_col) {
+static int find_affine_int(int np, const int *pts1, const int *pts2,
+ BLOCK_SIZE bsize, int mvy, int mvx,
+ WarpedMotionParams *wm, int mi_row, int mi_col) {
int32_t A[2][2] = { { 0, 0 }, { 0, 0 } };
int32_t Bx[2] = { 0, 0 };
int32_t By[2] = { 0, 0 };
- int i, n = 0;
+ int i;
const int bw = block_size_wide[bsize];
const int bh = block_size_high[bsize];
- const int isuy = (mi_row * MI_SIZE + AOMMAX(bh, MI_SIZE) / 2 - 1);
- const int isux = (mi_col * MI_SIZE + AOMMAX(bw, MI_SIZE) / 2 - 1);
- const int suy = isuy * 8;
- const int sux = isux * 8;
+ const int rsuy = (AOMMAX(bh, MI_SIZE) / 2 - 1);
+ const int rsux = (AOMMAX(bw, MI_SIZE) / 2 - 1);
+ const int suy = rsuy * 8;
+ const int sux = rsux * 8;
const int duy = suy + mvy;
const int dux = sux + mvx;
+ const int isuy = (mi_row * MI_SIZE + rsuy);
+ const int isux = (mi_col * MI_SIZE + rsux);
// Assume the center pixel of the block has exactly the same motion vector
// as transmitted for the block. First shift the origin of the source
@@ -1694,13 +1085,15 @@ static int find_affine_int(int np, int *pts1, int *pts2, BLOCK_SIZE bsize,
//
// The loop below computes: A = P'P, Bx = P'q, By = P'r
// We need to just compute inv(A).Bx and inv(A).By for the solutions.
- int sx, sy, dx, dy;
// Contribution from neighbor block
- for (i = 0; i < np && n < LEAST_SQUARES_SAMPLES_MAX; i++) {
- dx = pts2[i * 2] - dux;
- dy = pts2[i * 2 + 1] - duy;
- sx = pts1[i * 2] - sux;
- sy = pts1[i * 2 + 1] - suy;
+ for (i = 0; i < np; i++) {
+ const int dx = pts2[i * 2] - dux;
+ const int dy = pts2[i * 2 + 1] - duy;
+ const int sx = pts1[i * 2] - sux;
+ const int sy = pts1[i * 2 + 1] - suy;
+ // TODO(yunqing): This comparison wouldn't be necessary if the sample
+ // selection were done in find_samples(). Also, the global offset could be
+ // removed while collecting samples.
if (abs(sx - dx) < LS_MV_MAX && abs(sy - dy) < LS_MV_MAX) {
A[0][0] += LS_SQUARE(sx);
A[0][1] += LS_PRODUCT1(sx, sy);
@@ -1709,41 +1102,20 @@ static int find_affine_int(int np, int *pts1, int *pts2, BLOCK_SIZE bsize,
Bx[1] += LS_PRODUCT1(sy, dx);
By[0] += LS_PRODUCT1(sx, dy);
By[1] += LS_PRODUCT2(sy, dy);
- n++;
}
}
- int downshift;
- if (n >= 4)
- downshift = LS_MAT_DOWN_BITS;
- else if (n >= 2)
- downshift = LS_MAT_DOWN_BITS - 1;
- else
- downshift = LS_MAT_DOWN_BITS - 2;
-
- // Reduce precision by downshift bits
- A[0][0] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[0][0], downshift), LS_MAT_MIN,
- LS_MAT_MAX);
- A[0][1] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[0][1], downshift), LS_MAT_MIN,
- LS_MAT_MAX);
- A[1][1] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[1][1], downshift), LS_MAT_MIN,
- LS_MAT_MAX);
- Bx[0] = clamp(ROUND_POWER_OF_TWO_SIGNED(Bx[0], downshift), LS_MAT_MIN,
- LS_MAT_MAX);
- Bx[1] = clamp(ROUND_POWER_OF_TWO_SIGNED(Bx[1], downshift), LS_MAT_MIN,
- LS_MAT_MAX);
- By[0] = clamp(ROUND_POWER_OF_TWO_SIGNED(By[0], downshift), LS_MAT_MIN,
- LS_MAT_MAX);
- By[1] = clamp(ROUND_POWER_OF_TWO_SIGNED(By[1], downshift), LS_MAT_MIN,
- LS_MAT_MAX);
-
- int64_t Px[2], Py[2], Det;
- int16_t iDet, shift;
- // These divided by the Det, are the least squares solutions
- Px[0] = (int64_t)A[1][1] * Bx[0] - (int64_t)A[0][1] * Bx[1];
- Px[1] = -(int64_t)A[0][1] * Bx[0] + (int64_t)A[0][0] * Bx[1];
- Py[0] = (int64_t)A[1][1] * By[0] - (int64_t)A[0][1] * By[1];
- Py[1] = -(int64_t)A[0][1] * By[0] + (int64_t)A[0][0] * By[1];
+ // Just for debugging, and can be removed later.
+ assert(A[0][0] >= LS_MAT_MIN && A[0][0] <= LS_MAT_MAX);
+ assert(A[0][1] >= LS_MAT_MIN && A[0][1] <= LS_MAT_MAX);
+ assert(A[1][1] >= LS_MAT_MIN && A[1][1] <= LS_MAT_MAX);
+ assert(Bx[0] >= LS_MAT_MIN && Bx[0] <= LS_MAT_MAX);
+ assert(Bx[1] >= LS_MAT_MIN && Bx[1] <= LS_MAT_MAX);
+ assert(By[0] >= LS_MAT_MIN && By[0] <= LS_MAT_MAX);
+ assert(By[1] >= LS_MAT_MIN && By[1] <= LS_MAT_MAX);
+
+ int64_t Det;
+ int16_t iDet, shift;
// Compute Determinant of A
Det = (int64_t)A[0][0] * A[1][1] - (int64_t)A[0][1] * A[0][1];
@@ -1755,6 +1127,14 @@ static int find_affine_int(int np, int *pts1, int *pts2, BLOCK_SIZE bsize,
shift = 0;
}
+ int64_t Px[2], Py[2];
+
+ // These divided by the Det, are the least squares solutions
+ Px[0] = (int64_t)A[1][1] * Bx[0] - (int64_t)A[0][1] * Bx[1];
+ Px[1] = -(int64_t)A[0][1] * Bx[0] + (int64_t)A[0][0] * Bx[1];
+ Py[0] = (int64_t)A[1][1] * By[0] - (int64_t)A[0][1] * By[1];
+ Py[1] = -(int64_t)A[0][1] * By[0] + (int64_t)A[0][0] * By[1];
+
wm->wmmat[2] = get_mult_shift_diag(Px[0], iDet, shift);
wm->wmmat[3] = get_mult_shift_ndiag(Px[1], iDet, shift);
wm->wmmat[4] = get_mult_shift_ndiag(Py[0], iDet, shift);
@@ -1783,13 +1163,13 @@ int find_projection(int np, int *pts1, int *pts2, BLOCK_SIZE bsize, int mvy,
int mvx, WarpedMotionParams *wm_params, int mi_row,
int mi_col) {
assert(wm_params->wmtype == AFFINE);
- const int result = find_affine_int(np, pts1, pts2, bsize, mvy, mvx, wm_params,
- mi_row, mi_col);
- if (result == 0) {
- // check compatibility with the fast warp filter
- if (!get_shear_params(wm_params)) return 1;
- }
- return result;
+ if (find_affine_int(np, pts1, pts2, bsize, mvy, mvx, wm_params, mi_row,
+ mi_col))
+ return 1;
+
+ // check compatibility with the fast warp filter
+ if (!get_shear_params(wm_params)) return 1;
+
+ return 0;
}
-#endif // CONFIG_WARPED_MOTION
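
Stepping back, find_affine_int() is a fixed-point Cramer's-rule solve of two 2x2 systems, A * x = Bx and A * y = By, with A symmetric; the Px/Py expressions are the adjugate products, and iDet together with shift approximates 1/Det via the divisor LUT. A floating-point sketch of the same solve (the helper name is mine):

static int solve_2x2_symmetric_example(const double A[2][2],
                                       const double Bx[2], const double By[2],
                                       double x[2], double y[2]) {
  const double det = A[0][0] * A[1][1] - A[0][1] * A[0][1];
  if (det == 0.0) return 0;  // degenerate system
  x[0] = (A[1][1] * Bx[0] - A[0][1] * Bx[1]) / det;
  x[1] = (-A[0][1] * Bx[0] + A[0][0] * Bx[1]) / det;
  y[0] = (A[1][1] * By[0] - A[0][1] * By[1]) / det;
  y[1] = (-A[0][1] * By[0] + A[0][0] * By[1]) / det;
  return 1;
}
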
diff --git a/third_party/aom/av1/common/warped_motion.h b/third_party/aom/av1/common/warped_motion.h
index e05f6a85f..f5da36bbb 100644
--- a/third_party/aom/av1/common/warped_motion.h
+++ b/third_party/aom/av1/common/warped_motion.h
@@ -18,94 +18,79 @@
#include <math.h>
#include <assert.h>
-#include "./aom_config.h"
+#include "config/aom_config.h"
+
#include "aom_ports/mem.h"
#include "aom_dsp/aom_dsp_common.h"
#include "av1/common/mv.h"
#include "av1/common/convolve.h"
#define MAX_PARAMDIM 9
-#if CONFIG_WARPED_MOTION
#define LEAST_SQUARES_SAMPLES_MAX_BITS 3
#define LEAST_SQUARES_SAMPLES_MAX (1 << LEAST_SQUARES_SAMPLES_MAX_BITS)
-
-#if WARPED_MOTION_SORT_SAMPLES
-// Search 1 row on the top and 1 column on the left, 1 upper-left block,
-// 1 upper-right block.
-#define SAMPLES_ARRAY_SIZE ((MAX_MIB_SIZE * 2 + 2) * 2)
-#else
#define SAMPLES_ARRAY_SIZE (LEAST_SQUARES_SAMPLES_MAX * 2)
-#endif // WARPED_MOTION_SORT_SAMPLES
-
+#define WARPED_MOTION_DEBUG 0
#define DEFAULT_WMTYPE AFFINE
-#endif // CONFIG_WARPED_MOTION
extern const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8];
-typedef void (*ProjectPointsFunc)(const int32_t *mat, int *points, int *proj,
- const int n, const int stride_points,
- const int stride_proj,
- const int subsampling_x,
- const int subsampling_y);
-
-void project_points_translation(const int32_t *mat, int *points, int *proj,
- const int n, const int stride_points,
- const int stride_proj, const int subsampling_x,
- const int subsampling_y);
-
-void project_points_rotzoom(const int32_t *mat, int *points, int *proj,
- const int n, const int stride_points,
- const int stride_proj, const int subsampling_x,
- const int subsampling_y);
+static const uint8_t warp_pad_left[14][16] = {
+ { 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 5, 5, 5, 5, 5, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 6, 6, 6, 6, 6, 6, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 7, 7, 7, 7, 7, 7, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 11, 12, 13, 14, 15 },
+ { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 12, 13, 14, 15 },
+ { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 13, 14, 15 },
+ { 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 14, 15 },
+ { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 15 },
+ { 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15 },
+};
+
+static const uint8_t warp_pad_right[14][16] = {
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14 },
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 13, 13 },
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 12, 12 },
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11, 11 },
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 10, 10 },
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 9, 9, 9, 9 },
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
+ { 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 },
+ { 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
+ { 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
+ { 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
+ { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
+};
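These two tables implement edge padding for the warp filter's 16 horizontal lanes: row r of warp_pad_left clamps every lane index up to at least r + 1, and row r of warp_pad_right clamps it down to at most 14 - r, so lanes that would fall outside the valid columns re-read the nearest valid one. A small sketch that would regenerate the tables under that reading (the function name is hypothetical, not part of the library):

#include <stdint.h>

/* Regenerate the padding tables: left[r][i] = max(i, r + 1),
 * right[r][i] = min(i, 14 - r). */
static void build_warp_pad_tables(uint8_t left[14][16],
                                  uint8_t right[14][16]) {
  for (int r = 0; r < 14; ++r) {
    for (int i = 0; i < 16; ++i) {
      left[r][i] = (uint8_t)(i < r + 1 ? r + 1 : i);
      right[r][i] = (uint8_t)(i > 14 - r ? 14 - r : i);
    }
  }
}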
void project_points_affine(const int32_t *mat, int *points, int *proj,
const int n, const int stride_points,
const int stride_proj, const int subsampling_x,
const int subsampling_y);
-void project_points_hortrapezoid(const int32_t *mat, int *points, int *proj,
- const int n, const int stride_points,
- const int stride_proj, const int subsampling_x,
- const int subsampling_y);
-void project_points_vertrapezoid(const int32_t *mat, int *points, int *proj,
- const int n, const int stride_points,
- const int stride_proj, const int subsampling_x,
- const int subsampling_y);
-void project_points_homography(const int32_t *mat, int *points, int *proj,
- const int n, const int stride_points,
- const int stride_proj, const int subsampling_x,
- const int subsampling_y);
-
// Returns the error between the result of applying motion 'wm' to the frame
// described by 'ref' and the frame described by 'dst'.
-int64_t av1_warp_error(WarpedMotionParams *wm,
-#if CONFIG_HIGHBITDEPTH
- int use_hbd, int bd,
-#endif // CONFIG_HIGHBITDEPTH
+int64_t av1_warp_error(WarpedMotionParams *wm, int use_hbd, int bd,
const uint8_t *ref, int width, int height, int stride,
uint8_t *dst, int p_col, int p_row, int p_width,
int p_height, int p_stride, int subsampling_x,
- int subsampling_y, int x_scale, int y_scale,
- int64_t best_error);
+ int subsampling_y, int64_t best_error);
// Returns the error between the frame described by 'ref' and the frame
// described by 'dst'.
-int64_t av1_frame_error(
-#if CONFIG_HIGHBITDEPTH
- int use_hbd, int bd,
-#endif // CONFIG_HIGHBITDEPTH
- const uint8_t *ref, int stride, uint8_t *dst, int p_width, int p_height,
- int p_stride);
+int64_t av1_frame_error(int use_hbd, int bd, const uint8_t *ref, int stride,
+ uint8_t *dst, int p_width, int p_height, int p_stride);
-void av1_warp_plane(WarpedMotionParams *wm,
-#if CONFIG_HIGHBITDEPTH
- int use_hbd, int bd,
-#endif // CONFIG_HIGHBITDEPTH
+void av1_warp_plane(WarpedMotionParams *wm, int use_hbd, int bd,
const uint8_t *ref, int width, int height, int stride,
uint8_t *pred, int p_col, int p_row, int p_width,
int p_height, int p_stride, int subsampling_x,
- int subsampling_y, int x_scale, int y_scale,
- ConvolveParams *conv_params);
+ int subsampling_y, ConvolveParams *conv_params);
int find_projection(int np, int *pts1, int *pts2, BLOCK_SIZE bsize, int mvy,
int mvx, WarpedMotionParams *wm_params, int mi_row,
diff --git a/third_party/aom/av1/common/x86/av1_convolve_horiz_rs_sse4.c b/third_party/aom/av1/common/x86/av1_convolve_horiz_rs_sse4.c
new file mode 100644
index 000000000..8aa14696f
--- /dev/null
+++ b/third_party/aom/av1/common/x86/av1_convolve_horiz_rs_sse4.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+#include <smmintrin.h>
+
+#include "config/av1_rtcd.h"
+
+#include "av1/common/convolve.h"
+#include "av1/common/resize.h"
+#include "aom_dsp/x86/synonyms.h"
+
+// Note: If the crop width is not a multiple of 4, then, unlike the C version,
+// this function will overwrite some of the padding on the right hand side of
+// the frame. This padding appears to be trashed anyway, so this should not
+// affect the running of the decoder.
+void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride, int w, int h,
+ const int16_t *x_filters, int x0_qn,
+ int x_step_qn) {
+ assert(UPSCALE_NORMATIVE_TAPS == 8);
+
+ src -= UPSCALE_NORMATIVE_TAPS / 2 - 1;
+
+ const __m128i round_add = _mm_set1_epi32((1 << FILTER_BITS) >> 1);
+ const __m128i zero = _mm_setzero_si128();
+
+ const uint8_t *src_y;
+ uint8_t *dst_y;
+ int x_qn = x0_qn;
+ for (int x = 0; x < w; x += 4, x_qn += 4 * x_step_qn) {
+ const int x_filter_idx0 =
+ ((x_qn + 0 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
+ const int x_filter_idx1 =
+ ((x_qn + 1 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
+ const int x_filter_idx2 =
+ ((x_qn + 2 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
+ const int x_filter_idx3 =
+ ((x_qn + 3 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
+
+ assert(x_filter_idx0 <= RS_SUBPEL_MASK);
+ assert(x_filter_idx1 <= RS_SUBPEL_MASK);
+ assert(x_filter_idx2 <= RS_SUBPEL_MASK);
+ assert(x_filter_idx3 <= RS_SUBPEL_MASK);
+
+ const int16_t *const x_filter0 =
+ &x_filters[x_filter_idx0 * UPSCALE_NORMATIVE_TAPS];
+ const int16_t *const x_filter1 =
+ &x_filters[x_filter_idx1 * UPSCALE_NORMATIVE_TAPS];
+ const int16_t *const x_filter2 =
+ &x_filters[x_filter_idx2 * UPSCALE_NORMATIVE_TAPS];
+ const int16_t *const x_filter3 =
+ &x_filters[x_filter_idx3 * UPSCALE_NORMATIVE_TAPS];
+
+ const __m128i fil0_16 = xx_loadu_128(x_filter0);
+ const __m128i fil1_16 = xx_loadu_128(x_filter1);
+ const __m128i fil2_16 = xx_loadu_128(x_filter2);
+ const __m128i fil3_16 = xx_loadu_128(x_filter3);
+
+ src_y = src;
+ dst_y = dst;
+ for (int y = 0; y < h; y++, src_y += src_stride, dst_y += dst_stride) {
+ const uint8_t *const src_x0 =
+ &src_y[(x_qn + 0 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
+ const uint8_t *const src_x1 =
+ &src_y[(x_qn + 1 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
+ const uint8_t *const src_x2 =
+ &src_y[(x_qn + 2 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
+ const uint8_t *const src_x3 =
+ &src_y[(x_qn + 3 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
+
+ // Load up the source data. This is 8-bit input data, so each load
+ // gets 8 pixels.
+ const __m128i src0_8 = xx_loadl_64(src_x0);
+ const __m128i src1_8 = xx_loadl_64(src_x1);
+ const __m128i src2_8 = xx_loadl_64(src_x2);
+ const __m128i src3_8 = xx_loadl_64(src_x3);
+
+ // Now zero-extend up to 16-bit precision, i.e.
+ // [ 00 00 00 00 hg fe dc ba ] -> [ 0h 0g 0f 0e 0d 0c 0b 0a ]
+ const __m128i src0_16 = _mm_cvtepu8_epi16(src0_8);
+ const __m128i src1_16 = _mm_cvtepu8_epi16(src1_8);
+ const __m128i src2_16 = _mm_cvtepu8_epi16(src2_8);
+ const __m128i src3_16 = _mm_cvtepu8_epi16(src3_8);
+
+ // Multiply by filter coefficients (results in a 32-bit value),
+ // and add adjacent pairs, i.e.
+ // ([ s7 s6 s5 s4 s3 s2 s1 s0], [ f7 f6 f5 f4 f3 f2 f1 f0 ])
+ // -> [ {s7*f7+s6*f6} {s5*f5+s4*f4} {s3*f3+s2*f2} {s1*f1+s0*f0} ]
+ const __m128i conv0_32 = _mm_madd_epi16(src0_16, fil0_16);
+ const __m128i conv1_32 = _mm_madd_epi16(src1_16, fil1_16);
+ const __m128i conv2_32 = _mm_madd_epi16(src2_16, fil2_16);
+ const __m128i conv3_32 = _mm_madd_epi16(src3_16, fil3_16);
+
+ // Reduce horizontally and add, i.e.
+ // ([ D C B A ], [ S R Q P ]) -> [ S+R Q+P D+C B+A ]
+ const __m128i conv01_32 = _mm_hadd_epi32(conv0_32, conv1_32);
+ const __m128i conv23_32 = _mm_hadd_epi32(conv2_32, conv3_32);
+
+ const __m128i conv0123_32 = _mm_hadd_epi32(conv01_32, conv23_32);
+
+ // Divide down by (1 << FILTER_BITS), rounding to nearest.
+ const __m128i shifted_32 =
+ _mm_srai_epi32(_mm_add_epi32(conv0123_32, round_add), FILTER_BITS);
+
+ // Pack 32-bit values into 16-bit values, i.e.
+ // ([ D C B A ], [ 0 0 0 0 ]) -> [ 0 0 0 0 D C B A ]
+ const __m128i shifted_16 = _mm_packus_epi32(shifted_32, zero);
+
+ // Pack 16-bit values into 8-bit values, i.e.
+ // ([ 0 0 0 0 D C B A ], [ 0 0 0 0 0 0 0 0 ])
+ // -> [ 0 0 0 0 0 0 DC BA ]
+ const __m128i shifted_8 = _mm_packus_epi16(shifted_16, zero);
+
+ // Write to the output
+ xx_storel_32(&dst_y[x], shifted_8);
+ }
+ }
+}
+
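For reference, the intrinsics above compute four output pixels per iteration of exactly the arithmetic below: select the 8-tap filter phase from the fractional part of the accumulated position x_qn, convolve, round by FILTER_BITS, and clamp to 8 bits. This scalar sketch assumes src has already been shifted back by UPSCALE_NORMATIVE_TAPS / 2 - 1, as at the top of the function; it mirrors the C reference behaviour and is illustrative only:

/* One row of the horizontal resize convolution, written out in scalar form. */
static void convolve_horiz_rs_row_sketch(const uint8_t *src, uint8_t *dst,
                                         int w, const int16_t *x_filters,
                                         int x0_qn, int x_step_qn) {
  int x_qn = x0_qn;
  for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
    const uint8_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS];
    const int filter_idx =
        (x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
    const int16_t *const filter =
        &x_filters[filter_idx * UPSCALE_NORMATIVE_TAPS];
    int sum = 0;
    for (int k = 0; k < UPSCALE_NORMATIVE_TAPS; ++k)
      sum += filter[k] * src_x[k];
    dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
  }
}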
+// Note: If the crop width is not a multiple of 4, then, unlike the C version,
+// this function will overwrite some of the padding on the right hand side of
+// the frame. This padding appears to be trashed anyway, so this should not
+// affect the running of the decoder.
+void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride,
+ uint16_t *dst, int dst_stride, int w,
+ int h, const int16_t *x_filters,
+ int x0_qn, int x_step_qn, int bd) {
+ assert(UPSCALE_NORMATIVE_TAPS == 8);
+ assert(bd == 8 || bd == 10 || bd == 12);
+
+ src -= UPSCALE_NORMATIVE_TAPS / 2 - 1;
+
+ const __m128i round_add = _mm_set1_epi32((1 << FILTER_BITS) >> 1);
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i clip_maximum = _mm_set1_epi16((1 << bd) - 1);
+
+ const uint16_t *src_y;
+ uint16_t *dst_y;
+ int x_qn = x0_qn;
+ for (int x = 0; x < w; x += 4, x_qn += 4 * x_step_qn) {
+ const int x_filter_idx0 =
+ ((x_qn + 0 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
+ const int x_filter_idx1 =
+ ((x_qn + 1 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
+ const int x_filter_idx2 =
+ ((x_qn + 2 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
+ const int x_filter_idx3 =
+ ((x_qn + 3 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
+
+ assert(x_filter_idx0 <= RS_SUBPEL_MASK);
+ assert(x_filter_idx1 <= RS_SUBPEL_MASK);
+ assert(x_filter_idx2 <= RS_SUBPEL_MASK);
+ assert(x_filter_idx3 <= RS_SUBPEL_MASK);
+
+ const int16_t *const x_filter0 =
+ &x_filters[x_filter_idx0 * UPSCALE_NORMATIVE_TAPS];
+ const int16_t *const x_filter1 =
+ &x_filters[x_filter_idx1 * UPSCALE_NORMATIVE_TAPS];
+ const int16_t *const x_filter2 =
+ &x_filters[x_filter_idx2 * UPSCALE_NORMATIVE_TAPS];
+ const int16_t *const x_filter3 =
+ &x_filters[x_filter_idx3 * UPSCALE_NORMATIVE_TAPS];
+
+ const __m128i fil0_16 = xx_loadu_128(x_filter0);
+ const __m128i fil1_16 = xx_loadu_128(x_filter1);
+ const __m128i fil2_16 = xx_loadu_128(x_filter2);
+ const __m128i fil3_16 = xx_loadu_128(x_filter3);
+
+ src_y = src;
+ dst_y = dst;
+ for (int y = 0; y < h; y++, src_y += src_stride, dst_y += dst_stride) {
+ const uint16_t *const src_x0 =
+ &src_y[(x_qn + 0 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
+ const uint16_t *const src_x1 =
+ &src_y[(x_qn + 1 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
+ const uint16_t *const src_x2 =
+ &src_y[(x_qn + 2 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
+ const uint16_t *const src_x3 =
+ &src_y[(x_qn + 3 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
+
+ // Load up the source data. This is 16-bit input data, so each load
+ // gets 8 pixels.
+ const __m128i src0_16 = xx_loadu_128(src_x0);
+ const __m128i src1_16 = xx_loadu_128(src_x1);
+ const __m128i src2_16 = xx_loadu_128(src_x2);
+ const __m128i src3_16 = xx_loadu_128(src_x3);
+
+ // Multiply by filter coefficients (results in a 32-bit value),
+ // and add adjacent pairs, i.e.
+ // ([ s7 s6 s5 s4 s3 s2 s1 s0], [ f7 f6 f5 f4 f3 f2 f1 f0 ])
+ // -> [ {s7*f7+s6*f6} {s5*f5+s4*f4} {s3*f3+s2*f2} {s1*f1+s0*f0} ]
+ const __m128i conv0_32 = _mm_madd_epi16(src0_16, fil0_16);
+ const __m128i conv1_32 = _mm_madd_epi16(src1_16, fil1_16);
+ const __m128i conv2_32 = _mm_madd_epi16(src2_16, fil2_16);
+ const __m128i conv3_32 = _mm_madd_epi16(src3_16, fil3_16);
+
+ // Reduce horizontally and add, i.e.
+ // ([ D C B A ], [ S R Q P ]) -> [ S+R Q+P D+C B+A ]
+ const __m128i conv01_32 = _mm_hadd_epi32(conv0_32, conv1_32);
+ const __m128i conv23_32 = _mm_hadd_epi32(conv2_32, conv3_32);
+
+ const __m128i conv0123_32 = _mm_hadd_epi32(conv01_32, conv23_32);
+
+ // Divide down by (1 << FILTER_BITS), rounding to nearest.
+ const __m128i shifted_32 =
+ _mm_srai_epi32(_mm_add_epi32(conv0123_32, round_add), FILTER_BITS);
+
+ // Pack 32-bit values into 16-bit values, i.e.
+ // ([ D C B A ], [ 0 0 0 0 ]) -> [ 0 0 0 0 D C B A ]
+ const __m128i shifted_16 = _mm_packus_epi32(shifted_32, zero);
+
+ // Clip the values at (1 << bd) - 1
+ const __m128i clipped_16 = _mm_min_epi16(shifted_16, clip_maximum);
+
+ // Write to the output
+ xx_storel_64(&dst_y[x], clipped_16);
+ }
+ }
+}
diff --git a/third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c b/third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c
index 1f0fedb2a..6747cae01 100644
--- a/third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c
+++ b/third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c
@@ -12,135 +12,16 @@
#include <assert.h>
#include <smmintrin.h>
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_dsp_rtcd.h"
+
#include "aom_dsp/aom_convolve.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/aom_filter.h"
#include "av1/common/convolve.h"
-// Make a mask for coefficients of 10/12 tap filters. The coefficients are
-// packed "89ab89ab". If it's a 12-tap filter, we want all 1's; if it's a
-// 10-tap filter, we want "11001100" to just match the 8,9 terms.
-static __m128i make_1012_mask(int ntaps) {
- uint32_t low = 0xffffffff;
- uint32_t high = (ntaps == 12) ? low : 0;
- return _mm_set_epi32(high, low, high, low);
-}
-
-// Zero-extend the given input operand to an entire __m128i register.
-//
-// Note that there's almost an intrinsic to do this but 32-bit Visual Studio
-// doesn't have _mm_set_epi64x so we have to do it by hand.
-static __m128i extend_32_to_128(uint32_t x) {
- return _mm_set_epi32(0, 0, 0, x);
-}
-
-// Load an SSE register from p and bitwise AND with a.
-static __m128i load_and_128i(const void *p, __m128i a) {
- const __m128d ad = _mm_castsi128_pd(a);
- const __m128d bd = _mm_load1_pd((const double *)p);
- return _mm_castpd_si128(_mm_and_pd(ad, bd));
-}
-
-// The horizontal filter for av1_convolve_2d_scale_sse4_1. This is the more
-// general version, supporting 10 and 12 tap filters. For 8-tap filters, use
-// hfilter8.
-static void hfilter(const uint8_t *src, int src_stride, int32_t *dst, int w,
- int h, int subpel_x_qn, int x_step_qn,
- const InterpFilterParams *filter_params, unsigned round) {
- const int bd = 8;
- const int ntaps = filter_params->taps;
- assert(ntaps == 10 || ntaps == 12);
-
- src -= ntaps / 2 - 1;
-
- // Construct a mask with which we'll AND filter coefficients 89ab89ab to zero
- // out the unneeded entries.
- const __m128i hicoeff_mask = make_1012_mask(ntaps);
-
- int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1));
- const __m128i round_add = _mm_set1_epi32(round_add32);
- const __m128i round_shift = extend_32_to_128(round);
-
- int x_qn = subpel_x_qn;
- for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
- const uint8_t *const src_col = src + (x_qn >> SCALE_SUBPEL_BITS);
- const int filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(filter_idx < SUBPEL_SHIFTS);
- const int16_t *filter =
- av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
-
- // The "lo" coefficients are coefficients 0..7. For a 12-tap filter, the
- // "hi" coefficients are arranged as 89ab89ab. For a 10-tap filter, they
- // are masked out with hicoeff_mask.
- const __m128i coefflo = _mm_loadu_si128((__m128i *)filter);
- const __m128i coeffhi = load_and_128i(filter + 8, hicoeff_mask);
- const __m128i zero = _mm_castps_si128(_mm_setzero_ps());
-
- int y;
- for (y = 0; y <= h - 4; y += 4) {
- const uint8_t *const src0 = src_col + y * src_stride;
- const uint8_t *const src1 = src0 + 1 * src_stride;
- const uint8_t *const src2 = src0 + 2 * src_stride;
- const uint8_t *const src3 = src0 + 3 * src_stride;
-
- // Load up source data. This is 8-bit input data, so each load gets 16
- // pixels (we need at most 12)
- const __m128i data08 = _mm_loadu_si128((__m128i *)src0);
- const __m128i data18 = _mm_loadu_si128((__m128i *)src1);
- const __m128i data28 = _mm_loadu_si128((__m128i *)src2);
- const __m128i data38 = _mm_loadu_si128((__m128i *)src3);
-
- // Now zero-extend up to 16-bit precision by interleaving with zeros. For
- // the "high" pixels (8 to 11), interleave first (so that the expansion
- // to 16-bits operates on an entire register).
- const __m128i data0lo = _mm_unpacklo_epi8(data08, zero);
- const __m128i data1lo = _mm_unpacklo_epi8(data18, zero);
- const __m128i data2lo = _mm_unpacklo_epi8(data28, zero);
- const __m128i data3lo = _mm_unpacklo_epi8(data38, zero);
- const __m128i data01hi8 = _mm_unpackhi_epi32(data08, data18);
- const __m128i data23hi8 = _mm_unpackhi_epi32(data28, data38);
- const __m128i data01hi = _mm_unpacklo_epi8(data01hi8, zero);
- const __m128i data23hi = _mm_unpacklo_epi8(data23hi8, zero);
-
- // Multiply by coefficients
- const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo);
- const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo);
- const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo);
- const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo);
- const __m128i conv01hi = _mm_madd_epi16(data01hi, coeffhi);
- const __m128i conv23hi = _mm_madd_epi16(data23hi, coeffhi);
-
- // Reduce horizontally and add
- const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo);
- const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo);
- const __m128i convlo = _mm_hadd_epi32(conv01lo, conv23lo);
- const __m128i convhi = _mm_hadd_epi32(conv01hi, conv23hi);
- const __m128i conv = _mm_add_epi32(convlo, convhi);
-
- // Divide down by (1 << round), rounding to nearest.
- const __m128i shifted =
- _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
-
- // Write transposed to the output
- _mm_storeu_si128((__m128i *)(dst + y + x * h), shifted);
- }
- for (; y < h; ++y) {
- const uint8_t *const src_row = src_col + y * src_stride;
-
- int32_t sum = (1 << (bd + FILTER_BITS - 1));
- for (int k = 0; k < ntaps; ++k) {
- sum += filter[k] * src_row[k];
- }
-
- dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round);
- }
- }
-}
-
// A specialised version of hfilter, the horizontal filter for
// av1_convolve_2d_scale_sse4_1. This version only supports 8 tap filters.
-static void hfilter8(const uint8_t *src, int src_stride, int32_t *dst, int w,
+static void hfilter8(const uint8_t *src, int src_stride, int16_t *dst, int w,
int h, int subpel_x_qn, int x_step_qn,
const InterpFilterParams *filter_params, unsigned round) {
const int bd = 8;
@@ -150,7 +31,7 @@ static void hfilter8(const uint8_t *src, int src_stride, int32_t *dst, int w,
int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1));
const __m128i round_add = _mm_set1_epi32(round_add32);
- const __m128i round_shift = extend_32_to_128(round);
+ const __m128i round_shift = _mm_cvtsi32_si128(round);
int x_qn = subpel_x_qn;
for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
@@ -197,11 +78,12 @@ static void hfilter8(const uint8_t *src, int src_stride, int32_t *dst, int w,
const __m128i conv = _mm_hadd_epi32(conv01lo, conv23lo);
// Divide down by (1 << round), rounding to nearest.
- const __m128i shifted =
+ __m128i shifted =
_mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
+ shifted = _mm_packus_epi32(shifted, shifted);
// Write transposed to the output
- _mm_storeu_si128((__m128i *)(dst + y + x * h), shifted);
+ _mm_storel_epi64((__m128i *)(dst + y + x * h), shifted);
}
for (; y < h; ++y) {
const uint8_t *const src_row = src_col + y * src_stride;
@@ -216,256 +98,179 @@ static void hfilter8(const uint8_t *src, int src_stride, int32_t *dst, int w,
}
}
-// Do a 12-tap convolution with the given coefficients, loading data from src.
-static __m128i convolve_32(const int32_t *src, __m128i coeff03, __m128i coeff47,
- __m128i coeff8d) {
- const __m128i data03 = _mm_loadu_si128((__m128i *)src);
- const __m128i data47 = _mm_loadu_si128((__m128i *)(src + 4));
- const __m128i data8d = _mm_loadu_si128((__m128i *)(src + 8));
- const __m128i conv03 = _mm_mullo_epi32(data03, coeff03);
- const __m128i conv47 = _mm_mullo_epi32(data47, coeff47);
- const __m128i conv8d = _mm_mullo_epi32(data8d, coeff8d);
- return _mm_add_epi32(_mm_add_epi32(conv03, conv47), conv8d);
-}
-
-// Do an 8-tap convolution with the given coefficients, loading data from src.
-static __m128i convolve_32_8(const int32_t *src, __m128i coeff03,
- __m128i coeff47) {
- const __m128i data03 = _mm_loadu_si128((__m128i *)src);
- const __m128i data47 = _mm_loadu_si128((__m128i *)(src + 4));
- const __m128i conv03 = _mm_mullo_epi32(data03, coeff03);
- const __m128i conv47 = _mm_mullo_epi32(data47, coeff47);
- return _mm_add_epi32(conv03, conv47);
-}
-
-// The vertical filter for av1_convolve_2d_scale_sse4_1. This is the more
-// general version, supporting 10 and 12 tap filters. For 8-tap filters, use
-// vfilter8.
-static void vfilter(const int32_t *src, int src_stride, int32_t *dst,
- int dst_stride, int w, int h, int subpel_y_qn,
- int y_step_qn, const InterpFilterParams *filter_params,
- const ConvolveParams *conv_params, int bd) {
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int ntaps = filter_params->taps;
-
- // Construct a mask with which we'll AND filter coefficients 89ab to zero out
- // the unneeded entries. The upper bits of this mask are unused.
- const __m128i hicoeff_mask = make_1012_mask(ntaps);
-
- int32_t round_add32 = (1 << conv_params->round_1) / 2 + (1 << offset_bits);
- const __m128i round_add = _mm_set1_epi32(round_add32);
- const __m128i round_shift = extend_32_to_128(conv_params->round_1);
-
- const int32_t sub32 = ((1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1)));
- const __m128i sub = _mm_set1_epi32(sub32);
-
- int y_qn = subpel_y_qn;
- for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
- const int32_t *src_y = src + (y_qn >> SCALE_SUBPEL_BITS);
- const int filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(filter_idx < SUBPEL_SHIFTS);
- const int16_t *filter =
- av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
-
- // Load up coefficients for the filter and sign-extend to 32-bit precision
- // (to do so, calculate sign bits and then interleave)
- const __m128i zero = _mm_castps_si128(_mm_setzero_ps());
- const __m128i coeff0716 = _mm_loadu_si128((__m128i *)filter);
- const __m128i coeffhi16 = load_and_128i(filter + 8, hicoeff_mask);
- const __m128i csign0716 = _mm_cmplt_epi16(coeff0716, zero);
- const __m128i csignhi16 = _mm_cmplt_epi16(coeffhi16, zero);
- const __m128i coeff03 = _mm_unpacklo_epi16(coeff0716, csign0716);
- const __m128i coeff47 = _mm_unpackhi_epi16(coeff0716, csign0716);
- const __m128i coeff8d = _mm_unpacklo_epi16(coeffhi16, csignhi16);
-
- int x;
- for (x = 0; x <= w - 4; x += 4) {
- const int32_t *const src0 = src_y + x * src_stride;
- const int32_t *const src1 = src0 + 1 * src_stride;
- const int32_t *const src2 = src0 + 2 * src_stride;
- const int32_t *const src3 = src0 + 3 * src_stride;
-
- // Load the source data for the three rows, adding the three registers of
- // convolved products to one as we go (conv0..conv3) to avoid the
- // register pressure getting too high.
- const __m128i conv0 = convolve_32(src0, coeff03, coeff47, coeff8d);
- const __m128i conv1 = convolve_32(src1, coeff03, coeff47, coeff8d);
- const __m128i conv2 = convolve_32(src2, coeff03, coeff47, coeff8d);
- const __m128i conv3 = convolve_32(src3, coeff03, coeff47, coeff8d);
-
- // Now reduce horizontally to get one lane for each result
- const __m128i conv01 = _mm_hadd_epi32(conv0, conv1);
- const __m128i conv23 = _mm_hadd_epi32(conv2, conv3);
- const __m128i conv = _mm_hadd_epi32(conv01, conv23);
-
- // Divide down by (1 << round_1), rounding to nearest and subtract sub32.
- const __m128i shifted =
- _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
- const __m128i subbed = _mm_sub_epi32(shifted, sub);
-
- int32_t *dst_x = dst + y * dst_stride + x;
- const __m128i result =
- (conv_params->do_average)
- ? _mm_add_epi32(subbed, _mm_loadu_si128((__m128i *)dst_x))
- : subbed;
-
- _mm_storeu_si128((__m128i *)dst_x, result);
- }
- for (; x < w; ++x) {
- const int32_t *src_x = src_y + x * src_stride;
- CONV_BUF_TYPE sum = 1 << offset_bits;
- for (int k = 0; k < ntaps; ++k) sum += filter[k] * src_x[k];
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) - sub32;
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
- dst[y * dst_stride + x] = res;
- }
- }
+static __m128i convolve_16_8(const int16_t *src, __m128i coeff) {
+ __m128i data = _mm_loadu_si128((__m128i *)src);
+ return _mm_madd_epi16(data, coeff);
}
// A specialised version of vfilter, the vertical filter for
// av1_convolve_2d_scale_sse4_1. This version only supports 8 tap filters.
-static void vfilter8(const int32_t *src, int src_stride, int32_t *dst,
+static void vfilter8(const int16_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, int subpel_y_qn,
int y_step_qn, const InterpFilterParams *filter_params,
const ConvolveParams *conv_params, int bd) {
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
const int ntaps = 8;
- int32_t round_add32 = (1 << conv_params->round_1) / 2 + (1 << offset_bits);
- const __m128i round_add = _mm_set1_epi32(round_add32);
- const __m128i round_shift = extend_32_to_128(conv_params->round_1);
+ const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
const int32_t sub32 = ((1 << (offset_bits - conv_params->round_1)) +
(1 << (offset_bits - conv_params->round_1 - 1)));
- const __m128i sub = _mm_set1_epi32(sub32);
+ const __m128i sub = _mm_set1_epi16(sub32);
+
+ CONV_BUF_TYPE *dst16 = conv_params->dst;
+ const int dst16_stride = conv_params->dst_stride;
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
+ const __m128i bits_shift = _mm_cvtsi32_si128(bits);
+ const __m128i bits_const = _mm_set1_epi16(((1 << bits) >> 1));
+ const __m128i round_shift_add =
+ _mm_set1_epi32(((1 << conv_params->round_1) >> 1));
+ const __m128i res_add_const = _mm_set1_epi32(1 << offset_bits);
+
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m128i wt0 = _mm_set1_epi16(w0);
+ const __m128i wt1 = _mm_set1_epi16(w1);
+ const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
int y_qn = subpel_y_qn;
for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
- const int32_t *src_y = src + (y_qn >> SCALE_SUBPEL_BITS);
+ const int16_t *src_y = src + (y_qn >> SCALE_SUBPEL_BITS);
const int filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
assert(filter_idx < SUBPEL_SHIFTS);
const int16_t *filter =
av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
- // Load up coefficients for the filter and sign-extend to 32-bit precision
- // (to do so, calculate sign bits and then interleave)
- const __m128i zero = _mm_castps_si128(_mm_setzero_ps());
const __m128i coeff0716 = _mm_loadu_si128((__m128i *)filter);
- const __m128i csign0716 = _mm_cmplt_epi16(coeff0716, zero);
- const __m128i coeff03 = _mm_unpacklo_epi16(coeff0716, csign0716);
- const __m128i coeff47 = _mm_unpackhi_epi16(coeff0716, csign0716);
-
int x;
for (x = 0; x <= w - 4; x += 4) {
- const int32_t *const src0 = src_y + x * src_stride;
- const int32_t *const src1 = src0 + 1 * src_stride;
- const int32_t *const src2 = src0 + 2 * src_stride;
- const int32_t *const src3 = src0 + 3 * src_stride;
+ const int16_t *const src0 = src_y + x * src_stride;
+ const int16_t *const src1 = src0 + 1 * src_stride;
+ const int16_t *const src2 = src0 + 2 * src_stride;
+ const int16_t *const src3 = src0 + 3 * src_stride;
      // Load the source data for the four rows and compute their convolved
      // products (conv0..conv3) one row at a time, to keep the register
      // pressure from getting too high.
- const __m128i conv0 = convolve_32_8(src0, coeff03, coeff47);
- const __m128i conv1 = convolve_32_8(src1, coeff03, coeff47);
- const __m128i conv2 = convolve_32_8(src2, coeff03, coeff47);
- const __m128i conv3 = convolve_32_8(src3, coeff03, coeff47);
+ const __m128i conv0 = convolve_16_8(src0, coeff0716);
+ const __m128i conv1 = convolve_16_8(src1, coeff0716);
+ const __m128i conv2 = convolve_16_8(src2, coeff0716);
+ const __m128i conv3 = convolve_16_8(src3, coeff0716);
// Now reduce horizontally to get one lane for each result
const __m128i conv01 = _mm_hadd_epi32(conv0, conv1);
const __m128i conv23 = _mm_hadd_epi32(conv2, conv3);
- const __m128i conv = _mm_hadd_epi32(conv01, conv23);
+ __m128i conv = _mm_hadd_epi32(conv01, conv23);
+ conv = _mm_add_epi32(conv, res_add_const);
// Divide down by (1 << round_1), rounding to nearest and subtract sub32.
- const __m128i shifted =
- _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
- const __m128i subbed = _mm_sub_epi32(shifted, sub);
-
- int32_t *dst_x = dst + y * dst_stride + x;
- const __m128i result =
- (conv_params->do_average)
- ? _mm_add_epi32(subbed, _mm_loadu_si128((__m128i *)dst_x))
- : subbed;
-
- _mm_storeu_si128((__m128i *)dst_x, result);
+ __m128i shifted =
+ _mm_sra_epi32(_mm_add_epi32(conv, round_shift_add), round_shift);
+
+ uint8_t *dst_x = dst + y * dst_stride + x;
+ CONV_BUF_TYPE *dst_16_x = dst16 + y * dst16_stride + x;
+ __m128i result;
+ __m128i shifted_16 = _mm_packus_epi32(shifted, shifted);
+
+ if (conv_params->is_compound) {
+ if (conv_params->do_average) {
+ const __m128i p_16 = _mm_loadl_epi64((__m128i *)dst_16_x);
+ if (conv_params->use_jnt_comp_avg) {
+ const __m128i p_16_lo = _mm_unpacklo_epi16(p_16, shifted_16);
+ const __m128i wt_res_lo = _mm_madd_epi16(p_16_lo, wt);
+ const __m128i shifted_32 =
+ _mm_srai_epi32(wt_res_lo, DIST_PRECISION_BITS);
+ shifted_16 = _mm_packus_epi32(shifted_32, shifted_32);
+ } else {
+ shifted_16 = _mm_srai_epi16(_mm_add_epi16(p_16, shifted_16), 1);
+ }
+ const __m128i subbed = _mm_sub_epi16(shifted_16, sub);
+ result = _mm_sra_epi16(_mm_add_epi16(subbed, bits_const), bits_shift);
+ const __m128i result_8 = _mm_packus_epi16(result, result);
+ *(uint32_t *)dst_x = _mm_cvtsi128_si32(result_8);
+ } else {
+ _mm_storel_epi64((__m128i *)dst_16_x, shifted_16);
+ }
+ } else {
+ const __m128i subbed = _mm_sub_epi16(shifted_16, sub);
+ result = _mm_sra_epi16(_mm_add_epi16(subbed, bits_const), bits_shift);
+ const __m128i result_8 = _mm_packus_epi16(result, result);
+ *(uint32_t *)dst_x = _mm_cvtsi128_si32(result_8);
+ }
}
for (; x < w; ++x) {
- const int32_t *src_x = src_y + x * src_stride;
- CONV_BUF_TYPE sum = 1 << offset_bits;
+ const int16_t *src_x = src_y + x * src_stride;
+ int32_t sum = 1 << offset_bits;
for (int k = 0; k < ntaps; ++k) sum += filter[k] * src_x[k];
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) - sub32;
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
- dst[y * dst_stride + x] = res;
+ CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
+
+ if (conv_params->is_compound) {
+ if (conv_params->do_average) {
+ int32_t tmp = dst16[y * dst16_stride + x];
+ if (conv_params->use_jnt_comp_avg) {
+ tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
+ tmp = tmp >> DIST_PRECISION_BITS;
+ } else {
+ tmp += res;
+ tmp = tmp >> 1;
+ }
+ /* Subtract round offset and convolve round */
+ tmp = tmp - sub32;
+ dst[y * dst_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits));
+ } else {
+ dst16[y * dst16_stride + x] = res;
+ }
+ } else {
+ /* Subtract round offset and convolve round */
+ int32_t tmp = res - ((1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ dst[y * dst_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits));
+ }
}
}
}
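The compound branch above is AV1's distance-weighted averaging: the stored first-pass prediction and the current result are blended with the fwd_offset/bck_offset weights and scaled back down by DIST_PRECISION_BITS, the intermediate rounding offset sub32 is removed, and the value is rounded down to pixel precision. A scalar sketch of that final combine, matching the tail loop above (the helper name is illustrative):

/* Distance-weighted compound combine for one pixel; p0 is the stored
 * first-pass value from the CONV_BUF_TYPE buffer, res the current result,
 * both still carrying the (1 << offset_bits) headroom that sub32 removes. */
static uint8_t jnt_comp_avg_sketch(int32_t p0, int32_t res,
                                   const ConvolveParams *conv_params,
                                   int32_t sub32, int bits) {
  int32_t tmp;
  if (conv_params->use_jnt_comp_avg)
    tmp = (p0 * conv_params->fwd_offset + res * conv_params->bck_offset) >>
          DIST_PRECISION_BITS;
  else
    tmp = (p0 + res) >> 1;  /* Plain average when the weights are not used. */
  tmp -= sub32;             /* Remove the intermediate rounding offset. */
  return clip_pixel(ROUND_POWER_OF_TWO(tmp, bits));
}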
-
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w,
- int h, InterpFilterParams *filter_params_x,
+ uint8_t *dst8, int dst8_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_qn, const int x_step_qn,
const int subpel_y_qn, const int y_step_qn,
ConvolveParams *conv_params) {
- int32_t tmp[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
+ // TODO(yaowu): remove unnecessary initializations
+ int16_t tmp[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE] = { 0 };
int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
filter_params_y->taps;
const int xtaps = filter_params_x->taps;
const int ytaps = filter_params_y->taps;
-
const int fo_vert = ytaps / 2 - 1;
+ assert((xtaps == 8) && (ytaps == 8));
+ (void)xtaps;
// horizontal filter
- if (xtaps == 8)
- hfilter8(src - fo_vert * src_stride, src_stride, tmp, w, im_h, subpel_x_qn,
- x_step_qn, filter_params_x, conv_params->round_0);
- else
- hfilter(src - fo_vert * src_stride, src_stride, tmp, w, im_h, subpel_x_qn,
- x_step_qn, filter_params_x, conv_params->round_0);
+ hfilter8(src - fo_vert * src_stride, src_stride, tmp, w, im_h, subpel_x_qn,
+ x_step_qn, filter_params_x, conv_params->round_0);
// vertical filter (input is transposed)
- if (ytaps == 8)
- vfilter8(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
- filter_params_y, conv_params, 8);
- else
- vfilter(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
- filter_params_y, conv_params, 8);
+ vfilter8(tmp, im_h, dst8, dst8_stride, w, h, subpel_y_qn, y_step_qn,
+ filter_params_y, conv_params, 8);
}
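Both the 8-bit and high bit-depth scale wrappers share this two-pass layout: the horizontal pass writes its output transposed into tmp (element y of column x lands at tmp[y + x * im_h]), so the vertical pass can read each 8-tap window as eight consecutive int16 values. A minimal sketch of that vertical read, assuming the transposed layout (the helper is illustrative, not library API):

/* Sum one 8-tap vertical window from the transposed intermediate buffer. */
static int32_t vertical_tap_sum_sketch(const int16_t *tmp, int im_h, int x,
                                       int y, const int16_t *filter) {
  const int16_t *const col = tmp + x * im_h + y;  /* contiguous 8-tap window */
  int32_t sum = 0;
  for (int k = 0; k < 8; ++k) sum += filter[k] * col[k];
  return sum;
}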
-#if CONFIG_HIGHBITDEPTH
-// An wrapper to generate the SHUFPD instruction with __m128i types (just
-// writing _mm_shuffle_pd at the callsites gets a bit ugly because of the
-// casts)
-static __m128i mm_shuffle0_si128(__m128i a, __m128i b) {
- __m128d ad = _mm_castsi128_pd(a);
- __m128d bd = _mm_castsi128_pd(b);
- return _mm_castpd_si128(_mm_shuffle_pd(ad, bd, 0));
-}
-
-// The horizontal filter for av1_highbd_convolve_2d_scale_sse4_1. This
-// is the more general version, supporting 10 and 12 tap filters. For
-// 8-tap filters, use hfilter8.
-static void highbd_hfilter(const uint16_t *src, int src_stride, int32_t *dst,
- int w, int h, int subpel_x_qn, int x_step_qn,
- const InterpFilterParams *filter_params,
- unsigned round, int bd) {
- const int ntaps = filter_params->taps;
- assert(ntaps == 10 || ntaps == 12);
+// A specialised version of hfilter, the horizontal filter for
+// av1_highbd_convolve_2d_scale_sse4_1. This version only supports 8 tap
+// filters.
+static void highbd_hfilter8(const uint16_t *src, int src_stride, int16_t *dst,
+ int w, int h, int subpel_x_qn, int x_step_qn,
+ const InterpFilterParams *filter_params,
+ unsigned round, int bd) {
+ const int ntaps = 8;
src -= ntaps / 2 - 1;
- // Construct a mask with which we'll AND filter coefficients 89ab89ab to zero
- // out the unneeded entries.
- const __m128i hicoeff_mask = make_1012_mask(ntaps);
-
int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1));
const __m128i round_add = _mm_set1_epi32(round_add32);
- const __m128i round_shift = extend_32_to_128(round);
+ const __m128i round_shift = _mm_cvtsi32_si128(round);
int x_qn = subpel_x_qn;
for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
@@ -475,11 +280,8 @@ static void highbd_hfilter(const uint16_t *src, int src_stride, int32_t *dst,
const int16_t *filter =
av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
- // The "lo" coefficients are coefficients 0..7. For a 12-tap filter, the
- // "hi" coefficients are arranged as 89ab89ab. For a 10-tap filter, they
- // are masked out with hicoeff_mask.
+ // Load the filter coefficients
const __m128i coefflo = _mm_loadu_si128((__m128i *)filter);
- const __m128i coeffhi = load_and_128i(filter + 8, hicoeff_mask);
int y;
for (y = 0; y <= h - 4; y += 4) {
@@ -488,43 +290,31 @@ static void highbd_hfilter(const uint16_t *src, int src_stride, int32_t *dst,
const uint16_t *const src2 = src0 + 2 * src_stride;
const uint16_t *const src3 = src0 + 3 * src_stride;
- // Load up source data. This is 16-bit input data, so each load gets 8
- // pixels (we need at most 12)
+ // Load up source data. This is 16-bit input data, so each load gets the 8
+ // pixels we need.
const __m128i data0lo = _mm_loadu_si128((__m128i *)src0);
const __m128i data1lo = _mm_loadu_si128((__m128i *)src1);
const __m128i data2lo = _mm_loadu_si128((__m128i *)src2);
const __m128i data3lo = _mm_loadu_si128((__m128i *)src3);
- const __m128i data0hi = _mm_loadu_si128((__m128i *)(src0 + 8));
- const __m128i data1hi = _mm_loadu_si128((__m128i *)(src1 + 8));
- const __m128i data2hi = _mm_loadu_si128((__m128i *)(src2 + 8));
- const __m128i data3hi = _mm_loadu_si128((__m128i *)(src3 + 8));
-
- // The "hi" data has rubbish in the top half so interleave pairs together
- // to minimise the calculation we need to do.
- const __m128i data01hi = mm_shuffle0_si128(data0hi, data1hi);
- const __m128i data23hi = mm_shuffle0_si128(data2hi, data3hi);
// Multiply by coefficients
const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo);
const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo);
const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo);
const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo);
- const __m128i conv01hi = _mm_madd_epi16(data01hi, coeffhi);
- const __m128i conv23hi = _mm_madd_epi16(data23hi, coeffhi);
// Reduce horizontally and add
const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo);
const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo);
- const __m128i convlo = _mm_hadd_epi32(conv01lo, conv23lo);
- const __m128i convhi = _mm_hadd_epi32(conv01hi, conv23hi);
- const __m128i conv = _mm_add_epi32(convlo, convhi);
+ const __m128i conv = _mm_hadd_epi32(conv01lo, conv23lo);
// Divide down by (1 << round), rounding to nearest.
- const __m128i shifted =
+ __m128i shifted =
_mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
+ shifted = _mm_packus_epi32(shifted, shifted);
// Write transposed to the output
- _mm_storeu_si128((__m128i *)(dst + y + x * h), shifted);
+ _mm_storel_epi64((__m128i *)(dst + y + x * h), shifted);
}
for (; y < h; ++y) {
const uint16_t *const src_row = src_col + y * src_stride;
@@ -538,108 +328,173 @@ static void highbd_hfilter(const uint16_t *src, int src_stride, int32_t *dst,
}
}
}
-
-// A specialised version of hfilter, the horizontal filter for
+// A specialised version of vfilter, the vertical filter for
// av1_highbd_convolve_2d_scale_sse4_1. This version only supports 8 tap
// filters.
-static void highbd_hfilter8(const uint16_t *src, int src_stride, int32_t *dst,
- int w, int h, int subpel_x_qn, int x_step_qn,
+static void highbd_vfilter8(const int16_t *src, int src_stride, uint16_t *dst,
+ int dst_stride, int w, int h, int subpel_y_qn,
+ int y_step_qn,
const InterpFilterParams *filter_params,
- unsigned round, int bd) {
+ const ConvolveParams *conv_params, int bd) {
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
const int ntaps = 8;
- src -= ntaps / 2 - 1;
+ const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
- int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1));
- const __m128i round_add = _mm_set1_epi32(round_add32);
- const __m128i round_shift = extend_32_to_128(round);
+ const int32_t sub32 = ((1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ const __m128i sub = _mm_set1_epi32(sub32);
- int x_qn = subpel_x_qn;
- for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
- const uint16_t *const src_col = src + (x_qn >> SCALE_SUBPEL_BITS);
- const int filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+ CONV_BUF_TYPE *dst16 = conv_params->dst;
+ const int dst16_stride = conv_params->dst_stride;
+ const __m128i clip_pixel_ =
+ _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
+ const __m128i bits_shift = _mm_cvtsi32_si128(bits);
+ const __m128i bits_const = _mm_set1_epi32(((1 << bits) >> 1));
+ const __m128i round_shift_add =
+ _mm_set1_epi32(((1 << conv_params->round_1) >> 1));
+ const __m128i res_add_const = _mm_set1_epi32(1 << offset_bits);
+ const int round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ __m128i round_bits_shift = _mm_cvtsi32_si128(round_bits);
+ __m128i round_bits_const = _mm_set1_epi32(((1 << round_bits) >> 1));
+
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m128i wt0 = _mm_set1_epi32(w0);
+ const __m128i wt1 = _mm_set1_epi32(w1);
+
+ int y_qn = subpel_y_qn;
+ for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
+ const int16_t *src_y = src + (y_qn >> SCALE_SUBPEL_BITS);
+ const int filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
assert(filter_idx < SUBPEL_SHIFTS);
const int16_t *filter =
av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
- // Load the filter coefficients
- const __m128i coefflo = _mm_loadu_si128((__m128i *)filter);
-
- int y;
- for (y = 0; y <= h - 4; y += 4) {
- const uint16_t *const src0 = src_col + y * src_stride;
- const uint16_t *const src1 = src0 + 1 * src_stride;
- const uint16_t *const src2 = src0 + 2 * src_stride;
- const uint16_t *const src3 = src0 + 3 * src_stride;
-
- // Load up source data. This is 16-bit input data, so each load gets the 8
- // pixels we need.
- const __m128i data0lo = _mm_loadu_si128((__m128i *)src0);
- const __m128i data1lo = _mm_loadu_si128((__m128i *)src1);
- const __m128i data2lo = _mm_loadu_si128((__m128i *)src2);
- const __m128i data3lo = _mm_loadu_si128((__m128i *)src3);
-
- // Multiply by coefficients
- const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo);
- const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo);
- const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo);
- const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo);
+ const __m128i coeff0716 = _mm_loadu_si128((__m128i *)filter);
+ int x;
+ for (x = 0; x <= w - 4; x += 4) {
+ const int16_t *const src0 = src_y + x * src_stride;
+ const int16_t *const src1 = src0 + 1 * src_stride;
+ const int16_t *const src2 = src0 + 2 * src_stride;
+ const int16_t *const src3 = src0 + 3 * src_stride;
- // Reduce horizontally and add
- const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo);
- const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo);
- const __m128i conv = _mm_hadd_epi32(conv01lo, conv23lo);
+      // Load the source data for the four rows and compute their convolved
+      // products (conv0..conv3) one row at a time, to keep the register
+      // pressure from getting too high.
+ const __m128i conv0 = convolve_16_8(src0, coeff0716);
+ const __m128i conv1 = convolve_16_8(src1, coeff0716);
+ const __m128i conv2 = convolve_16_8(src2, coeff0716);
+ const __m128i conv3 = convolve_16_8(src3, coeff0716);
- // Divide down by (1 << round), rounding to nearest.
- const __m128i shifted =
- _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
+ // Now reduce horizontally to get one lane for each result
+ const __m128i conv01 = _mm_hadd_epi32(conv0, conv1);
+ const __m128i conv23 = _mm_hadd_epi32(conv2, conv3);
+ __m128i conv = _mm_hadd_epi32(conv01, conv23);
+ conv = _mm_add_epi32(conv, res_add_const);
- // Write transposed to the output
- _mm_storeu_si128((__m128i *)(dst + y + x * h), shifted);
+ // Divide down by (1 << round_1), rounding to nearest and subtract sub32.
+ __m128i shifted =
+ _mm_sra_epi32(_mm_add_epi32(conv, round_shift_add), round_shift);
+
+ uint16_t *dst_x = dst + y * dst_stride + x;
+ CONV_BUF_TYPE *dst_16_x = dst16 + y * dst16_stride + x;
+
+ __m128i result;
+ if (conv_params->is_compound) {
+ if (conv_params->do_average) {
+ __m128i p_32 =
+ _mm_cvtepu16_epi32(_mm_loadl_epi64((__m128i *)dst_16_x));
+
+ if (conv_params->use_jnt_comp_avg) {
+ shifted = _mm_add_epi32(_mm_mullo_epi32(p_32, wt0),
+ _mm_mullo_epi32(shifted, wt1));
+ shifted = _mm_srai_epi32(shifted, DIST_PRECISION_BITS);
+ } else {
+ shifted = _mm_srai_epi32(_mm_add_epi32(p_32, shifted), 1);
+ }
+ __m128i res32 = _mm_sub_epi32(shifted, sub);
+ res32 = _mm_sra_epi32(_mm_add_epi32(res32, round_bits_const),
+ round_bits_shift);
+
+ __m128i res16 = _mm_packus_epi32(res32, res32);
+ res16 = _mm_min_epi16(res16, clip_pixel_);
+ _mm_storel_epi64((__m128i *)dst_x, res16);
+ } else {
+ __m128i shifted_16 = _mm_packus_epi32(shifted, shifted);
+ _mm_storel_epi64((__m128i *)dst_16_x, shifted_16);
+ }
+ } else {
+ const __m128i subbed = _mm_sub_epi32(shifted, sub);
+ result = _mm_sra_epi16(_mm_add_epi32(subbed, bits_const), bits_shift);
+ result = _mm_packus_epi32(result, result);
+ result = _mm_min_epi16(result, clip_pixel_);
+ _mm_storel_epi64((__m128i *)dst_x, result);
+ }
}
- for (; y < h; ++y) {
- const uint16_t *const src_row = src_col + y * src_stride;
- int32_t sum = (1 << (bd + FILTER_BITS - 1));
- for (int k = 0; k < ntaps; ++k) {
- sum += filter[k] * src_row[k];
+ for (; x < w; ++x) {
+ const int16_t *src_x = src_y + x * src_stride;
+ int32_t sum = 1 << offset_bits;
+ for (int k = 0; k < ntaps; ++k) sum += filter[k] * src_x[k];
+ CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
+ if (conv_params->is_compound) {
+ if (conv_params->do_average) {
+ int32_t tmp = dst16[y * dst16_stride + x];
+ if (conv_params->use_jnt_comp_avg) {
+ tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
+ tmp = tmp >> DIST_PRECISION_BITS;
+ } else {
+ tmp += res;
+ tmp = tmp >> 1;
+ }
+ /* Subtract round offset and convolve round */
+ tmp = tmp - ((1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ dst[y * dst_stride + x] =
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
+ } else {
+ dst16[y * dst16_stride + x] = res;
+ }
+ } else {
+ /* Subtract round offset and convolve round */
+ int32_t tmp = res - ((1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ dst[y * dst_stride + x] =
+ clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
}
-
- dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round);
}
}
}
void av1_highbd_convolve_2d_scale_sse4_1(
- const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride,
- int w, int h, InterpFilterParams *filter_params_x,
+ const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
+ int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y, const int subpel_x_qn,
const int x_step_qn, const int subpel_y_qn, const int y_step_qn,
ConvolveParams *conv_params, int bd) {
- int32_t tmp[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
+  // TODO(yaowu): Move this off the stack
+ DECLARE_ALIGNED(16, int16_t,
+ tmp[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]);
int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
filter_params_y->taps;
-
const int xtaps = filter_params_x->taps;
const int ytaps = filter_params_y->taps;
const int fo_vert = ytaps / 2 - 1;
+ memset(tmp, 0, sizeof(tmp));
+ assert((xtaps == 8) && (ytaps == 8));
+ (void)xtaps;
+
// horizontal filter
- if (xtaps == 8)
- highbd_hfilter8(src - fo_vert * src_stride, src_stride, tmp, w, im_h,
- subpel_x_qn, x_step_qn, filter_params_x,
- conv_params->round_0, bd);
- else
- highbd_hfilter(src - fo_vert * src_stride, src_stride, tmp, w, im_h,
- subpel_x_qn, x_step_qn, filter_params_x,
- conv_params->round_0, bd);
+ highbd_hfilter8(src - fo_vert * src_stride, src_stride, tmp, w, im_h,
+ subpel_x_qn, x_step_qn, filter_params_x, conv_params->round_0,
+ bd);
// vertical filter (input is transposed)
- if (ytaps == 8)
- vfilter8(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
- filter_params_y, conv_params, bd);
- else
- vfilter(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
- filter_params_y, conv_params, bd);
+ highbd_vfilter8(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
+ filter_params_y, conv_params, bd);
}
-#endif // CONFIG_HIGHBITDEPTH
diff --git a/third_party/aom/av1/common/x86/av1_convolve_ssse3.c b/third_party/aom/av1/common/x86/av1_convolve_ssse3.c
deleted file mode 100644
index e85c15eaf..000000000
--- a/third_party/aom/av1/common/x86/av1_convolve_ssse3.c
+++ /dev/null
@@ -1,1034 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <tmmintrin.h>
-
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
-#include "av1/common/filter.h"
-
-#define WIDTH_BOUND (16)
-#define HEIGHT_BOUND (16)
-
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
-DECLARE_ALIGNED(16, static int8_t,
- sub_pel_filters_12sharp_signal_dir[15][2][16]);
-
-DECLARE_ALIGNED(16, static int8_t,
- sub_pel_filters_12sharp_ver_signal_dir[15][6][16]);
-#endif // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
-
-#if USE_TEMPORALFILTER_12TAP
-DECLARE_ALIGNED(16, static int8_t,
- sub_pel_filters_temporalfilter_12_signal_dir[15][2][16]);
-
-DECLARE_ALIGNED(16, static int8_t,
- sub_pel_filters_temporalfilter_12_ver_signal_dir[15][6][16]);
-#endif
-
-typedef int8_t (*SubpelFilterCoeffs)[16];
-
-static INLINE SubpelFilterCoeffs
-get_subpel_filter_signal_dir(const InterpFilterParams p, int index) {
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- if (p.interp_filter == MULTITAP_SHARP) {
- return &sub_pel_filters_12sharp_signal_dir[index][0];
- }
-#endif
-#if USE_TEMPORALFILTER_12TAP
- if (p.interp_filter == TEMPORALFILTER_12TAP) {
- return &sub_pel_filters_temporalfilter_12_signal_dir[index][0];
- }
-#endif
- (void)p;
- (void)index;
- return NULL;
-}
-
-static INLINE SubpelFilterCoeffs
-get_subpel_filter_ver_signal_dir(const InterpFilterParams p, int index) {
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- if (p.interp_filter == MULTITAP_SHARP) {
- return &sub_pel_filters_12sharp_ver_signal_dir[index][0];
- }
-#endif
-#if USE_TEMPORALFILTER_12TAP
- if (p.interp_filter == TEMPORALFILTER_12TAP) {
- return &sub_pel_filters_temporalfilter_12_ver_signal_dir[index][0];
- }
-#endif
- (void)p;
- (void)index;
- return NULL;
-}
-
-static INLINE void transpose_4x8(const __m128i *in, __m128i *out) {
- __m128i t0, t1;
-
- t0 = _mm_unpacklo_epi16(in[0], in[1]);
- t1 = _mm_unpacklo_epi16(in[2], in[3]);
-
- out[0] = _mm_unpacklo_epi32(t0, t1);
- out[1] = _mm_srli_si128(out[0], 8);
- out[2] = _mm_unpackhi_epi32(t0, t1);
- out[3] = _mm_srli_si128(out[2], 8);
-
- t0 = _mm_unpackhi_epi16(in[0], in[1]);
- t1 = _mm_unpackhi_epi16(in[2], in[3]);
-
- out[4] = _mm_unpacklo_epi32(t0, t1);
- out[5] = _mm_srli_si128(out[4], 8);
- // Note: We ignore out[6] and out[7] because
- // they're zero vectors.
-}
-
-typedef void (*store_pixel_t)(const __m128i *x, uint8_t *dst);
-
-static INLINE __m128i accumulate_store(const __m128i *x, uint8_t *src) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i one = _mm_set1_epi16(1);
- __m128i y = _mm_loadl_epi64((__m128i const *)src);
- y = _mm_unpacklo_epi8(y, zero);
- y = _mm_add_epi16(*x, y);
- y = _mm_add_epi16(y, one);
- y = _mm_srai_epi16(y, 1);
- y = _mm_packus_epi16(y, y);
- return y;
-}
-
-static INLINE void store_2_pixel_only(const __m128i *x, uint8_t *dst) {
- uint32_t temp;
- __m128i u = _mm_packus_epi16(*x, *x);
- temp = _mm_cvtsi128_si32(u);
- *(uint16_t *)dst = (uint16_t)temp;
-}
-
-static INLINE void accumulate_store_2_pixel(const __m128i *x, uint8_t *dst) {
- uint32_t temp;
- __m128i y = accumulate_store(x, dst);
- temp = _mm_cvtsi128_si32(y);
- *(uint16_t *)dst = (uint16_t)temp;
-}
-
-static store_pixel_t store2pixelTab[2] = { store_2_pixel_only,
- accumulate_store_2_pixel };
-
-static INLINE void store_4_pixel_only(const __m128i *x, uint8_t *dst) {
- __m128i u = _mm_packus_epi16(*x, *x);
- *(int *)dst = _mm_cvtsi128_si32(u);
-}
-
-static INLINE void accumulate_store_4_pixel(const __m128i *x, uint8_t *dst) {
- __m128i y = accumulate_store(x, dst);
- *(int *)dst = _mm_cvtsi128_si32(y);
-}
-
-static store_pixel_t store4pixelTab[2] = { store_4_pixel_only,
- accumulate_store_4_pixel };
-
-static void horiz_w4_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
- store_pixel_t store_func, uint8_t *dst) {
- __m128i sumPairRow[4];
- __m128i sumPairCol[8];
- __m128i pixel;
- const __m128i k_256 = _mm_set1_epi16(1 << 8);
- const __m128i zero = _mm_setzero_si128();
-
- assert(tapsNum == 10 || tapsNum == 12);
- if (10 == tapsNum) {
- src -= 1;
- }
-
- pixel = _mm_loadu_si128((__m128i const *)src);
- sumPairRow[0] = _mm_maddubs_epi16(pixel, f[0]);
- sumPairRow[2] = _mm_maddubs_epi16(pixel, f[1]);
- sumPairRow[2] = _mm_srli_si128(sumPairRow[2], 2);
-
- pixel = _mm_loadu_si128((__m128i const *)(src + 1));
- sumPairRow[1] = _mm_maddubs_epi16(pixel, f[0]);
- sumPairRow[3] = _mm_maddubs_epi16(pixel, f[1]);
- sumPairRow[3] = _mm_srli_si128(sumPairRow[3], 2);
-
- transpose_4x8(sumPairRow, sumPairCol);
-
- sumPairRow[0] = _mm_adds_epi16(sumPairCol[0], sumPairCol[1]);
- sumPairRow[1] = _mm_adds_epi16(sumPairCol[4], sumPairCol[5]);
-
- sumPairRow[2] = _mm_min_epi16(sumPairCol[2], sumPairCol[3]);
- sumPairRow[3] = _mm_max_epi16(sumPairCol[2], sumPairCol[3]);
-
- sumPairRow[0] = _mm_adds_epi16(sumPairRow[0], sumPairRow[1]);
- sumPairRow[0] = _mm_adds_epi16(sumPairRow[0], sumPairRow[2]);
- sumPairRow[0] = _mm_adds_epi16(sumPairRow[0], sumPairRow[3]);
-
- sumPairRow[1] = _mm_mulhrs_epi16(sumPairRow[0], k_256);
- sumPairRow[1] = _mm_packus_epi16(sumPairRow[1], sumPairRow[1]);
- sumPairRow[1] = _mm_unpacklo_epi8(sumPairRow[1], zero);
-
- store_func(&sumPairRow[1], dst);
-}
-
-static void horiz_w8_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
- store_pixel_t store, uint8_t *buf) {
- horiz_w4_ssse3(src, f, tapsNum, store, buf);
- src += 4;
- buf += 4;
- horiz_w4_ssse3(src, f, tapsNum, store, buf);
-}
-
-static void horiz_w16_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
- store_pixel_t store, uint8_t *buf) {
- horiz_w8_ssse3(src, f, tapsNum, store, buf);
- src += 8;
- buf += 8;
- horiz_w8_ssse3(src, f, tapsNum, store, buf);
-}
-
-static void horiz_w32_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
- store_pixel_t store, uint8_t *buf) {
- horiz_w16_ssse3(src, f, tapsNum, store, buf);
- src += 16;
- buf += 16;
- horiz_w16_ssse3(src, f, tapsNum, store, buf);
-}
-
-static void horiz_w64_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
- store_pixel_t store, uint8_t *buf) {
- horiz_w32_ssse3(src, f, tapsNum, store, buf);
- src += 32;
- buf += 32;
- horiz_w32_ssse3(src, f, tapsNum, store, buf);
-}
-
-static void horiz_w128_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
- store_pixel_t store, uint8_t *buf) {
- horiz_w64_ssse3(src, f, tapsNum, store, buf);
- src += 64;
- buf += 64;
- horiz_w64_ssse3(src, f, tapsNum, store, buf);
-}
-
-static void (*horizTab[6])(const uint8_t *, const __m128i *, int, store_pixel_t,
- uint8_t *) = {
- horiz_w4_ssse3, horiz_w8_ssse3, horiz_w16_ssse3,
- horiz_w32_ssse3, horiz_w64_ssse3, horiz_w128_ssse3,
-};
-
-static void filter_horiz_ssse3(const uint8_t *src, __m128i *f, int tapsNum,
- int width, store_pixel_t store, uint8_t *dst) {
- switch (width) {
-    // Note:
-    // For width 2 and 4 the caller must pass the matching 2- or 4-pixel
-    // store function.
- case 2:
- case 4: horizTab[0](src, f, tapsNum, store, dst); break;
- case 8: horizTab[1](src, f, tapsNum, store, dst); break;
- case 16: horizTab[2](src, f, tapsNum, store, dst); break;
- case 32: horizTab[3](src, f, tapsNum, store, dst); break;
- case 64: horizTab[4](src, f, tapsNum, store, dst); break;
- case 128: horizTab[5](src, f, tapsNum, store, dst); break;
- default: assert(0);
- }
-}
-
-// Vertical 8-pixel parallel
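-// In this path the horizontal filter writes unrounded 16-bit results for an
-// 8x8 tile into a temporary buffer; the helpers below round them, transpose
-// the tile back, and either store it directly or average it into dst,
-// depending on conv_params->do_average.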
-typedef void (*transpose_to_dst_t)(const uint16_t *src, int src_stride,
- uint8_t *dst, int dst_stride);
-
-static INLINE void transpose8x8_direct_to_dst(const uint16_t *src,
- int src_stride, uint8_t *dst,
- int dst_stride) {
- const __m128i k_256 = _mm_set1_epi16(1 << 8);
- __m128i v0, v1, v2, v3;
-
- __m128i u0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride));
- __m128i u1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride));
- __m128i u2 = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
- __m128i u3 = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
- __m128i u4 = _mm_loadu_si128((__m128i const *)(src + 4 * src_stride));
- __m128i u5 = _mm_loadu_si128((__m128i const *)(src + 5 * src_stride));
- __m128i u6 = _mm_loadu_si128((__m128i const *)(src + 6 * src_stride));
- __m128i u7 = _mm_loadu_si128((__m128i const *)(src + 7 * src_stride));
-
- u0 = _mm_mulhrs_epi16(u0, k_256);
- u1 = _mm_mulhrs_epi16(u1, k_256);
- u2 = _mm_mulhrs_epi16(u2, k_256);
- u3 = _mm_mulhrs_epi16(u3, k_256);
- u4 = _mm_mulhrs_epi16(u4, k_256);
- u5 = _mm_mulhrs_epi16(u5, k_256);
- u6 = _mm_mulhrs_epi16(u6, k_256);
- u7 = _mm_mulhrs_epi16(u7, k_256);
-
- v0 = _mm_packus_epi16(u0, u1);
- v1 = _mm_packus_epi16(u2, u3);
- v2 = _mm_packus_epi16(u4, u5);
- v3 = _mm_packus_epi16(u6, u7);
-
- u0 = _mm_unpacklo_epi8(v0, v1);
- u1 = _mm_unpackhi_epi8(v0, v1);
- u2 = _mm_unpacklo_epi8(v2, v3);
- u3 = _mm_unpackhi_epi8(v2, v3);
-
- u4 = _mm_unpacklo_epi8(u0, u1);
- u5 = _mm_unpacklo_epi8(u2, u3);
- u6 = _mm_unpackhi_epi8(u0, u1);
- u7 = _mm_unpackhi_epi8(u2, u3);
-
- u0 = _mm_unpacklo_epi32(u4, u5);
- u1 = _mm_unpackhi_epi32(u4, u5);
- u2 = _mm_unpacklo_epi32(u6, u7);
- u3 = _mm_unpackhi_epi32(u6, u7);
-
- u4 = _mm_srli_si128(u0, 8);
- u5 = _mm_srli_si128(u1, 8);
- u6 = _mm_srli_si128(u2, 8);
- u7 = _mm_srli_si128(u3, 8);
-
- _mm_storel_epi64((__m128i *)dst, u0);
- _mm_storel_epi64((__m128i *)(dst + dst_stride * 1), u4);
- _mm_storel_epi64((__m128i *)(dst + dst_stride * 2), u1);
- _mm_storel_epi64((__m128i *)(dst + dst_stride * 3), u5);
- _mm_storel_epi64((__m128i *)(dst + dst_stride * 4), u2);
- _mm_storel_epi64((__m128i *)(dst + dst_stride * 5), u6);
- _mm_storel_epi64((__m128i *)(dst + dst_stride * 6), u3);
- _mm_storel_epi64((__m128i *)(dst + dst_stride * 7), u7);
-}
-
-static INLINE void transpose8x8_accumu_to_dst(const uint16_t *src,
- int src_stride, uint8_t *dst,
- int dst_stride) {
- const __m128i k_256 = _mm_set1_epi16(1 << 8);
- const __m128i zero = _mm_setzero_si128();
- const __m128i one = _mm_set1_epi16(1);
- __m128i v0, v1, v2, v3, v4, v5, v6, v7;
-
- __m128i u0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride));
- __m128i u1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride));
- __m128i u2 = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
- __m128i u3 = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
- __m128i u4 = _mm_loadu_si128((__m128i const *)(src + 4 * src_stride));
- __m128i u5 = _mm_loadu_si128((__m128i const *)(src + 5 * src_stride));
- __m128i u6 = _mm_loadu_si128((__m128i const *)(src + 6 * src_stride));
- __m128i u7 = _mm_loadu_si128((__m128i const *)(src + 7 * src_stride));
-
- u0 = _mm_mulhrs_epi16(u0, k_256);
- u1 = _mm_mulhrs_epi16(u1, k_256);
- u2 = _mm_mulhrs_epi16(u2, k_256);
- u3 = _mm_mulhrs_epi16(u3, k_256);
- u4 = _mm_mulhrs_epi16(u4, k_256);
- u5 = _mm_mulhrs_epi16(u5, k_256);
- u6 = _mm_mulhrs_epi16(u6, k_256);
- u7 = _mm_mulhrs_epi16(u7, k_256);
-
- v0 = _mm_packus_epi16(u0, u1);
- v1 = _mm_packus_epi16(u2, u3);
- v2 = _mm_packus_epi16(u4, u5);
- v3 = _mm_packus_epi16(u6, u7);
-
- u0 = _mm_unpacklo_epi8(v0, v1);
- u1 = _mm_unpackhi_epi8(v0, v1);
- u2 = _mm_unpacklo_epi8(v2, v3);
- u3 = _mm_unpackhi_epi8(v2, v3);
-
- u4 = _mm_unpacklo_epi8(u0, u1);
- u5 = _mm_unpacklo_epi8(u2, u3);
- u6 = _mm_unpackhi_epi8(u0, u1);
- u7 = _mm_unpackhi_epi8(u2, u3);
-
- u0 = _mm_unpacklo_epi32(u4, u5);
- u1 = _mm_unpackhi_epi32(u4, u5);
- u2 = _mm_unpacklo_epi32(u6, u7);
- u3 = _mm_unpackhi_epi32(u6, u7);
-
- u4 = _mm_srli_si128(u0, 8);
- u5 = _mm_srli_si128(u1, 8);
- u6 = _mm_srli_si128(u2, 8);
- u7 = _mm_srli_si128(u3, 8);
-
- v0 = _mm_loadl_epi64((__m128i const *)(dst + 0 * dst_stride));
- v1 = _mm_loadl_epi64((__m128i const *)(dst + 1 * dst_stride));
- v2 = _mm_loadl_epi64((__m128i const *)(dst + 2 * dst_stride));
- v3 = _mm_loadl_epi64((__m128i const *)(dst + 3 * dst_stride));
- v4 = _mm_loadl_epi64((__m128i const *)(dst + 4 * dst_stride));
- v5 = _mm_loadl_epi64((__m128i const *)(dst + 5 * dst_stride));
- v6 = _mm_loadl_epi64((__m128i const *)(dst + 6 * dst_stride));
- v7 = _mm_loadl_epi64((__m128i const *)(dst + 7 * dst_stride));
-
- u0 = _mm_unpacklo_epi8(u0, zero);
- u1 = _mm_unpacklo_epi8(u1, zero);
- u2 = _mm_unpacklo_epi8(u2, zero);
- u3 = _mm_unpacklo_epi8(u3, zero);
- u4 = _mm_unpacklo_epi8(u4, zero);
- u5 = _mm_unpacklo_epi8(u5, zero);
- u6 = _mm_unpacklo_epi8(u6, zero);
- u7 = _mm_unpacklo_epi8(u7, zero);
-
- v0 = _mm_unpacklo_epi8(v0, zero);
- v1 = _mm_unpacklo_epi8(v1, zero);
- v2 = _mm_unpacklo_epi8(v2, zero);
- v3 = _mm_unpacklo_epi8(v3, zero);
- v4 = _mm_unpacklo_epi8(v4, zero);
- v5 = _mm_unpacklo_epi8(v5, zero);
- v6 = _mm_unpacklo_epi8(v6, zero);
- v7 = _mm_unpacklo_epi8(v7, zero);
-
- v0 = _mm_adds_epi16(u0, v0);
- v1 = _mm_adds_epi16(u4, v1);
- v2 = _mm_adds_epi16(u1, v2);
- v3 = _mm_adds_epi16(u5, v3);
- v4 = _mm_adds_epi16(u2, v4);
- v5 = _mm_adds_epi16(u6, v5);
- v6 = _mm_adds_epi16(u3, v6);
- v7 = _mm_adds_epi16(u7, v7);
-
- v0 = _mm_adds_epi16(v0, one);
- v1 = _mm_adds_epi16(v1, one);
- v2 = _mm_adds_epi16(v2, one);
- v3 = _mm_adds_epi16(v3, one);
- v4 = _mm_adds_epi16(v4, one);
- v5 = _mm_adds_epi16(v5, one);
- v6 = _mm_adds_epi16(v6, one);
- v7 = _mm_adds_epi16(v7, one);
-
- v0 = _mm_srai_epi16(v0, 1);
- v1 = _mm_srai_epi16(v1, 1);
- v2 = _mm_srai_epi16(v2, 1);
- v3 = _mm_srai_epi16(v3, 1);
- v4 = _mm_srai_epi16(v4, 1);
- v5 = _mm_srai_epi16(v5, 1);
- v6 = _mm_srai_epi16(v6, 1);
- v7 = _mm_srai_epi16(v7, 1);
-
- u0 = _mm_packus_epi16(v0, v1);
- u1 = _mm_packus_epi16(v2, v3);
- u2 = _mm_packus_epi16(v4, v5);
- u3 = _mm_packus_epi16(v6, v7);
-
- u4 = _mm_srli_si128(u0, 8);
- u5 = _mm_srli_si128(u1, 8);
- u6 = _mm_srli_si128(u2, 8);
- u7 = _mm_srli_si128(u3, 8);
-
- _mm_storel_epi64((__m128i *)dst, u0);
- _mm_storel_epi64((__m128i *)(dst + dst_stride * 1), u4);
- _mm_storel_epi64((__m128i *)(dst + dst_stride * 2), u1);
- _mm_storel_epi64((__m128i *)(dst + dst_stride * 3), u5);
- _mm_storel_epi64((__m128i *)(dst + dst_stride * 4), u2);
- _mm_storel_epi64((__m128i *)(dst + dst_stride * 5), u6);
- _mm_storel_epi64((__m128i *)(dst + dst_stride * 6), u3);
- _mm_storel_epi64((__m128i *)(dst + dst_stride * 7), u7);
-}
-
-static transpose_to_dst_t trans8x8Tab[2] = { transpose8x8_direct_to_dst,
- transpose8x8_accumu_to_dst };
-
-static INLINE void transpose_8x16(const __m128i *in, __m128i *out) {
- __m128i t0, t1, t2, t3, u0, u1;
-
- t0 = _mm_unpacklo_epi16(in[0], in[1]);
- t1 = _mm_unpacklo_epi16(in[2], in[3]);
- t2 = _mm_unpacklo_epi16(in[4], in[5]);
- t3 = _mm_unpacklo_epi16(in[6], in[7]);
-
- u0 = _mm_unpacklo_epi32(t0, t1);
- u1 = _mm_unpacklo_epi32(t2, t3);
-
- out[0] = _mm_unpacklo_epi64(u0, u1);
- out[1] = _mm_unpackhi_epi64(u0, u1);
-
- u0 = _mm_unpackhi_epi32(t0, t1);
- u1 = _mm_unpackhi_epi32(t2, t3);
-
- out[2] = _mm_unpacklo_epi64(u0, u1);
- out[3] = _mm_unpackhi_epi64(u0, u1);
-
- t0 = _mm_unpackhi_epi16(in[0], in[1]);
- t1 = _mm_unpackhi_epi16(in[2], in[3]);
- t2 = _mm_unpackhi_epi16(in[4], in[5]);
- t3 = _mm_unpackhi_epi16(in[6], in[7]);
-
- u0 = _mm_unpacklo_epi32(t0, t1);
- u1 = _mm_unpacklo_epi32(t2, t3);
-
- out[4] = _mm_unpacklo_epi64(u0, u1);
- out[5] = _mm_unpackhi_epi64(u0, u1);
-
- // Ignore out[6] and out[7]
- // they're zero vectors.
-}
-
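-// Horizontal filter applied to 8 rows at once: an 8x16 tile of source pixels
-// is transposed so the taps line up within each vector, and the 8 unrounded
-// 16-bit sums are written to buf for the transpose helpers above.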
-static void filter_horiz_v8p_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch,
- __m128i *f, int tapsNum, uint16_t *buf) {
- __m128i s[8], t[6];
- __m128i min_x2x3, max_x2x3;
- __m128i temp;
-
- assert(tapsNum == 10 || tapsNum == 12);
- if (tapsNum == 10) {
- src_ptr -= 1;
- }
- s[0] = _mm_loadu_si128((const __m128i *)src_ptr);
- s[1] = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch));
- s[2] = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2));
- s[3] = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3));
- s[4] = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4));
- s[5] = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5));
- s[6] = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6));
- s[7] = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7));
-
- // TRANSPOSE...
-  // Vector represents column pixel pairs instead of a row
- transpose_8x16(s, t);
-
- // multiply 2 adjacent elements with the filter and add the result
- s[0] = _mm_maddubs_epi16(t[0], f[0]);
- s[1] = _mm_maddubs_epi16(t[1], f[1]);
- s[2] = _mm_maddubs_epi16(t[2], f[2]);
- s[3] = _mm_maddubs_epi16(t[3], f[3]);
- s[4] = _mm_maddubs_epi16(t[4], f[4]);
- s[5] = _mm_maddubs_epi16(t[5], f[5]);
-
- // add and saturate the results together
- min_x2x3 = _mm_min_epi16(s[2], s[3]);
- max_x2x3 = _mm_max_epi16(s[2], s[3]);
- temp = _mm_adds_epi16(s[0], s[1]);
- temp = _mm_adds_epi16(temp, s[5]);
- temp = _mm_adds_epi16(temp, s[4]);
-
- temp = _mm_adds_epi16(temp, min_x2x3);
- temp = _mm_adds_epi16(temp, max_x2x3);
-
- _mm_storeu_si128((__m128i *)buf, temp);
-}
-
-// Vertical 4-pixel parallel
-static INLINE void transpose4x4_direct_to_dst(const uint16_t *src,
- int src_stride, uint8_t *dst,
- int dst_stride) {
- const __m128i k_256 = _mm_set1_epi16(1 << 8);
- __m128i v0, v1, v2, v3;
-
-  // TODO(luoyi): this could use two 16-byte loads (8 two-byte elements each)
-  // instead of four 8-byte loads.
- __m128i u0 = _mm_loadl_epi64((__m128i const *)(src + 0 * src_stride));
- __m128i u1 = _mm_loadl_epi64((__m128i const *)(src + 1 * src_stride));
- __m128i u2 = _mm_loadl_epi64((__m128i const *)(src + 2 * src_stride));
- __m128i u3 = _mm_loadl_epi64((__m128i const *)(src + 3 * src_stride));
-
- v0 = _mm_unpacklo_epi16(u0, u1);
- v1 = _mm_unpacklo_epi16(u2, u3);
-
- v2 = _mm_unpacklo_epi32(v0, v1);
- v3 = _mm_unpackhi_epi32(v0, v1);
-
- u0 = _mm_mulhrs_epi16(v2, k_256);
- u1 = _mm_mulhrs_epi16(v3, k_256);
-
- u0 = _mm_packus_epi16(u0, u1);
- u1 = _mm_srli_si128(u0, 4);
- u2 = _mm_srli_si128(u0, 8);
- u3 = _mm_srli_si128(u0, 12);
-
- *(int *)(dst) = _mm_cvtsi128_si32(u0);
- *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u1);
- *(int *)(dst + dst_stride * 2) = _mm_cvtsi128_si32(u2);
- *(int *)(dst + dst_stride * 3) = _mm_cvtsi128_si32(u3);
-}
-
-static INLINE void transpose4x4_accumu_to_dst(const uint16_t *src,
- int src_stride, uint8_t *dst,
- int dst_stride) {
- const __m128i k_256 = _mm_set1_epi16(1 << 8);
- const __m128i zero = _mm_setzero_si128();
- const __m128i one = _mm_set1_epi16(1);
-
- __m128i v0, v1, v2, v3;
-
- __m128i u0 = _mm_loadl_epi64((__m128i const *)(src));
- __m128i u1 = _mm_loadl_epi64((__m128i const *)(src + src_stride));
- __m128i u2 = _mm_loadl_epi64((__m128i const *)(src + 2 * src_stride));
- __m128i u3 = _mm_loadl_epi64((__m128i const *)(src + 3 * src_stride));
-
- v0 = _mm_unpacklo_epi16(u0, u1);
- v1 = _mm_unpacklo_epi16(u2, u3);
-
- v2 = _mm_unpacklo_epi32(v0, v1);
- v3 = _mm_unpackhi_epi32(v0, v1);
-
- u0 = _mm_mulhrs_epi16(v2, k_256);
- u1 = _mm_mulhrs_epi16(v3, k_256);
-
- u2 = _mm_packus_epi16(u0, u1);
- u0 = _mm_unpacklo_epi8(u2, zero);
- u1 = _mm_unpackhi_epi8(u2, zero);
-
- // load pixel values
- v0 = _mm_loadl_epi64((__m128i const *)(dst));
- v1 = _mm_loadl_epi64((__m128i const *)(dst + dst_stride));
- v2 = _mm_loadl_epi64((__m128i const *)(dst + 2 * dst_stride));
- v3 = _mm_loadl_epi64((__m128i const *)(dst + 3 * dst_stride));
-
- v0 = _mm_unpacklo_epi8(v0, zero);
- v1 = _mm_unpacklo_epi8(v1, zero);
- v2 = _mm_unpacklo_epi8(v2, zero);
- v3 = _mm_unpacklo_epi8(v3, zero);
-
- v0 = _mm_unpacklo_epi64(v0, v1);
- v1 = _mm_unpacklo_epi64(v2, v3);
-
- u0 = _mm_adds_epi16(u0, v0);
- u1 = _mm_adds_epi16(u1, v1);
-
- u0 = _mm_adds_epi16(u0, one);
- u1 = _mm_adds_epi16(u1, one);
-
- u0 = _mm_srai_epi16(u0, 1);
- u1 = _mm_srai_epi16(u1, 1);
-
- // saturation and pack to pixels
- u0 = _mm_packus_epi16(u0, u1);
- u1 = _mm_srli_si128(u0, 4);
- u2 = _mm_srli_si128(u0, 8);
- u3 = _mm_srli_si128(u0, 12);
-
- *(int *)(dst) = _mm_cvtsi128_si32(u0);
- *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u1);
- *(int *)(dst + dst_stride * 2) = _mm_cvtsi128_si32(u2);
- *(int *)(dst + dst_stride * 3) = _mm_cvtsi128_si32(u3);
-}
-
-static transpose_to_dst_t trans4x4Tab[2] = { transpose4x4_direct_to_dst,
- transpose4x4_accumu_to_dst };
-
-static void filter_horiz_v4p_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch,
- __m128i *f, int tapsNum, uint16_t *buf) {
- __m128i A, B, C, D;
- __m128i tr0_0, tr0_1, s1s0, s3s2, s5s4, s7s6, s9s8, sbsa;
- __m128i x0, x1, x2, x3, x4, x5;
- __m128i min_x2x3, max_x2x3, temp;
-
- assert(tapsNum == 10 || tapsNum == 12);
- if (tapsNum == 10) {
- src_ptr -= 1;
- }
- A = _mm_loadu_si128((const __m128i *)src_ptr);
- B = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch));
- C = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2));
- D = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3));
-
- // TRANSPOSE...
-  // Vector represents column pixel pairs instead of a row
- // 00 01 10 11 02 03 12 13 04 05 14 15 06 07 16 17
- tr0_0 = _mm_unpacklo_epi16(A, B);
- // 20 21 30 31 22 23 32 33 24 25 34 35 26 27 36 37
- tr0_1 = _mm_unpacklo_epi16(C, D);
- // 00 01 10 11 20 21 30 31 02 03 12 13 22 23 32 33
- s1s0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
- // 04 05 14 15 24 25 34 35 06 07 16 17 26 27 36 37
- s5s4 = _mm_unpackhi_epi32(tr0_0, tr0_1);
- // 02 03 12 13 22 23 32 33
- s3s2 = _mm_srli_si128(s1s0, 8);
- // 06 07 16 17 26 27 36 37
- s7s6 = _mm_srli_si128(s5s4, 8);
-
- tr0_0 = _mm_unpackhi_epi16(A, B);
- tr0_1 = _mm_unpackhi_epi16(C, D);
- s9s8 = _mm_unpacklo_epi32(tr0_0, tr0_1);
- sbsa = _mm_srli_si128(s9s8, 8);
-
- // multiply 2 adjacent elements with the filter and add the result
- x0 = _mm_maddubs_epi16(s1s0, f[0]);
- x1 = _mm_maddubs_epi16(s3s2, f[1]);
- x2 = _mm_maddubs_epi16(s5s4, f[2]);
- x3 = _mm_maddubs_epi16(s7s6, f[3]);
- x4 = _mm_maddubs_epi16(s9s8, f[4]);
- x5 = _mm_maddubs_epi16(sbsa, f[5]);
- // add and saturate the results together
- min_x2x3 = _mm_min_epi16(x2, x3);
- max_x2x3 = _mm_max_epi16(x2, x3);
- temp = _mm_adds_epi16(x0, x1);
- temp = _mm_adds_epi16(temp, x5);
- temp = _mm_adds_epi16(temp, x4);
-
- temp = _mm_adds_epi16(temp, min_x2x3);
- temp = _mm_adds_epi16(temp, max_x2x3);
- _mm_storel_epi64((__m128i *)buf, temp);
-}
-
-// Note:
-// This function assumes:
-// (1) a 10- or 12-tap filter
-// (2) x_step_q4 == 16, i.e. the filter phase is fixed for the whole call
-
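-// For blocks wider than WIDTH_BOUND and taller than HEIGHT_BOUND the filter
-// runs 8 rows at a time through the temp/transpose path above; smaller blocks
-// run 4 rows at a time, and w == 2 falls back to one row at a time.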
-void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_x_q4, int x_step_q4,
- ConvolveParams *conv_params) {
- DECLARE_ALIGNED(16, uint16_t, temp[8 * 8]);
- __m128i verf[6];
- __m128i horf[2];
- SubpelFilterCoeffs hCoeffs, vCoeffs;
- assert(conv_params->do_average == 0 || conv_params->do_average == 1);
- const uint8_t *src_ptr;
- store_pixel_t store2p = store2pixelTab[conv_params->do_average];
- store_pixel_t store4p = store4pixelTab[conv_params->do_average];
- transpose_to_dst_t transpose_4x4 = trans4x4Tab[conv_params->do_average];
- transpose_to_dst_t transpose_8x8 = trans8x8Tab[conv_params->do_average];
-
- const int tapsNum = filter_params.taps;
- int block_height, block_residu;
- int i, col, count;
- (void)x_step_q4;
-
- if (0 == subpel_x_q4 || 16 != x_step_q4) {
- av1_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params,
- subpel_x_q4, x_step_q4, conv_params);
- return;
- }
-
- hCoeffs = get_subpel_filter_signal_dir(filter_params, subpel_x_q4 - 1);
- vCoeffs = get_subpel_filter_ver_signal_dir(filter_params, subpel_x_q4 - 1);
-
- if (!hCoeffs || !vCoeffs) {
- av1_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params,
- subpel_x_q4, x_step_q4, conv_params);
- return;
- }
-
- verf[0] = *((const __m128i *)(vCoeffs));
- verf[1] = *((const __m128i *)(vCoeffs + 1));
- verf[2] = *((const __m128i *)(vCoeffs + 2));
- verf[3] = *((const __m128i *)(vCoeffs + 3));
- verf[4] = *((const __m128i *)(vCoeffs + 4));
- verf[5] = *((const __m128i *)(vCoeffs + 5));
-
- horf[0] = *((const __m128i *)(hCoeffs));
- horf[1] = *((const __m128i *)(hCoeffs + 1));
-
- count = 0;
-
-  // tapsNum is the filter length; back src up to cover the leading taps
- src -= (tapsNum >> 1) - 1;
- src_ptr = src;
- if (w > WIDTH_BOUND && h > HEIGHT_BOUND) {
- // 8-pixels parallel
- block_height = h >> 3;
- block_residu = h & 7;
-
- do {
- for (col = 0; col < w; col += 8) {
- for (i = 0; i < 8; ++i) {
- filter_horiz_v8p_ssse3(src_ptr, src_stride, verf, tapsNum,
- temp + (i * 8));
- src_ptr += 1;
- }
- transpose_8x8(temp, 8, dst + col, dst_stride);
- }
- count++;
- src_ptr = src + count * src_stride * 8;
- dst += dst_stride * 8;
- } while (count < block_height);
-
- for (i = 0; i < block_residu; ++i) {
- filter_horiz_ssse3(src_ptr, horf, tapsNum, w, store4p, dst);
- src_ptr += src_stride;
- dst += dst_stride;
- }
- } else {
- if (w > 2) {
- // 4-pixels parallel
- block_height = h >> 2;
- block_residu = h & 3;
-
- do {
- for (col = 0; col < w; col += 4) {
- for (i = 0; i < 4; ++i) {
- filter_horiz_v4p_ssse3(src_ptr, src_stride, verf, tapsNum,
- temp + (i * 4));
- src_ptr += 1;
- }
- transpose_4x4(temp, 4, dst + col, dst_stride);
- }
- count++;
- src_ptr = src + count * src_stride * 4;
- dst += dst_stride * 4;
- } while (count < block_height);
-
- for (i = 0; i < block_residu; ++i) {
- filter_horiz_ssse3(src_ptr, horf, tapsNum, w, store4p, dst);
- src_ptr += src_stride;
- dst += dst_stride;
- }
- } else {
- for (i = 0; i < h; i++) {
- filter_horiz_ssse3(src_ptr, horf, tapsNum, w, store2p, dst);
- src_ptr += src_stride;
- dst += dst_stride;
- }
- }
- }
-}
-
-// Vertical convolution filtering
-static INLINE void store_8_pixel_only(const __m128i *x, uint8_t *dst) {
- __m128i u = _mm_packus_epi16(*x, *x);
- _mm_storel_epi64((__m128i *)dst, u);
-}
-
-static INLINE void accumulate_store_8_pixel(const __m128i *x, uint8_t *dst) {
- __m128i y = accumulate_store(x, dst);
- _mm_storel_epi64((__m128i *)dst, y);
-}
-
-static store_pixel_t store8pixelTab[2] = { store_8_pixel_only,
- accumulate_store_8_pixel };
-
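-// Vertically filter 8 adjacent columns: pairs of source rows are interleaved
-// with _mm_unpacklo_epi8 so _mm_maddubs_epi16 applies two taps per pair, the
-// partial sums are combined with the same min/max ordering as the horizontal
-// path, and the result is rounded back to pixel range.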
-static __m128i filter_vert_ssse3(const uint8_t *src, int src_stride,
- int tapsNum, __m128i *f) {
- __m128i s[12];
- const __m128i k_256 = _mm_set1_epi16(1 << 8);
- const __m128i zero = _mm_setzero_si128();
- __m128i min_x2x3, max_x2x3, sum;
- int i = 0;
- int r = 0;
-
- if (10 == tapsNum) {
- i += 1;
- s[0] = zero;
- }
- while (i < 12) {
- s[i] = _mm_loadu_si128((__m128i const *)(src + r * src_stride));
- i += 1;
- r += 1;
- }
-
- s[0] = _mm_unpacklo_epi8(s[0], s[1]);
- s[2] = _mm_unpacklo_epi8(s[2], s[3]);
- s[4] = _mm_unpacklo_epi8(s[4], s[5]);
- s[6] = _mm_unpacklo_epi8(s[6], s[7]);
- s[8] = _mm_unpacklo_epi8(s[8], s[9]);
- s[10] = _mm_unpacklo_epi8(s[10], s[11]);
-
- s[0] = _mm_maddubs_epi16(s[0], f[0]);
- s[2] = _mm_maddubs_epi16(s[2], f[1]);
- s[4] = _mm_maddubs_epi16(s[4], f[2]);
- s[6] = _mm_maddubs_epi16(s[6], f[3]);
- s[8] = _mm_maddubs_epi16(s[8], f[4]);
- s[10] = _mm_maddubs_epi16(s[10], f[5]);
-
- min_x2x3 = _mm_min_epi16(s[4], s[6]);
- max_x2x3 = _mm_max_epi16(s[4], s[6]);
- sum = _mm_adds_epi16(s[0], s[2]);
- sum = _mm_adds_epi16(sum, s[10]);
- sum = _mm_adds_epi16(sum, s[8]);
-
- sum = _mm_adds_epi16(sum, min_x2x3);
- sum = _mm_adds_epi16(sum, max_x2x3);
-
- sum = _mm_mulhrs_epi16(sum, k_256);
- sum = _mm_packus_epi16(sum, sum);
- sum = _mm_unpacklo_epi8(sum, zero);
- return sum;
-}
-
-static void filter_vert_horiz_parallel_ssse3(const uint8_t *src, int src_stride,
- __m128i *f, int tapsNum,
- store_pixel_t store_func,
- uint8_t *dst) {
- __m128i sum = filter_vert_ssse3(src, src_stride, tapsNum, f);
- store_func(&sum, dst);
-}
-
-static void filter_vert_compute_small(const uint8_t *src, int src_stride,
- __m128i *f, int tapsNum,
- store_pixel_t store_func, int h,
- uint8_t *dst, int dst_stride) {
- int rowIndex = 0;
- do {
- filter_vert_horiz_parallel_ssse3(src, src_stride, f, tapsNum, store_func,
- dst);
- rowIndex++;
- src += src_stride;
- dst += dst_stride;
- } while (rowIndex < h);
-}
-
-static void filter_vert_compute_large(const uint8_t *src, int src_stride,
- __m128i *f, int tapsNum,
- store_pixel_t store_func, int w, int h,
- uint8_t *dst, int dst_stride) {
- int col;
- int rowIndex = 0;
- const uint8_t *src_ptr = src;
- uint8_t *dst_ptr = dst;
-
- do {
- for (col = 0; col < w; col += 8) {
- filter_vert_horiz_parallel_ssse3(src_ptr, src_stride, f, tapsNum,
- store_func, dst_ptr);
- src_ptr += 8;
- dst_ptr += 8;
- }
- rowIndex++;
- src_ptr = src + rowIndex * src_stride;
- dst_ptr = dst + rowIndex * dst_stride;
- } while (rowIndex < h);
-}
-
-void av1_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams filter_params,
- const int subpel_y_q4, int y_step_q4,
- ConvolveParams *conv_params) {
- __m128i verf[6];
- SubpelFilterCoeffs vCoeffs;
- const uint8_t *src_ptr;
- assert(conv_params->do_average == 0 || conv_params->do_average == 1);
- uint8_t *dst_ptr = dst;
- store_pixel_t store2p = store2pixelTab[conv_params->do_average];
- store_pixel_t store4p = store4pixelTab[conv_params->do_average];
- store_pixel_t store8p = store8pixelTab[conv_params->do_average];
- const int tapsNum = filter_params.taps;
-
- if (0 == subpel_y_q4 || 16 != y_step_q4) {
- av1_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,
- subpel_y_q4, y_step_q4, conv_params);
- return;
- }
-
- vCoeffs = get_subpel_filter_ver_signal_dir(filter_params, subpel_y_q4 - 1);
-
- if (!vCoeffs) {
- av1_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,
- subpel_y_q4, y_step_q4, conv_params);
- return;
- }
-
- verf[0] = *((const __m128i *)(vCoeffs));
- verf[1] = *((const __m128i *)(vCoeffs + 1));
- verf[2] = *((const __m128i *)(vCoeffs + 2));
- verf[3] = *((const __m128i *)(vCoeffs + 3));
- verf[4] = *((const __m128i *)(vCoeffs + 4));
- verf[5] = *((const __m128i *)(vCoeffs + 5));
-
- src -= src_stride * ((tapsNum >> 1) - 1);
- src_ptr = src;
-
- if (w > 4) {
- filter_vert_compute_large(src_ptr, src_stride, verf, tapsNum, store8p, w, h,
- dst_ptr, dst_stride);
- } else if (4 == w) {
- filter_vert_compute_small(src_ptr, src_stride, verf, tapsNum, store4p, h,
- dst_ptr, dst_stride);
- } else if (2 == w) {
- filter_vert_compute_small(src_ptr, src_stride, verf, tapsNum, store2p, h,
- dst_ptr, dst_stride);
- } else {
- assert(0);
- }
-}
-
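-// Build the 8-bit SIMD filter tables from the 16-bit reference coefficients.
-// Each horizontal entry stores the 10/12 taps twice, at byte offsets 0 and 2;
-// each vertical entry stores one pair of adjacent taps repeated across all
-// 16 bytes, matching the layouts expected by _mm_maddubs_epi16 above.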
-static void init_simd_horiz_filter(const int16_t *filter_ptr, int taps,
- int8_t (*simd_horiz_filter)[2][16]) {
- int shift;
- int offset = (12 - taps) / 2;
- const int16_t *filter_row;
- for (shift = 1; shift < SUBPEL_SHIFTS; ++shift) {
- int i;
- filter_row = filter_ptr + shift * taps;
- for (i = 0; i < offset; ++i) simd_horiz_filter[shift - 1][0][i] = 0;
-
- for (i = 0; i < offset + 2; ++i) simd_horiz_filter[shift - 1][1][i] = 0;
-
- for (i = 0; i < taps; ++i) {
- simd_horiz_filter[shift - 1][0][i + offset] = (int8_t)filter_row[i];
- simd_horiz_filter[shift - 1][1][i + offset + 2] = (int8_t)filter_row[i];
- }
-
- for (i = offset + taps; i < 16; ++i) simd_horiz_filter[shift - 1][0][i] = 0;
-
- for (i = offset + 2 + taps; i < 16; ++i)
- simd_horiz_filter[shift - 1][1][i] = 0;
- }
-}
-
-static void init_simd_vert_filter(const int16_t *filter_ptr, int taps,
- int8_t (*simd_vert_filter)[6][16]) {
- int shift;
- int offset = (12 - taps) / 2;
- const int16_t *filter_row;
- for (shift = 1; shift < SUBPEL_SHIFTS; ++shift) {
- int i;
- filter_row = filter_ptr + shift * taps;
- for (i = 0; i < 6; ++i) {
- int j;
- for (j = 0; j < 16; ++j) {
- int c = i * 2 + (j % 2) - offset;
- if (c >= 0 && c < taps)
- simd_vert_filter[shift - 1][i][j] = (int8_t)filter_row[c];
- else
- simd_vert_filter[shift - 1][i][j] = 0;
- }
- }
- }
-}
-
-typedef struct SimdFilter {
- InterpFilter interp_filter;
- int8_t (*simd_horiz_filter)[2][16];
- int8_t (*simd_vert_filter)[6][16];
-} SimdFilter;
-
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
-#define MULTITAP_FILTER_NUM 1
-SimdFilter simd_filters[MULTITAP_FILTER_NUM] = {
- { MULTITAP_SHARP, &sub_pel_filters_12sharp_signal_dir[0],
- &sub_pel_filters_12sharp_ver_signal_dir[0] },
-};
-#endif
-
-#if USE_TEMPORALFILTER_12TAP
-SimdFilter temporal_simd_filter = {
- TEMPORALFILTER_12TAP, &sub_pel_filters_temporalfilter_12_signal_dir[0],
- &sub_pel_filters_temporalfilter_12_ver_signal_dir[0]
-};
-#endif
-
-void av1_lowbd_convolve_init_ssse3(void) {
-#if USE_TEMPORALFILTER_12TAP
- {
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(temporal_simd_filter.interp_filter);
- int taps = filter_params.taps;
- const int16_t *filter_ptr = filter_params.filter_ptr;
- init_simd_horiz_filter(filter_ptr, taps,
- temporal_simd_filter.simd_horiz_filter);
- init_simd_vert_filter(filter_ptr, taps,
- temporal_simd_filter.simd_vert_filter);
- }
-#endif
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- {
- int i;
- for (i = 0; i < MULTITAP_FILTER_NUM; ++i) {
- InterpFilter interp_filter = simd_filters[i].interp_filter;
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(interp_filter);
- int taps = filter_params.taps;
- const int16_t *filter_ptr = filter_params.filter_ptr;
- init_simd_horiz_filter(filter_ptr, taps,
- simd_filters[i].simd_horiz_filter);
- init_simd_vert_filter(filter_ptr, taps, simd_filters[i].simd_vert_filter);
- }
- }
-#endif
- return;
-}
diff --git a/third_party/aom/av1/common/x86/av1_fwd_txfm1d_sse4.c b/third_party/aom/av1/common/x86/av1_fwd_txfm1d_sse4.c
deleted file mode 100644
index 97d2e74b1..000000000
--- a/third_party/aom/av1/common/x86/av1_fwd_txfm1d_sse4.c
+++ /dev/null
@@ -1,839 +0,0 @@
-#include "av1/common/x86/av1_txfm1d_sse4.h"
-
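-// 32-point forward DCT, four 32-bit coefficients per __m128i. Each column of
-// the input is run through the butterfly stages below; btf_32_sse4_1_type0/1
-// perform the two-point rotations at the cos_bit precision of each stage.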
-void av1_fdct32_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- const int txfm_size = 32;
- const int num_per_128 = 4;
- const int32_t *cospi;
- __m128i buf0[32];
- __m128i buf1[32];
- int col_num = txfm_size / num_per_128;
- int bit;
- int col;
- (void)stage_range;
- for (col = 0; col < col_num; col++) {
- // stage 0;
- int32_t stage_idx = 0;
- int j;
- for (j = 0; j < 32; ++j) {
- buf0[j] = input[j * col_num + col];
- }
-
- // stage 1
- stage_idx++;
- buf1[0] = _mm_add_epi32(buf0[0], buf0[31]);
- buf1[31] = _mm_sub_epi32(buf0[0], buf0[31]);
- buf1[1] = _mm_add_epi32(buf0[1], buf0[30]);
- buf1[30] = _mm_sub_epi32(buf0[1], buf0[30]);
- buf1[2] = _mm_add_epi32(buf0[2], buf0[29]);
- buf1[29] = _mm_sub_epi32(buf0[2], buf0[29]);
- buf1[3] = _mm_add_epi32(buf0[3], buf0[28]);
- buf1[28] = _mm_sub_epi32(buf0[3], buf0[28]);
- buf1[4] = _mm_add_epi32(buf0[4], buf0[27]);
- buf1[27] = _mm_sub_epi32(buf0[4], buf0[27]);
- buf1[5] = _mm_add_epi32(buf0[5], buf0[26]);
- buf1[26] = _mm_sub_epi32(buf0[5], buf0[26]);
- buf1[6] = _mm_add_epi32(buf0[6], buf0[25]);
- buf1[25] = _mm_sub_epi32(buf0[6], buf0[25]);
- buf1[7] = _mm_add_epi32(buf0[7], buf0[24]);
- buf1[24] = _mm_sub_epi32(buf0[7], buf0[24]);
- buf1[8] = _mm_add_epi32(buf0[8], buf0[23]);
- buf1[23] = _mm_sub_epi32(buf0[8], buf0[23]);
- buf1[9] = _mm_add_epi32(buf0[9], buf0[22]);
- buf1[22] = _mm_sub_epi32(buf0[9], buf0[22]);
- buf1[10] = _mm_add_epi32(buf0[10], buf0[21]);
- buf1[21] = _mm_sub_epi32(buf0[10], buf0[21]);
- buf1[11] = _mm_add_epi32(buf0[11], buf0[20]);
- buf1[20] = _mm_sub_epi32(buf0[11], buf0[20]);
- buf1[12] = _mm_add_epi32(buf0[12], buf0[19]);
- buf1[19] = _mm_sub_epi32(buf0[12], buf0[19]);
- buf1[13] = _mm_add_epi32(buf0[13], buf0[18]);
- buf1[18] = _mm_sub_epi32(buf0[13], buf0[18]);
- buf1[14] = _mm_add_epi32(buf0[14], buf0[17]);
- buf1[17] = _mm_sub_epi32(buf0[14], buf0[17]);
- buf1[15] = _mm_add_epi32(buf0[15], buf0[16]);
- buf1[16] = _mm_sub_epi32(buf0[15], buf0[16]);
-
- // stage 2
- stage_idx++;
- bit = cos_bit[stage_idx];
- cospi = cospi_arr(bit);
- buf0[0] = _mm_add_epi32(buf1[0], buf1[15]);
- buf0[15] = _mm_sub_epi32(buf1[0], buf1[15]);
- buf0[1] = _mm_add_epi32(buf1[1], buf1[14]);
- buf0[14] = _mm_sub_epi32(buf1[1], buf1[14]);
- buf0[2] = _mm_add_epi32(buf1[2], buf1[13]);
- buf0[13] = _mm_sub_epi32(buf1[2], buf1[13]);
- buf0[3] = _mm_add_epi32(buf1[3], buf1[12]);
- buf0[12] = _mm_sub_epi32(buf1[3], buf1[12]);
- buf0[4] = _mm_add_epi32(buf1[4], buf1[11]);
- buf0[11] = _mm_sub_epi32(buf1[4], buf1[11]);
- buf0[5] = _mm_add_epi32(buf1[5], buf1[10]);
- buf0[10] = _mm_sub_epi32(buf1[5], buf1[10]);
- buf0[6] = _mm_add_epi32(buf1[6], buf1[9]);
- buf0[9] = _mm_sub_epi32(buf1[6], buf1[9]);
- buf0[7] = _mm_add_epi32(buf1[7], buf1[8]);
- buf0[8] = _mm_sub_epi32(buf1[7], buf1[8]);
- buf0[16] = buf1[16];
- buf0[17] = buf1[17];
- buf0[18] = buf1[18];
- buf0[19] = buf1[19];
- btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[20], buf1[27], buf0[20],
- buf0[27], bit);
- btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[21], buf1[26], buf0[21],
- buf0[26], bit);
- btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[22], buf1[25], buf0[22],
- buf0[25], bit);
- btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[23], buf1[24], buf0[23],
- buf0[24], bit);
- buf0[28] = buf1[28];
- buf0[29] = buf1[29];
- buf0[30] = buf1[30];
- buf0[31] = buf1[31];
-
- // stage 3
- stage_idx++;
- bit = cos_bit[stage_idx];
- cospi = cospi_arr(bit);
- buf1[0] = _mm_add_epi32(buf0[0], buf0[7]);
- buf1[7] = _mm_sub_epi32(buf0[0], buf0[7]);
- buf1[1] = _mm_add_epi32(buf0[1], buf0[6]);
- buf1[6] = _mm_sub_epi32(buf0[1], buf0[6]);
- buf1[2] = _mm_add_epi32(buf0[2], buf0[5]);
- buf1[5] = _mm_sub_epi32(buf0[2], buf0[5]);
- buf1[3] = _mm_add_epi32(buf0[3], buf0[4]);
- buf1[4] = _mm_sub_epi32(buf0[3], buf0[4]);
- buf1[8] = buf0[8];
- buf1[9] = buf0[9];
- btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[10], buf0[13], buf1[10],
- buf1[13], bit);
- btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[11], buf0[12], buf1[11],
- buf1[12], bit);
- buf1[14] = buf0[14];
- buf1[15] = buf0[15];
- buf1[16] = _mm_add_epi32(buf0[16], buf0[23]);
- buf1[23] = _mm_sub_epi32(buf0[16], buf0[23]);
- buf1[17] = _mm_add_epi32(buf0[17], buf0[22]);
- buf1[22] = _mm_sub_epi32(buf0[17], buf0[22]);
- buf1[18] = _mm_add_epi32(buf0[18], buf0[21]);
- buf1[21] = _mm_sub_epi32(buf0[18], buf0[21]);
- buf1[19] = _mm_add_epi32(buf0[19], buf0[20]);
- buf1[20] = _mm_sub_epi32(buf0[19], buf0[20]);
- buf1[24] = _mm_sub_epi32(buf0[31], buf0[24]);
- buf1[31] = _mm_add_epi32(buf0[31], buf0[24]);
- buf1[25] = _mm_sub_epi32(buf0[30], buf0[25]);
- buf1[30] = _mm_add_epi32(buf0[30], buf0[25]);
- buf1[26] = _mm_sub_epi32(buf0[29], buf0[26]);
- buf1[29] = _mm_add_epi32(buf0[29], buf0[26]);
- buf1[27] = _mm_sub_epi32(buf0[28], buf0[27]);
- buf1[28] = _mm_add_epi32(buf0[28], buf0[27]);
-
- // stage 4
- stage_idx++;
- bit = cos_bit[stage_idx];
- cospi = cospi_arr(bit);
- buf0[0] = _mm_add_epi32(buf1[0], buf1[3]);
- buf0[3] = _mm_sub_epi32(buf1[0], buf1[3]);
- buf0[1] = _mm_add_epi32(buf1[1], buf1[2]);
- buf0[2] = _mm_sub_epi32(buf1[1], buf1[2]);
- buf0[4] = buf1[4];
- btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[5], buf1[6], buf0[5],
- buf0[6], bit);
- buf0[7] = buf1[7];
- buf0[8] = _mm_add_epi32(buf1[8], buf1[11]);
- buf0[11] = _mm_sub_epi32(buf1[8], buf1[11]);
- buf0[9] = _mm_add_epi32(buf1[9], buf1[10]);
- buf0[10] = _mm_sub_epi32(buf1[9], buf1[10]);
- buf0[12] = _mm_sub_epi32(buf1[15], buf1[12]);
- buf0[15] = _mm_add_epi32(buf1[15], buf1[12]);
- buf0[13] = _mm_sub_epi32(buf1[14], buf1[13]);
- buf0[14] = _mm_add_epi32(buf1[14], buf1[13]);
- buf0[16] = buf1[16];
- buf0[17] = buf1[17];
- btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[18], buf1[29], buf0[18],
- buf0[29], bit);
- btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[19], buf1[28], buf0[19],
- buf0[28], bit);
- btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[20], buf1[27], buf0[20],
- buf0[27], bit);
- btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[21], buf1[26], buf0[21],
- buf0[26], bit);
- buf0[22] = buf1[22];
- buf0[23] = buf1[23];
- buf0[24] = buf1[24];
- buf0[25] = buf1[25];
- buf0[30] = buf1[30];
- buf0[31] = buf1[31];
-
- // stage 5
- stage_idx++;
- bit = cos_bit[stage_idx];
- cospi = cospi_arr(bit);
- btf_32_sse4_1_type0(cospi[32], cospi[32], buf0[0], buf0[1], buf1[0],
- buf1[1], bit);
- btf_32_sse4_1_type1(cospi[48], cospi[16], buf0[2], buf0[3], buf1[2],
- buf1[3], bit);
- buf1[4] = _mm_add_epi32(buf0[4], buf0[5]);
- buf1[5] = _mm_sub_epi32(buf0[4], buf0[5]);
- buf1[6] = _mm_sub_epi32(buf0[7], buf0[6]);
- buf1[7] = _mm_add_epi32(buf0[7], buf0[6]);
- buf1[8] = buf0[8];
- btf_32_sse4_1_type0(-cospi[16], cospi[48], buf0[9], buf0[14], buf1[9],
- buf1[14], bit);
- btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf0[10], buf0[13], buf1[10],
- buf1[13], bit);
- buf1[11] = buf0[11];
- buf1[12] = buf0[12];
- buf1[15] = buf0[15];
- buf1[16] = _mm_add_epi32(buf0[16], buf0[19]);
- buf1[19] = _mm_sub_epi32(buf0[16], buf0[19]);
- buf1[17] = _mm_add_epi32(buf0[17], buf0[18]);
- buf1[18] = _mm_sub_epi32(buf0[17], buf0[18]);
- buf1[20] = _mm_sub_epi32(buf0[23], buf0[20]);
- buf1[23] = _mm_add_epi32(buf0[23], buf0[20]);
- buf1[21] = _mm_sub_epi32(buf0[22], buf0[21]);
- buf1[22] = _mm_add_epi32(buf0[22], buf0[21]);
- buf1[24] = _mm_add_epi32(buf0[24], buf0[27]);
- buf1[27] = _mm_sub_epi32(buf0[24], buf0[27]);
- buf1[25] = _mm_add_epi32(buf0[25], buf0[26]);
- buf1[26] = _mm_sub_epi32(buf0[25], buf0[26]);
- buf1[28] = _mm_sub_epi32(buf0[31], buf0[28]);
- buf1[31] = _mm_add_epi32(buf0[31], buf0[28]);
- buf1[29] = _mm_sub_epi32(buf0[30], buf0[29]);
- buf1[30] = _mm_add_epi32(buf0[30], buf0[29]);
-
- // stage 6
- stage_idx++;
- bit = cos_bit[stage_idx];
- cospi = cospi_arr(bit);
- buf0[0] = buf1[0];
- buf0[1] = buf1[1];
- buf0[2] = buf1[2];
- buf0[3] = buf1[3];
- btf_32_sse4_1_type1(cospi[56], cospi[8], buf1[4], buf1[7], buf0[4], buf0[7],
- bit);
- btf_32_sse4_1_type1(cospi[24], cospi[40], buf1[5], buf1[6], buf0[5],
- buf0[6], bit);
- buf0[8] = _mm_add_epi32(buf1[8], buf1[9]);
- buf0[9] = _mm_sub_epi32(buf1[8], buf1[9]);
- buf0[10] = _mm_sub_epi32(buf1[11], buf1[10]);
- buf0[11] = _mm_add_epi32(buf1[11], buf1[10]);
- buf0[12] = _mm_add_epi32(buf1[12], buf1[13]);
- buf0[13] = _mm_sub_epi32(buf1[12], buf1[13]);
- buf0[14] = _mm_sub_epi32(buf1[15], buf1[14]);
- buf0[15] = _mm_add_epi32(buf1[15], buf1[14]);
- buf0[16] = buf1[16];
- btf_32_sse4_1_type0(-cospi[8], cospi[56], buf1[17], buf1[30], buf0[17],
- buf0[30], bit);
- btf_32_sse4_1_type0(-cospi[56], -cospi[8], buf1[18], buf1[29], buf0[18],
- buf0[29], bit);
- buf0[19] = buf1[19];
- buf0[20] = buf1[20];
- btf_32_sse4_1_type0(-cospi[40], cospi[24], buf1[21], buf1[26], buf0[21],
- buf0[26], bit);
- btf_32_sse4_1_type0(-cospi[24], -cospi[40], buf1[22], buf1[25], buf0[22],
- buf0[25], bit);
- buf0[23] = buf1[23];
- buf0[24] = buf1[24];
- buf0[27] = buf1[27];
- buf0[28] = buf1[28];
- buf0[31] = buf1[31];
-
- // stage 7
- stage_idx++;
- bit = cos_bit[stage_idx];
- cospi = cospi_arr(bit);
- buf1[0] = buf0[0];
- buf1[1] = buf0[1];
- buf1[2] = buf0[2];
- buf1[3] = buf0[3];
- buf1[4] = buf0[4];
- buf1[5] = buf0[5];
- buf1[6] = buf0[6];
- buf1[7] = buf0[7];
- btf_32_sse4_1_type1(cospi[60], cospi[4], buf0[8], buf0[15], buf1[8],
- buf1[15], bit);
- btf_32_sse4_1_type1(cospi[28], cospi[36], buf0[9], buf0[14], buf1[9],
- buf1[14], bit);
- btf_32_sse4_1_type1(cospi[44], cospi[20], buf0[10], buf0[13], buf1[10],
- buf1[13], bit);
- btf_32_sse4_1_type1(cospi[12], cospi[52], buf0[11], buf0[12], buf1[11],
- buf1[12], bit);
- buf1[16] = _mm_add_epi32(buf0[16], buf0[17]);
- buf1[17] = _mm_sub_epi32(buf0[16], buf0[17]);
- buf1[18] = _mm_sub_epi32(buf0[19], buf0[18]);
- buf1[19] = _mm_add_epi32(buf0[19], buf0[18]);
- buf1[20] = _mm_add_epi32(buf0[20], buf0[21]);
- buf1[21] = _mm_sub_epi32(buf0[20], buf0[21]);
- buf1[22] = _mm_sub_epi32(buf0[23], buf0[22]);
- buf1[23] = _mm_add_epi32(buf0[23], buf0[22]);
- buf1[24] = _mm_add_epi32(buf0[24], buf0[25]);
- buf1[25] = _mm_sub_epi32(buf0[24], buf0[25]);
- buf1[26] = _mm_sub_epi32(buf0[27], buf0[26]);
- buf1[27] = _mm_add_epi32(buf0[27], buf0[26]);
- buf1[28] = _mm_add_epi32(buf0[28], buf0[29]);
- buf1[29] = _mm_sub_epi32(buf0[28], buf0[29]);
- buf1[30] = _mm_sub_epi32(buf0[31], buf0[30]);
- buf1[31] = _mm_add_epi32(buf0[31], buf0[30]);
-
- // stage 8
- stage_idx++;
- bit = cos_bit[stage_idx];
- cospi = cospi_arr(bit);
- buf0[0] = buf1[0];
- buf0[1] = buf1[1];
- buf0[2] = buf1[2];
- buf0[3] = buf1[3];
- buf0[4] = buf1[4];
- buf0[5] = buf1[5];
- buf0[6] = buf1[6];
- buf0[7] = buf1[7];
- buf0[8] = buf1[8];
- buf0[9] = buf1[9];
- buf0[10] = buf1[10];
- buf0[11] = buf1[11];
- buf0[12] = buf1[12];
- buf0[13] = buf1[13];
- buf0[14] = buf1[14];
- buf0[15] = buf1[15];
- btf_32_sse4_1_type1(cospi[62], cospi[2], buf1[16], buf1[31], buf0[16],
- buf0[31], bit);
- btf_32_sse4_1_type1(cospi[30], cospi[34], buf1[17], buf1[30], buf0[17],
- buf0[30], bit);
- btf_32_sse4_1_type1(cospi[46], cospi[18], buf1[18], buf1[29], buf0[18],
- buf0[29], bit);
- btf_32_sse4_1_type1(cospi[14], cospi[50], buf1[19], buf1[28], buf0[19],
- buf0[28], bit);
- btf_32_sse4_1_type1(cospi[54], cospi[10], buf1[20], buf1[27], buf0[20],
- buf0[27], bit);
- btf_32_sse4_1_type1(cospi[22], cospi[42], buf1[21], buf1[26], buf0[21],
- buf0[26], bit);
- btf_32_sse4_1_type1(cospi[38], cospi[26], buf1[22], buf1[25], buf0[22],
- buf0[25], bit);
- btf_32_sse4_1_type1(cospi[6], cospi[58], buf1[23], buf1[24], buf0[23],
- buf0[24], bit);
-
- // stage 9
- stage_idx++;
- buf1[0] = buf0[0];
- buf1[1] = buf0[16];
- buf1[2] = buf0[8];
- buf1[3] = buf0[24];
- buf1[4] = buf0[4];
- buf1[5] = buf0[20];
- buf1[6] = buf0[12];
- buf1[7] = buf0[28];
- buf1[8] = buf0[2];
- buf1[9] = buf0[18];
- buf1[10] = buf0[10];
- buf1[11] = buf0[26];
- buf1[12] = buf0[6];
- buf1[13] = buf0[22];
- buf1[14] = buf0[14];
- buf1[15] = buf0[30];
- buf1[16] = buf0[1];
- buf1[17] = buf0[17];
- buf1[18] = buf0[9];
- buf1[19] = buf0[25];
- buf1[20] = buf0[5];
- buf1[21] = buf0[21];
- buf1[22] = buf0[13];
- buf1[23] = buf0[29];
- buf1[24] = buf0[3];
- buf1[25] = buf0[19];
- buf1[26] = buf0[11];
- buf1[27] = buf0[27];
- buf1[28] = buf0[7];
- buf1[29] = buf0[23];
- buf1[30] = buf0[15];
- buf1[31] = buf0[31];
-
- for (j = 0; j < 32; ++j) {
- output[j * col_num + col] = buf1[j];
- }
- }
-}
-
-void av1_fadst4_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- const int txfm_size = 4;
- const int num_per_128 = 4;
- const int32_t *cospi;
- __m128i buf0[4];
- __m128i buf1[4];
- int col_num = txfm_size / num_per_128;
- int bit;
- int col;
- (void)stage_range;
- for (col = 0; col < col_num; col++) {
- // stage 0;
- int32_t stage_idx = 0;
- int j;
- for (j = 0; j < 4; ++j) {
- buf0[j] = input[j * col_num + col];
- }
-
- // stage 1
- stage_idx++;
- buf1[0] = buf0[3];
- buf1[1] = buf0[0];
- buf1[2] = buf0[1];
- buf1[3] = buf0[2];
-
- // stage 2
- stage_idx++;
- bit = cos_bit[stage_idx];
- cospi = cospi_arr(bit);
- btf_32_sse4_1_type0(cospi[8], cospi[56], buf1[0], buf1[1], buf0[0], buf0[1],
- bit);
- btf_32_sse4_1_type0(cospi[40], cospi[24], buf1[2], buf1[3], buf0[2],
- buf0[3], bit);
-
- // stage 3
- stage_idx++;
- buf1[0] = _mm_add_epi32(buf0[0], buf0[2]);
- buf1[2] = _mm_sub_epi32(buf0[0], buf0[2]);
- buf1[1] = _mm_add_epi32(buf0[1], buf0[3]);
- buf1[3] = _mm_sub_epi32(buf0[1], buf0[3]);
-
- // stage 4
- stage_idx++;
- bit = cos_bit[stage_idx];
- cospi = cospi_arr(bit);
- buf0[0] = buf1[0];
- buf0[1] = buf1[1];
- btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[2], buf1[3], buf0[2],
- buf0[3], bit);
-
- // stage 5
- stage_idx++;
- buf1[0] = buf0[0];
- buf1[1] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[2]);
- buf1[2] = buf0[3];
- buf1[3] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[1]);
-
- for (j = 0; j < 4; ++j) {
- output[j * col_num + col] = buf1[j];
- }
- }
-}
-
-void av1_fadst32_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range) {
- const int txfm_size = 32;
- const int num_per_128 = 4;
- const int32_t *cospi;
- __m128i buf0[32];
- __m128i buf1[32];
- int col_num = txfm_size / num_per_128;
- int bit;
- int col;
- (void)stage_range;
- for (col = 0; col < col_num; col++) {
- // stage 0;
- int32_t stage_idx = 0;
- int j;
- for (j = 0; j < 32; ++j) {
- buf0[j] = input[j * col_num + col];
- }
-
- // stage 1
- stage_idx++;
- buf1[0] = buf0[31];
- buf1[1] = buf0[0];
- buf1[2] = buf0[29];
- buf1[3] = buf0[2];
- buf1[4] = buf0[27];
- buf1[5] = buf0[4];
- buf1[6] = buf0[25];
- buf1[7] = buf0[6];
- buf1[8] = buf0[23];
- buf1[9] = buf0[8];
- buf1[10] = buf0[21];
- buf1[11] = buf0[10];
- buf1[12] = buf0[19];
- buf1[13] = buf0[12];
- buf1[14] = buf0[17];
- buf1[15] = buf0[14];
- buf1[16] = buf0[15];
- buf1[17] = buf0[16];
- buf1[18] = buf0[13];
- buf1[19] = buf0[18];
- buf1[20] = buf0[11];
- buf1[21] = buf0[20];
- buf1[22] = buf0[9];
- buf1[23] = buf0[22];
- buf1[24] = buf0[7];
- buf1[25] = buf0[24];
- buf1[26] = buf0[5];
- buf1[27] = buf0[26];
- buf1[28] = buf0[3];
- buf1[29] = buf0[28];
- buf1[30] = buf0[1];
- buf1[31] = buf0[30];
-
- // stage 2
- stage_idx++;
- bit = cos_bit[stage_idx];
- cospi = cospi_arr(bit);
- btf_32_sse4_1_type0(cospi[1], cospi[63], buf1[0], buf1[1], buf0[0], buf0[1],
- bit);
- btf_32_sse4_1_type0(cospi[5], cospi[59], buf1[2], buf1[3], buf0[2], buf0[3],
- bit);
- btf_32_sse4_1_type0(cospi[9], cospi[55], buf1[4], buf1[5], buf0[4], buf0[5],
- bit);
- btf_32_sse4_1_type0(cospi[13], cospi[51], buf1[6], buf1[7], buf0[6],
- buf0[7], bit);
- btf_32_sse4_1_type0(cospi[17], cospi[47], buf1[8], buf1[9], buf0[8],
- buf0[9], bit);
- btf_32_sse4_1_type0(cospi[21], cospi[43], buf1[10], buf1[11], buf0[10],
- buf0[11], bit);
- btf_32_sse4_1_type0(cospi[25], cospi[39], buf1[12], buf1[13], buf0[12],
- buf0[13], bit);
- btf_32_sse4_1_type0(cospi[29], cospi[35], buf1[14], buf1[15], buf0[14],
- buf0[15], bit);
- btf_32_sse4_1_type0(cospi[33], cospi[31], buf1[16], buf1[17], buf0[16],
- buf0[17], bit);
- btf_32_sse4_1_type0(cospi[37], cospi[27], buf1[18], buf1[19], buf0[18],
- buf0[19], bit);
- btf_32_sse4_1_type0(cospi[41], cospi[23], buf1[20], buf1[21], buf0[20],
- buf0[21], bit);
- btf_32_sse4_1_type0(cospi[45], cospi[19], buf1[22], buf1[23], buf0[22],
- buf0[23], bit);
- btf_32_sse4_1_type0(cospi[49], cospi[15], buf1[24], buf1[25], buf0[24],
- buf0[25], bit);
- btf_32_sse4_1_type0(cospi[53], cospi[11], buf1[26], buf1[27], buf0[26],
- buf0[27], bit);
- btf_32_sse4_1_type0(cospi[57], cospi[7], buf1[28], buf1[29], buf0[28],
- buf0[29], bit);
- btf_32_sse4_1_type0(cospi[61], cospi[3], buf1[30], buf1[31], buf0[30],
- buf0[31], bit);
-
- // stage 3
- stage_idx++;
- buf1[0] = _mm_add_epi32(buf0[0], buf0[16]);
- buf1[16] = _mm_sub_epi32(buf0[0], buf0[16]);
- buf1[1] = _mm_add_epi32(buf0[1], buf0[17]);
- buf1[17] = _mm_sub_epi32(buf0[1], buf0[17]);
- buf1[2] = _mm_add_epi32(buf0[2], buf0[18]);
- buf1[18] = _mm_sub_epi32(buf0[2], buf0[18]);
- buf1[3] = _mm_add_epi32(buf0[3], buf0[19]);
- buf1[19] = _mm_sub_epi32(buf0[3], buf0[19]);
- buf1[4] = _mm_add_epi32(buf0[4], buf0[20]);
- buf1[20] = _mm_sub_epi32(buf0[4], buf0[20]);
- buf1[5] = _mm_add_epi32(buf0[5], buf0[21]);
- buf1[21] = _mm_sub_epi32(buf0[5], buf0[21]);
- buf1[6] = _mm_add_epi32(buf0[6], buf0[22]);
- buf1[22] = _mm_sub_epi32(buf0[6], buf0[22]);
- buf1[7] = _mm_add_epi32(buf0[7], buf0[23]);
- buf1[23] = _mm_sub_epi32(buf0[7], buf0[23]);
- buf1[8] = _mm_add_epi32(buf0[8], buf0[24]);
- buf1[24] = _mm_sub_epi32(buf0[8], buf0[24]);
- buf1[9] = _mm_add_epi32(buf0[9], buf0[25]);
- buf1[25] = _mm_sub_epi32(buf0[9], buf0[25]);
- buf1[10] = _mm_add_epi32(buf0[10], buf0[26]);
- buf1[26] = _mm_sub_epi32(buf0[10], buf0[26]);
- buf1[11] = _mm_add_epi32(buf0[11], buf0[27]);
- buf1[27] = _mm_sub_epi32(buf0[11], buf0[27]);
- buf1[12] = _mm_add_epi32(buf0[12], buf0[28]);
- buf1[28] = _mm_sub_epi32(buf0[12], buf0[28]);
- buf1[13] = _mm_add_epi32(buf0[13], buf0[29]);
- buf1[29] = _mm_sub_epi32(buf0[13], buf0[29]);
- buf1[14] = _mm_add_epi32(buf0[14], buf0[30]);
- buf1[30] = _mm_sub_epi32(buf0[14], buf0[30]);
- buf1[15] = _mm_add_epi32(buf0[15], buf0[31]);
- buf1[31] = _mm_sub_epi32(buf0[15], buf0[31]);
-
- // stage 4
- stage_idx++;
- bit = cos_bit[stage_idx];
- cospi = cospi_arr(bit);
- buf0[0] = buf1[0];
- buf0[1] = buf1[1];
- buf0[2] = buf1[2];
- buf0[3] = buf1[3];
- buf0[4] = buf1[4];
- buf0[5] = buf1[5];
- buf0[6] = buf1[6];
- buf0[7] = buf1[7];
- buf0[8] = buf1[8];
- buf0[9] = buf1[9];
- buf0[10] = buf1[10];
- buf0[11] = buf1[11];
- buf0[12] = buf1[12];
- buf0[13] = buf1[13];
- buf0[14] = buf1[14];
- buf0[15] = buf1[15];
- btf_32_sse4_1_type0(cospi[4], cospi[60], buf1[16], buf1[17], buf0[16],
- buf0[17], bit);
- btf_32_sse4_1_type0(cospi[20], cospi[44], buf1[18], buf1[19], buf0[18],
- buf0[19], bit);
- btf_32_sse4_1_type0(cospi[36], cospi[28], buf1[20], buf1[21], buf0[20],
- buf0[21], bit);
- btf_32_sse4_1_type0(cospi[52], cospi[12], buf1[22], buf1[23], buf0[22],
- buf0[23], bit);
- btf_32_sse4_1_type0(-cospi[60], cospi[4], buf1[24], buf1[25], buf0[24],
- buf0[25], bit);
- btf_32_sse4_1_type0(-cospi[44], cospi[20], buf1[26], buf1[27], buf0[26],
- buf0[27], bit);
- btf_32_sse4_1_type0(-cospi[28], cospi[36], buf1[28], buf1[29], buf0[28],
- buf0[29], bit);
- btf_32_sse4_1_type0(-cospi[12], cospi[52], buf1[30], buf1[31], buf0[30],
- buf0[31], bit);
-
- // stage 5
- stage_idx++;
- buf1[0] = _mm_add_epi32(buf0[0], buf0[8]);
- buf1[8] = _mm_sub_epi32(buf0[0], buf0[8]);
- buf1[1] = _mm_add_epi32(buf0[1], buf0[9]);
- buf1[9] = _mm_sub_epi32(buf0[1], buf0[9]);
- buf1[2] = _mm_add_epi32(buf0[2], buf0[10]);
- buf1[10] = _mm_sub_epi32(buf0[2], buf0[10]);
- buf1[3] = _mm_add_epi32(buf0[3], buf0[11]);
- buf1[11] = _mm_sub_epi32(buf0[3], buf0[11]);
- buf1[4] = _mm_add_epi32(buf0[4], buf0[12]);
- buf1[12] = _mm_sub_epi32(buf0[4], buf0[12]);
- buf1[5] = _mm_add_epi32(buf0[5], buf0[13]);
- buf1[13] = _mm_sub_epi32(buf0[5], buf0[13]);
- buf1[6] = _mm_add_epi32(buf0[6], buf0[14]);
- buf1[14] = _mm_sub_epi32(buf0[6], buf0[14]);
- buf1[7] = _mm_add_epi32(buf0[7], buf0[15]);
- buf1[15] = _mm_sub_epi32(buf0[7], buf0[15]);
- buf1[16] = _mm_add_epi32(buf0[16], buf0[24]);
- buf1[24] = _mm_sub_epi32(buf0[16], buf0[24]);
- buf1[17] = _mm_add_epi32(buf0[17], buf0[25]);
- buf1[25] = _mm_sub_epi32(buf0[17], buf0[25]);
- buf1[18] = _mm_add_epi32(buf0[18], buf0[26]);
- buf1[26] = _mm_sub_epi32(buf0[18], buf0[26]);
- buf1[19] = _mm_add_epi32(buf0[19], buf0[27]);
- buf1[27] = _mm_sub_epi32(buf0[19], buf0[27]);
- buf1[20] = _mm_add_epi32(buf0[20], buf0[28]);
- buf1[28] = _mm_sub_epi32(buf0[20], buf0[28]);
- buf1[21] = _mm_add_epi32(buf0[21], buf0[29]);
- buf1[29] = _mm_sub_epi32(buf0[21], buf0[29]);
- buf1[22] = _mm_add_epi32(buf0[22], buf0[30]);
- buf1[30] = _mm_sub_epi32(buf0[22], buf0[30]);
- buf1[23] = _mm_add_epi32(buf0[23], buf0[31]);
- buf1[31] = _mm_sub_epi32(buf0[23], buf0[31]);
-
- // stage 6
- stage_idx++;
- bit = cos_bit[stage_idx];
- cospi = cospi_arr(bit);
- buf0[0] = buf1[0];
- buf0[1] = buf1[1];
- buf0[2] = buf1[2];
- buf0[3] = buf1[3];
- buf0[4] = buf1[4];
- buf0[5] = buf1[5];
- buf0[6] = buf1[6];
- buf0[7] = buf1[7];
- btf_32_sse4_1_type0(cospi[8], cospi[56], buf1[8], buf1[9], buf0[8], buf0[9],
- bit);
- btf_32_sse4_1_type0(cospi[40], cospi[24], buf1[10], buf1[11], buf0[10],
- buf0[11], bit);
- btf_32_sse4_1_type0(-cospi[56], cospi[8], buf1[12], buf1[13], buf0[12],
- buf0[13], bit);
- btf_32_sse4_1_type0(-cospi[24], cospi[40], buf1[14], buf1[15], buf0[14],
- buf0[15], bit);
- buf0[16] = buf1[16];
- buf0[17] = buf1[17];
- buf0[18] = buf1[18];
- buf0[19] = buf1[19];
- buf0[20] = buf1[20];
- buf0[21] = buf1[21];
- buf0[22] = buf1[22];
- buf0[23] = buf1[23];
- btf_32_sse4_1_type0(cospi[8], cospi[56], buf1[24], buf1[25], buf0[24],
- buf0[25], bit);
- btf_32_sse4_1_type0(cospi[40], cospi[24], buf1[26], buf1[27], buf0[26],
- buf0[27], bit);
- btf_32_sse4_1_type0(-cospi[56], cospi[8], buf1[28], buf1[29], buf0[28],
- buf0[29], bit);
- btf_32_sse4_1_type0(-cospi[24], cospi[40], buf1[30], buf1[31], buf0[30],
- buf0[31], bit);
-
- // stage 7
- stage_idx++;
- buf1[0] = _mm_add_epi32(buf0[0], buf0[4]);
- buf1[4] = _mm_sub_epi32(buf0[0], buf0[4]);
- buf1[1] = _mm_add_epi32(buf0[1], buf0[5]);
- buf1[5] = _mm_sub_epi32(buf0[1], buf0[5]);
- buf1[2] = _mm_add_epi32(buf0[2], buf0[6]);
- buf1[6] = _mm_sub_epi32(buf0[2], buf0[6]);
- buf1[3] = _mm_add_epi32(buf0[3], buf0[7]);
- buf1[7] = _mm_sub_epi32(buf0[3], buf0[7]);
- buf1[8] = _mm_add_epi32(buf0[8], buf0[12]);
- buf1[12] = _mm_sub_epi32(buf0[8], buf0[12]);
- buf1[9] = _mm_add_epi32(buf0[9], buf0[13]);
- buf1[13] = _mm_sub_epi32(buf0[9], buf0[13]);
- buf1[10] = _mm_add_epi32(buf0[10], buf0[14]);
- buf1[14] = _mm_sub_epi32(buf0[10], buf0[14]);
- buf1[11] = _mm_add_epi32(buf0[11], buf0[15]);
- buf1[15] = _mm_sub_epi32(buf0[11], buf0[15]);
- buf1[16] = _mm_add_epi32(buf0[16], buf0[20]);
- buf1[20] = _mm_sub_epi32(buf0[16], buf0[20]);
- buf1[17] = _mm_add_epi32(buf0[17], buf0[21]);
- buf1[21] = _mm_sub_epi32(buf0[17], buf0[21]);
- buf1[18] = _mm_add_epi32(buf0[18], buf0[22]);
- buf1[22] = _mm_sub_epi32(buf0[18], buf0[22]);
- buf1[19] = _mm_add_epi32(buf0[19], buf0[23]);
- buf1[23] = _mm_sub_epi32(buf0[19], buf0[23]);
- buf1[24] = _mm_add_epi32(buf0[24], buf0[28]);
- buf1[28] = _mm_sub_epi32(buf0[24], buf0[28]);
- buf1[25] = _mm_add_epi32(buf0[25], buf0[29]);
- buf1[29] = _mm_sub_epi32(buf0[25], buf0[29]);
- buf1[26] = _mm_add_epi32(buf0[26], buf0[30]);
- buf1[30] = _mm_sub_epi32(buf0[26], buf0[30]);
- buf1[27] = _mm_add_epi32(buf0[27], buf0[31]);
- buf1[31] = _mm_sub_epi32(buf0[27], buf0[31]);
-
- // stage 8
- stage_idx++;
- bit = cos_bit[stage_idx];
- cospi = cospi_arr(bit);
- buf0[0] = buf1[0];
- buf0[1] = buf1[1];
- buf0[2] = buf1[2];
- buf0[3] = buf1[3];
- btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[4], buf1[5], buf0[4],
- buf0[5], bit);
- btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[6], buf1[7], buf0[6],
- buf0[7], bit);
- buf0[8] = buf1[8];
- buf0[9] = buf1[9];
- buf0[10] = buf1[10];
- buf0[11] = buf1[11];
- btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[12], buf1[13], buf0[12],
- buf0[13], bit);
- btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[14], buf1[15], buf0[14],
- buf0[15], bit);
- buf0[16] = buf1[16];
- buf0[17] = buf1[17];
- buf0[18] = buf1[18];
- buf0[19] = buf1[19];
- btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[20], buf1[21], buf0[20],
- buf0[21], bit);
- btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[22], buf1[23], buf0[22],
- buf0[23], bit);
- buf0[24] = buf1[24];
- buf0[25] = buf1[25];
- buf0[26] = buf1[26];
- buf0[27] = buf1[27];
- btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[28], buf1[29], buf0[28],
- buf0[29], bit);
- btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[30], buf1[31], buf0[30],
- buf0[31], bit);
-
- // stage 9
- stage_idx++;
- buf1[0] = _mm_add_epi32(buf0[0], buf0[2]);
- buf1[2] = _mm_sub_epi32(buf0[0], buf0[2]);
- buf1[1] = _mm_add_epi32(buf0[1], buf0[3]);
- buf1[3] = _mm_sub_epi32(buf0[1], buf0[3]);
- buf1[4] = _mm_add_epi32(buf0[4], buf0[6]);
- buf1[6] = _mm_sub_epi32(buf0[4], buf0[6]);
- buf1[5] = _mm_add_epi32(buf0[5], buf0[7]);
- buf1[7] = _mm_sub_epi32(buf0[5], buf0[7]);
- buf1[8] = _mm_add_epi32(buf0[8], buf0[10]);
- buf1[10] = _mm_sub_epi32(buf0[8], buf0[10]);
- buf1[9] = _mm_add_epi32(buf0[9], buf0[11]);
- buf1[11] = _mm_sub_epi32(buf0[9], buf0[11]);
- buf1[12] = _mm_add_epi32(buf0[12], buf0[14]);
- buf1[14] = _mm_sub_epi32(buf0[12], buf0[14]);
- buf1[13] = _mm_add_epi32(buf0[13], buf0[15]);
- buf1[15] = _mm_sub_epi32(buf0[13], buf0[15]);
- buf1[16] = _mm_add_epi32(buf0[16], buf0[18]);
- buf1[18] = _mm_sub_epi32(buf0[16], buf0[18]);
- buf1[17] = _mm_add_epi32(buf0[17], buf0[19]);
- buf1[19] = _mm_sub_epi32(buf0[17], buf0[19]);
- buf1[20] = _mm_add_epi32(buf0[20], buf0[22]);
- buf1[22] = _mm_sub_epi32(buf0[20], buf0[22]);
- buf1[21] = _mm_add_epi32(buf0[21], buf0[23]);
- buf1[23] = _mm_sub_epi32(buf0[21], buf0[23]);
- buf1[24] = _mm_add_epi32(buf0[24], buf0[26]);
- buf1[26] = _mm_sub_epi32(buf0[24], buf0[26]);
- buf1[25] = _mm_add_epi32(buf0[25], buf0[27]);
- buf1[27] = _mm_sub_epi32(buf0[25], buf0[27]);
- buf1[28] = _mm_add_epi32(buf0[28], buf0[30]);
- buf1[30] = _mm_sub_epi32(buf0[28], buf0[30]);
- buf1[29] = _mm_add_epi32(buf0[29], buf0[31]);
- buf1[31] = _mm_sub_epi32(buf0[29], buf0[31]);
-
- // stage 10
- stage_idx++;
- bit = cos_bit[stage_idx];
- cospi = cospi_arr(bit);
- buf0[0] = buf1[0];
- buf0[1] = buf1[1];
- btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[2], buf1[3], buf0[2],
- buf0[3], bit);
- buf0[4] = buf1[4];
- buf0[5] = buf1[5];
- btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[6], buf1[7], buf0[6],
- buf0[7], bit);
- buf0[8] = buf1[8];
- buf0[9] = buf1[9];
- btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[10], buf1[11], buf0[10],
- buf0[11], bit);
- buf0[12] = buf1[12];
- buf0[13] = buf1[13];
- btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[14], buf1[15], buf0[14],
- buf0[15], bit);
- buf0[16] = buf1[16];
- buf0[17] = buf1[17];
- btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[18], buf1[19], buf0[18],
- buf0[19], bit);
- buf0[20] = buf1[20];
- buf0[21] = buf1[21];
- btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[22], buf1[23], buf0[22],
- buf0[23], bit);
- buf0[24] = buf1[24];
- buf0[25] = buf1[25];
- btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[26], buf1[27], buf0[26],
- buf0[27], bit);
- buf0[28] = buf1[28];
- buf0[29] = buf1[29];
- btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[30], buf1[31], buf0[30],
- buf0[31], bit);
-
- // stage 11
- stage_idx++;
- buf1[0] = buf0[0];
- buf1[1] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[16]);
- buf1[2] = buf0[24];
- buf1[3] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[8]);
- buf1[4] = buf0[12];
- buf1[5] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[28]);
- buf1[6] = buf0[20];
- buf1[7] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[4]);
- buf1[8] = buf0[6];
- buf1[9] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[22]);
- buf1[10] = buf0[30];
- buf1[11] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[14]);
- buf1[12] = buf0[10];
- buf1[13] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[26]);
- buf1[14] = buf0[18];
- buf1[15] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[2]);
- buf1[16] = buf0[3];
- buf1[17] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[19]);
- buf1[18] = buf0[27];
- buf1[19] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[11]);
- buf1[20] = buf0[15];
- buf1[21] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[31]);
- buf1[22] = buf0[23];
- buf1[23] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[7]);
- buf1[24] = buf0[5];
- buf1[25] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[21]);
- buf1[26] = buf0[29];
- buf1[27] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[13]);
- buf1[28] = buf0[9];
- buf1[29] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[25]);
- buf1[30] = buf0[17];
- buf1[31] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[1]);
-
- for (j = 0; j < 32; ++j) {
- output[j * col_num + col] = buf1[j];
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/av1_fwd_txfm2d_sse4.c b/third_party/aom/av1/common/x86/av1_fwd_txfm2d_sse4.c
deleted file mode 100644
index 58ede028a..000000000
--- a/third_party/aom/av1/common/x86/av1_fwd_txfm2d_sse4.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "./av1_rtcd.h"
-#include "av1/common/enums.h"
-#include "av1/common/av1_txfm.h"
-#include "av1/common/x86/av1_txfm1d_sse4.h"
-
-static INLINE void int16_array_with_stride_to_int32_array_without_stride(
- const int16_t *input, int stride, int32_t *output, int txfm1d_size) {
- int r, c;
- for (r = 0; r < txfm1d_size; r++) {
- for (c = 0; c < txfm1d_size; c++) {
- output[r * txfm1d_size + c] = (int32_t)input[r * stride + c];
- }
- }
-}
-
-typedef void (*TxfmFuncSSE2)(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-
-static INLINE TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
- switch (txfm_type) {
- case TXFM_TYPE_DCT32: return av1_fdct32_new_sse4_1; break;
- case TXFM_TYPE_ADST32: return av1_fadst32_new_sse4_1; break;
- default: assert(0);
- }
- return NULL;
-}
-
-static INLINE void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output,
- const int stride,
- const TXFM_2D_FLIP_CFG *cfg,
- int32_t *txfm_buf) {
- // TODO(sarahparker) This does not currently support rectangular transforms
- // and will break without splitting txfm_size out into row and col size.
- // Rectangular transforms use c code only, so it should be ok for now.
- // It will be corrected when there are sse implementations for rectangular
- // transforms.
- assert(cfg->row_cfg->txfm_size == cfg->col_cfg->txfm_size);
- const int txfm_size = cfg->row_cfg->txfm_size;
- const int8_t *shift = cfg->row_cfg->shift;
- const int8_t *stage_range_col = cfg->col_cfg->stage_range;
- const int8_t *stage_range_row = cfg->row_cfg->stage_range;
- const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
- const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
- const TxfmFuncSSE2 txfm_func_col =
- fwd_txfm_type_to_func(cfg->col_cfg->txfm_type);
- const TxfmFuncSSE2 txfm_func_row =
- fwd_txfm_type_to_func(cfg->row_cfg->txfm_type);
-
- __m128i *buf_128 = (__m128i *)txfm_buf;
- __m128i *out_128 = (__m128i *)output;
- int num_per_128 = 4;
- int txfm2d_size_128 = txfm_size * txfm_size / num_per_128;
-
- int16_array_with_stride_to_int32_array_without_stride(input, stride, txfm_buf,
- txfm_size);
- round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[0]);
- txfm_func_col(out_128, buf_128, cos_bit_col, stage_range_col);
- round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[1]);
- transpose_32(txfm_size, out_128, buf_128);
- txfm_func_row(buf_128, out_128, cos_bit_row, stage_range_row);
- round_shift_array_32_sse4_1(out_128, buf_128, txfm2d_size_128, -shift[2]);
- transpose_32(txfm_size, buf_128, out_128);
-}
-
-void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- DECLARE_ALIGNED(16, int32_t, txfm_buf[1024]);
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X32);
- (void)bd;
- fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
-}
diff --git a/third_party/aom/av1/common/x86/av1_highbd_convolve_sse4.c b/third_party/aom/av1/common/x86/av1_highbd_convolve_sse4.c
index 68461bc36..212d3bd72 100644
--- a/third_party/aom/av1/common/x86/av1_highbd_convolve_sse4.c
+++ b/third_party/aom/av1/common/x86/av1_highbd_convolve_sse4.c
@@ -12,81 +12,14 @@
#include <assert.h>
#include <smmintrin.h>
-#include "./av1_rtcd.h"
-#include "av1/common/filter.h"
-
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
-DECLARE_ALIGNED(16, static int16_t, subpel_filters_sharp[15][6][8]);
-#endif
-
-#if USE_TEMPORALFILTER_12TAP
-DECLARE_ALIGNED(16, static int16_t, subpel_temporalfilter[15][6][8]);
-#endif
+#include "config/av1_rtcd.h"
-typedef int16_t (*HbdSubpelFilterCoeffs)[8];
+#include "av1/common/filter.h"
typedef void (*TransposeSave)(int width, int pixelsNum, uint32_t *src,
int src_stride, uint16_t *dst, int dst_stride,
int bd);
-static INLINE HbdSubpelFilterCoeffs
-hbd_get_subpel_filter_ver_signal_dir(const InterpFilterParams p, int index) {
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- if (p.interp_filter == MULTITAP_SHARP) {
- return &subpel_filters_sharp[index][0];
- }
-#endif
-#if USE_TEMPORALFILTER_12TAP
- if (p.interp_filter == TEMPORALFILTER_12TAP) {
- return &subpel_temporalfilter[index][0];
- }
-#endif
- (void)p;
- (void)index;
- return NULL;
-}
-
-static void init_simd_filter(const int16_t *filter_ptr, int taps,
- int16_t (*simd_filter)[6][8]) {
- int shift;
- int offset = (12 - taps) / 2;
- for (shift = 1; shift < SUBPEL_SHIFTS; ++shift) {
- const int16_t *filter_row = filter_ptr + shift * taps;
- int i, j;
- for (i = 0; i < 12; ++i) {
- for (j = 0; j < 4; ++j) {
- int r = i / 2;
- int c = j * 2 + (i % 2);
- if (i - offset >= 0 && i - offset < taps)
- simd_filter[shift - 1][r][c] = filter_row[i - offset];
- else
- simd_filter[shift - 1][r][c] = 0;
- }
- }
- }
-}
-
-void av1_highbd_convolve_init_sse4_1(void) {
-#if USE_TEMPORALFILTER_12TAP
- {
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(TEMPORALFILTER_12TAP);
- int taps = filter_params.taps;
- const int16_t *filter_ptr = filter_params.filter_ptr;
- init_simd_filter(filter_ptr, taps, subpel_temporalfilter);
- }
-#endif
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- {
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(MULTITAP_SHARP);
- int taps = filter_params.taps;
- const int16_t *filter_ptr = filter_params.filter_ptr;
- init_simd_filter(filter_ptr, taps, subpel_filters_sharp);
- }
-#endif
-}
-
// pixelsNum 0: write all 4 pixels
// 1/2/3: residual pixels 1/2/3
static void writePixel(__m128i *u, int width, int pixelsNum, uint16_t *dst,
@@ -218,138 +151,6 @@ void trans_accum_save_4x4(int width, int pixelsNum, uint32_t *src,
writePixel(u, width, pixelsNum, dst, dst_stride);
}
-static TransposeSave transSaveTab[2] = { trans_save_4x4, trans_accum_save_4x4 };
-
-static INLINE void transpose_pair(__m128i *in, __m128i *out) {
- __m128i x0, x1;
-
- x0 = _mm_unpacklo_epi32(in[0], in[1]);
- x1 = _mm_unpacklo_epi32(in[2], in[3]);
-
- out[0] = _mm_unpacklo_epi64(x0, x1);
- out[1] = _mm_unpackhi_epi64(x0, x1);
-
- x0 = _mm_unpackhi_epi32(in[0], in[1]);
- x1 = _mm_unpackhi_epi32(in[2], in[3]);
-
- out[2] = _mm_unpacklo_epi64(x0, x1);
- out[3] = _mm_unpackhi_epi64(x0, x1);
-
- x0 = _mm_unpacklo_epi32(in[4], in[5]);
- x1 = _mm_unpacklo_epi32(in[6], in[7]);
-
- out[4] = _mm_unpacklo_epi64(x0, x1);
- out[5] = _mm_unpackhi_epi64(x0, x1);
-}
-
-static void highbd_filter_horiz(const uint16_t *src, int src_stride, __m128i *f,
- int tapsNum, uint32_t *buf) {
- __m128i u[8], v[6];
-
- assert(tapsNum == 10 || tapsNum == 12);
- if (tapsNum == 10) {
- src -= 1;
- }
-
- u[0] = _mm_loadu_si128((__m128i const *)src);
- u[1] = _mm_loadu_si128((__m128i const *)(src + src_stride));
- u[2] = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
- u[3] = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
-
- u[4] = _mm_loadu_si128((__m128i const *)(src + 8));
- u[5] = _mm_loadu_si128((__m128i const *)(src + src_stride + 8));
- u[6] = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride + 8));
- u[7] = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride + 8));
-
- transpose_pair(u, v);
-
- u[0] = _mm_madd_epi16(v[0], f[0]);
- u[1] = _mm_madd_epi16(v[1], f[1]);
- u[2] = _mm_madd_epi16(v[2], f[2]);
- u[3] = _mm_madd_epi16(v[3], f[3]);
- u[4] = _mm_madd_epi16(v[4], f[4]);
- u[5] = _mm_madd_epi16(v[5], f[5]);
-
- u[6] = _mm_min_epi32(u[2], u[3]);
- u[7] = _mm_max_epi32(u[2], u[3]);
-
- u[0] = _mm_add_epi32(u[0], u[1]);
- u[0] = _mm_add_epi32(u[0], u[5]);
- u[0] = _mm_add_epi32(u[0], u[4]);
- u[0] = _mm_add_epi32(u[0], u[6]);
- u[0] = _mm_add_epi32(u[0], u[7]);
-
- _mm_storeu_si128((__m128i *)buf, u[0]);
-}
-
-void av1_highbd_convolve_horiz_sse4_1(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w,
- int h,
- const InterpFilterParams filter_params,
- const int subpel_x_q4, int x_step_q4,
- int avg, int bd) {
- DECLARE_ALIGNED(16, uint32_t, temp[4 * 4]);
- __m128i verf[6];
- HbdSubpelFilterCoeffs vCoeffs;
- const uint16_t *srcPtr;
- const int tapsNum = filter_params.taps;
- int i, col, count, blkResidu, blkHeight;
- TransposeSave transSave = transSaveTab[avg];
- (void)x_step_q4;
-
- if (0 == subpel_x_q4 || 16 != x_step_q4) {
- av1_highbd_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h,
- filter_params, subpel_x_q4, x_step_q4, avg, bd);
- return;
- }
-
- vCoeffs =
- hbd_get_subpel_filter_ver_signal_dir(filter_params, subpel_x_q4 - 1);
- if (!vCoeffs) {
- av1_highbd_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h,
- filter_params, subpel_x_q4, x_step_q4, avg, bd);
- return;
- }
-
- verf[0] = *((const __m128i *)(vCoeffs));
- verf[1] = *((const __m128i *)(vCoeffs + 1));
- verf[2] = *((const __m128i *)(vCoeffs + 2));
- verf[3] = *((const __m128i *)(vCoeffs + 3));
- verf[4] = *((const __m128i *)(vCoeffs + 4));
- verf[5] = *((const __m128i *)(vCoeffs + 5));
-
- src -= (tapsNum >> 1) - 1;
- srcPtr = src;
-
- count = 0;
- blkHeight = h >> 2;
- blkResidu = h & 3;
-
- while (blkHeight != 0) {
- for (col = 0; col < w; col += 4) {
- for (i = 0; i < 4; ++i) {
- highbd_filter_horiz(srcPtr, src_stride, verf, tapsNum, temp + (i * 4));
- srcPtr += 1;
- }
- transSave(w, 0, temp, 4, dst + col, dst_stride, bd);
- }
- count++;
- srcPtr = src + count * src_stride * 4;
- dst += dst_stride * 4;
- blkHeight--;
- }
-
- if (blkResidu == 0) return;
-
- for (col = 0; col < w; col += 4) {
- for (i = 0; i < 4; ++i) {
- highbd_filter_horiz(srcPtr, src_stride, verf, tapsNum, temp + (i * 4));
- srcPtr += 1;
- }
- transSave(w, blkResidu, temp, 4, dst + col, dst_stride, bd);
- }
-}
-
// Vertical convolutional filter
typedef void (*WritePixels)(__m128i *u, int bd, uint16_t *dst);
@@ -402,134 +203,3 @@ static void write4pixelsAccum(__m128i *u, int bd, uint16_t *dst) {
}
WritePixels write4pixelsTab[2] = { write4pixelsOnly, write4pixelsAccum };
-
-static void filter_vert_horiz_parallel(const uint16_t *src, int src_stride,
- const __m128i *f, int taps,
- uint16_t *dst, WritePixels saveFunc,
- int bd) {
- __m128i s[12];
- __m128i zero = _mm_setzero_si128();
- int i = 0;
- int r = 0;
-
- // TODO(luoyi) treat s[12] as a circular buffer in width = 2 case
- assert(taps == 10 || taps == 12);
- if (10 == taps) {
- i += 1;
- s[0] = zero;
- }
- while (i < 12) {
- s[i] = _mm_loadu_si128((__m128i const *)(src + r * src_stride));
- i += 1;
- r += 1;
- }
-
- s[0] = _mm_unpacklo_epi16(s[0], s[1]);
- s[2] = _mm_unpacklo_epi16(s[2], s[3]);
- s[4] = _mm_unpacklo_epi16(s[4], s[5]);
- s[6] = _mm_unpacklo_epi16(s[6], s[7]);
- s[8] = _mm_unpacklo_epi16(s[8], s[9]);
- s[10] = _mm_unpacklo_epi16(s[10], s[11]);
-
- s[0] = _mm_madd_epi16(s[0], f[0]);
- s[2] = _mm_madd_epi16(s[2], f[1]);
- s[4] = _mm_madd_epi16(s[4], f[2]);
- s[6] = _mm_madd_epi16(s[6], f[3]);
- s[8] = _mm_madd_epi16(s[8], f[4]);
- s[10] = _mm_madd_epi16(s[10], f[5]);
-
- s[1] = _mm_min_epi32(s[4], s[6]);
- s[3] = _mm_max_epi32(s[4], s[6]);
-
- s[0] = _mm_add_epi32(s[0], s[2]);
- s[0] = _mm_add_epi32(s[0], s[10]);
- s[0] = _mm_add_epi32(s[0], s[8]);
- s[0] = _mm_add_epi32(s[0], s[1]);
- s[0] = _mm_add_epi32(s[0], s[3]);
-
- saveFunc(s, bd, dst);
-}
-
-static void highbd_filter_vert_compute_large(const uint16_t *src,
- int src_stride, const __m128i *f,
- int taps, int w, int h,
- uint16_t *dst, int dst_stride,
- int avg, int bd) {
- int col;
- int rowIndex = 0;
- const uint16_t *src_ptr = src;
- uint16_t *dst_ptr = dst;
- const int step = 4;
- WritePixels write4pixels = write4pixelsTab[avg];
-
- do {
- for (col = 0; col < w; col += step) {
- filter_vert_horiz_parallel(src_ptr, src_stride, f, taps, dst_ptr,
- write4pixels, bd);
- src_ptr += step;
- dst_ptr += step;
- }
- rowIndex++;
- src_ptr = src + rowIndex * src_stride;
- dst_ptr = dst + rowIndex * dst_stride;
- } while (rowIndex < h);
-}
-
-static void highbd_filter_vert_compute_small(const uint16_t *src,
- int src_stride, const __m128i *f,
- int taps, int w, int h,
- uint16_t *dst, int dst_stride,
- int avg, int bd) {
- int rowIndex = 0;
- WritePixels write2pixels = write2pixelsTab[avg];
- (void)w;
-
- do {
- filter_vert_horiz_parallel(src, src_stride, f, taps, dst, write2pixels, bd);
- rowIndex++;
- src += src_stride;
- dst += dst_stride;
- } while (rowIndex < h);
-}
-
-void av1_highbd_convolve_vert_sse4_1(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w,
- int h,
- const InterpFilterParams filter_params,
- const int subpel_y_q4, int y_step_q4,
- int avg, int bd) {
- __m128i verf[6];
- HbdSubpelFilterCoeffs vCoeffs;
- const int tapsNum = filter_params.taps;
-
- if (0 == subpel_y_q4 || 16 != y_step_q4) {
- av1_highbd_convolve_vert_c(src, src_stride, dst, dst_stride, w, h,
- filter_params, subpel_y_q4, y_step_q4, avg, bd);
- return;
- }
-
- vCoeffs =
- hbd_get_subpel_filter_ver_signal_dir(filter_params, subpel_y_q4 - 1);
- if (!vCoeffs) {
- av1_highbd_convolve_vert_c(src, src_stride, dst, dst_stride, w, h,
- filter_params, subpel_y_q4, y_step_q4, avg, bd);
- return;
- }
-
- verf[0] = *((const __m128i *)(vCoeffs));
- verf[1] = *((const __m128i *)(vCoeffs + 1));
- verf[2] = *((const __m128i *)(vCoeffs + 2));
- verf[3] = *((const __m128i *)(vCoeffs + 3));
- verf[4] = *((const __m128i *)(vCoeffs + 4));
- verf[5] = *((const __m128i *)(vCoeffs + 5));
-
- src -= src_stride * ((tapsNum >> 1) - 1);
-
- if (w > 2) {
- highbd_filter_vert_compute_large(src, src_stride, verf, tapsNum, w, h, dst,
- dst_stride, avg, bd);
- } else {
- highbd_filter_vert_compute_small(src, src_stride, verf, tapsNum, w, h, dst,
- dst_stride, avg, bd);
- }
-}
diff --git a/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.c b/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.c
new file mode 100644
index 000000000..7415c58df
--- /dev/null
+++ b/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.c
@@ -0,0 +1,1957 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "config/aom_config.h"
+
+#include "config/av1_rtcd.h"
+
+#include "av1/common/av1_inv_txfm1d_cfg.h"
+#include "av1/common/x86/av1_txfm_sse2.h"
+#include "av1/common/x86/av1_inv_txfm_avx2.h"
+#include "av1/common/x86/av1_inv_txfm_ssse3.h"
+
+static INLINE void idct16_stage5_avx2(__m256i *x1, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
+ const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
+ btf_16_adds_subs_avx2(x1[0], x1[3]);
+ btf_16_adds_subs_avx2(x1[1], x1[2]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x1[5], x1[6], x1[5], x1[6]);
+
+ btf_16_adds_subs_avx2(x1[8], x1[11]);
+ btf_16_adds_subs_avx2(x1[9], x1[10]);
+ btf_16_subs_adds_avx2(x1[15], x1[12]);
+ btf_16_subs_adds_avx2(x1[14], x1[13]);
+}
+
+static INLINE void idct16_stage6_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
+ const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
+ btf_16_adds_subs_avx2(x[0], x[7]);
+ btf_16_adds_subs_avx2(x[1], x[6]);
+ btf_16_adds_subs_avx2(x[2], x[5]);
+ btf_16_adds_subs_avx2(x[3], x[4]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
+}
+
+static INLINE void idct16_stage7_avx2(__m256i *output, __m256i *x1) {
+ btf_16_adds_subs_out_avx2(output[0], output[15], x1[0], x1[15]);
+ btf_16_adds_subs_out_avx2(output[1], output[14], x1[1], x1[14]);
+ btf_16_adds_subs_out_avx2(output[2], output[13], x1[2], x1[13]);
+ btf_16_adds_subs_out_avx2(output[3], output[12], x1[3], x1[12]);
+ btf_16_adds_subs_out_avx2(output[4], output[11], x1[4], x1[11]);
+ btf_16_adds_subs_out_avx2(output[5], output[10], x1[5], x1[10]);
+ btf_16_adds_subs_out_avx2(output[6], output[9], x1[6], x1[9]);
+ btf_16_adds_subs_out_avx2(output[7], output[8], x1[7], x1[8]);
+}
+
+static void idct16_new_avx2(const __m256i *input, __m256i *output,
+ int8_t cos_bit) {
+ (void)(cos_bit);
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m256i __rounding = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ __m256i cospi_p60_m04 = pair_set_w16_epi16(cospi[60], -cospi[4]);
+ __m256i cospi_p04_p60 = pair_set_w16_epi16(cospi[4], cospi[60]);
+ __m256i cospi_p28_m36 = pair_set_w16_epi16(cospi[28], -cospi[36]);
+ __m256i cospi_p36_p28 = pair_set_w16_epi16(cospi[36], cospi[28]);
+ __m256i cospi_p44_m20 = pair_set_w16_epi16(cospi[44], -cospi[20]);
+ __m256i cospi_p20_p44 = pair_set_w16_epi16(cospi[20], cospi[44]);
+ __m256i cospi_p12_m52 = pair_set_w16_epi16(cospi[12], -cospi[52]);
+ __m256i cospi_p52_p12 = pair_set_w16_epi16(cospi[52], cospi[12]);
+ __m256i cospi_p56_m08 = pair_set_w16_epi16(cospi[56], -cospi[8]);
+ __m256i cospi_p08_p56 = pair_set_w16_epi16(cospi[8], cospi[56]);
+ __m256i cospi_p24_m40 = pair_set_w16_epi16(cospi[24], -cospi[40]);
+ __m256i cospi_p40_p24 = pair_set_w16_epi16(cospi[40], cospi[24]);
+ __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
+ __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
+ __m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]);
+ __m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]);
+ __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
+ __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
+ __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
+
+ // stage 1
+ __m256i x1[16];
+ x1[0] = input[0];
+ x1[1] = input[8];
+ x1[2] = input[4];
+ x1[3] = input[12];
+ x1[4] = input[2];
+ x1[5] = input[10];
+ x1[6] = input[6];
+ x1[7] = input[14];
+ x1[8] = input[1];
+ x1[9] = input[9];
+ x1[10] = input[5];
+ x1[11] = input[13];
+ x1[12] = input[3];
+ x1[13] = input[11];
+ x1[14] = input[7];
+ x1[15] = input[15];
+
+ // stage 2
+ btf_16_w16_avx2(cospi_p60_m04, cospi_p04_p60, x1[8], x1[15], x1[8], x1[15]);
+ btf_16_w16_avx2(cospi_p28_m36, cospi_p36_p28, x1[9], x1[14], x1[9], x1[14]);
+ btf_16_w16_avx2(cospi_p44_m20, cospi_p20_p44, x1[10], x1[13], x1[10], x1[13]);
+ btf_16_w16_avx2(cospi_p12_m52, cospi_p52_p12, x1[11], x1[12], x1[11], x1[12]);
+
+ // stage 3
+ btf_16_w16_avx2(cospi_p56_m08, cospi_p08_p56, x1[4], x1[7], x1[4], x1[7]);
+ btf_16_w16_avx2(cospi_p24_m40, cospi_p40_p24, x1[5], x1[6], x1[5], x1[6]);
+ btf_16_adds_subs_avx2(x1[8], x1[9]);
+ btf_16_subs_adds_avx2(x1[11], x1[10]);
+ btf_16_adds_subs_avx2(x1[12], x1[13]);
+ btf_16_subs_adds_avx2(x1[15], x1[14]);
+
+ // stage 4
+ btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, x1[0], x1[1], x1[0], x1[1]);
+ btf_16_w16_avx2(cospi_p48_m16, cospi_p16_p48, x1[2], x1[3], x1[2], x1[3]);
+ btf_16_adds_subs_avx2(x1[4], x1[5]);
+ btf_16_subs_adds_avx2(x1[7], x1[6]);
+ btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, x1[9], x1[14], x1[9], x1[14]);
+ btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, x1[10], x1[13], x1[10], x1[13]);
+
+ idct16_stage5_avx2(x1, cospi, __rounding, cos_bit);
+ idct16_stage6_avx2(x1, cospi, __rounding, cos_bit);
+ idct16_stage7_avx2(output, x1);
+}
+
+static void idct16_low8_new_avx2(const __m256i *input, __m256i *output,
+ int8_t cos_bit) {
+ (void)(cos_bit);
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m256i __rounding = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
+ const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
+ const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
+
+ // stage 1
+ __m256i x1[16];
+ x1[0] = input[0];
+ x1[2] = input[4];
+ x1[4] = input[2];
+ x1[6] = input[6];
+ x1[8] = input[1];
+ x1[10] = input[5];
+ x1[12] = input[3];
+ x1[14] = input[7];
+
+ // stage 2
+ btf_16_w16_0_avx2(cospi[60], cospi[4], x1[8], x1[8], x1[15]);
+ btf_16_w16_0_avx2(-cospi[36], cospi[28], x1[14], x1[9], x1[14]);
+ btf_16_w16_0_avx2(cospi[44], cospi[20], x1[10], x1[10], x1[13]);
+ btf_16_w16_0_avx2(-cospi[52], cospi[12], x1[12], x1[11], x1[12]);
+
+ // stage 3
+ btf_16_w16_0_avx2(cospi[56], cospi[8], x1[4], x1[4], x1[7]);
+ btf_16_w16_0_avx2(-cospi[40], cospi[24], x1[6], x1[5], x1[6]);
+ btf_16_adds_subs_avx2(x1[8], x1[9]);
+ btf_16_subs_adds_avx2(x1[11], x1[10]);
+ btf_16_adds_subs_avx2(x1[12], x1[13]);
+ btf_16_subs_adds_avx2(x1[15], x1[14]);
+
+ // stage 4
+ btf_16_w16_0_avx2(cospi[32], cospi[32], x1[0], x1[0], x1[1]);
+ btf_16_w16_0_avx2(cospi[48], cospi[16], x1[2], x1[2], x1[3]);
+ btf_16_adds_subs_avx2(x1[4], x1[5]);
+ btf_16_subs_adds_avx2(x1[7], x1[6]);
+ btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, x1[9], x1[14], x1[9], x1[14]);
+ btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, x1[10], x1[13], x1[10], x1[13]);
+
+ idct16_stage5_avx2(x1, cospi, __rounding, cos_bit);
+ idct16_stage6_avx2(x1, cospi, __rounding, cos_bit);
+ idct16_stage7_avx2(output, x1);
+}
+
+static void idct16_low1_new_avx2(const __m256i *input, __m256i *output,
+ int8_t cos_bit) {
+ (void)(cos_bit);
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+
+ // stage 1
+ __m256i x1[2];
+ x1[0] = input[0];
+
+ // stage 2
+ // stage 3
+ // stage 4
+ btf_16_w16_0_avx2(cospi[32], cospi[32], x1[0], x1[0], x1[1]);
+
+ // stage 5
+ // stage 6
+ output[0] = x1[0];
+ output[1] = x1[1];
+ output[2] = x1[1];
+ output[3] = x1[0];
+ output[4] = x1[0];
+ output[5] = x1[1];
+ output[6] = x1[1];
+ output[7] = x1[0];
+ output[8] = x1[0];
+ output[9] = x1[1];
+ output[10] = x1[1];
+ output[11] = x1[0];
+ output[12] = x1[0];
+ output[13] = x1[1];
+ output[14] = x1[1];
+ output[15] = x1[0];
+}
+
+static INLINE void iadst16_stage3_avx2(__m256i *x) {
+ btf_16_adds_subs_avx2(x[0], x[8]);
+ btf_16_adds_subs_avx2(x[1], x[9]);
+ btf_16_adds_subs_avx2(x[2], x[10]);
+ btf_16_adds_subs_avx2(x[3], x[11]);
+ btf_16_adds_subs_avx2(x[4], x[12]);
+ btf_16_adds_subs_avx2(x[5], x[13]);
+ btf_16_adds_subs_avx2(x[6], x[14]);
+ btf_16_adds_subs_avx2(x[7], x[15]);
+}
+
+static INLINE void iadst16_stage4_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ const __m256i cospi_p08_p56 = pair_set_w16_epi16(cospi[8], cospi[56]);
+ const __m256i cospi_p56_m08 = pair_set_w16_epi16(cospi[56], -cospi[8]);
+ const __m256i cospi_p40_p24 = pair_set_w16_epi16(cospi[40], cospi[24]);
+ const __m256i cospi_p24_m40 = pair_set_w16_epi16(cospi[24], -cospi[40]);
+ const __m256i cospi_m56_p08 = pair_set_w16_epi16(-cospi[56], cospi[8]);
+ const __m256i cospi_m24_p40 = pair_set_w16_epi16(-cospi[24], cospi[40]);
+ btf_16_w16_avx2(cospi_p08_p56, cospi_p56_m08, x[8], x[9], x[8], x[9]);
+ btf_16_w16_avx2(cospi_p40_p24, cospi_p24_m40, x[10], x[11], x[10], x[11]);
+ btf_16_w16_avx2(cospi_m56_p08, cospi_p08_p56, x[12], x[13], x[12], x[13]);
+ btf_16_w16_avx2(cospi_m24_p40, cospi_p40_p24, x[14], x[15], x[14], x[15]);
+}
+
+static INLINE void iadst16_stage5_avx2(__m256i *x) {
+ btf_16_adds_subs_avx2(x[0], x[4]);
+ btf_16_adds_subs_avx2(x[1], x[5]);
+ btf_16_adds_subs_avx2(x[2], x[6]);
+ btf_16_adds_subs_avx2(x[3], x[7]);
+ btf_16_adds_subs_avx2(x[8], x[12]);
+ btf_16_adds_subs_avx2(x[9], x[13]);
+ btf_16_adds_subs_avx2(x[10], x[14]);
+ btf_16_adds_subs_avx2(x[11], x[15]);
+}
+
+static INLINE void iadst16_stage6_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ const __m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]);
+ const __m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]);
+ const __m256i cospi_m48_p16 = pair_set_w16_epi16(-cospi[48], cospi[16]);
+ btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]);
+ btf_16_w16_avx2(cospi_m48_p16, cospi_p16_p48, x[6], x[7], x[6], x[7]);
+ btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, x[12], x[13], x[12], x[13]);
+ btf_16_w16_avx2(cospi_m48_p16, cospi_p16_p48, x[14], x[15], x[14], x[15]);
+}
+
+static INLINE void iadst16_stage7_avx2(__m256i *x) {
+ btf_16_adds_subs_avx2(x[0], x[2]);
+ btf_16_adds_subs_avx2(x[1], x[3]);
+ btf_16_adds_subs_avx2(x[4], x[6]);
+ btf_16_adds_subs_avx2(x[5], x[7]);
+ btf_16_adds_subs_avx2(x[8], x[10]);
+ btf_16_adds_subs_avx2(x[9], x[11]);
+ btf_16_adds_subs_avx2(x[12], x[14]);
+ btf_16_adds_subs_avx2(x[13], x[15]);
+}
+
+static INLINE void iadst16_stage8_avx2(__m256i *x1, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
+ const __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
+ btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, x1[2], x1[3], x1[2], x1[3]);
+ btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, x1[6], x1[7], x1[6], x1[7]);
+ btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, x1[10], x1[11], x1[10], x1[11]);
+ btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, x1[14], x1[15], x1[14], x1[15]);
+}
+
+static INLINE void iadst16_stage9_avx2(__m256i *output, __m256i *x1) {
+ const __m256i __zero = _mm256_setzero_si256();
+ output[0] = x1[0];
+ output[1] = _mm256_subs_epi16(__zero, x1[8]);
+ output[2] = x1[12];
+ output[3] = _mm256_subs_epi16(__zero, x1[4]);
+ output[4] = x1[6];
+ output[5] = _mm256_subs_epi16(__zero, x1[14]);
+ output[6] = x1[10];
+ output[7] = _mm256_subs_epi16(__zero, x1[2]);
+ output[8] = x1[3];
+ output[9] = _mm256_subs_epi16(__zero, x1[11]);
+ output[10] = x1[15];
+ output[11] = _mm256_subs_epi16(__zero, x1[7]);
+ output[12] = x1[5];
+ output[13] = _mm256_subs_epi16(__zero, x1[13]);
+ output[14] = x1[9];
+ output[15] = _mm256_subs_epi16(__zero, x1[1]);
+}
+
+static void iadst16_new_avx2(const __m256i *input, __m256i *output,
+ int8_t cos_bit) {
+ (void)(cos_bit);
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+
+ const __m256i __rounding = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ __m256i cospi_p02_p62 = pair_set_w16_epi16(cospi[2], cospi[62]);
+ __m256i cospi_p62_m02 = pair_set_w16_epi16(cospi[62], -cospi[2]);
+ __m256i cospi_p10_p54 = pair_set_w16_epi16(cospi[10], cospi[54]);
+ __m256i cospi_p54_m10 = pair_set_w16_epi16(cospi[54], -cospi[10]);
+ __m256i cospi_p18_p46 = pair_set_w16_epi16(cospi[18], cospi[46]);
+ __m256i cospi_p46_m18 = pair_set_w16_epi16(cospi[46], -cospi[18]);
+ __m256i cospi_p26_p38 = pair_set_w16_epi16(cospi[26], cospi[38]);
+ __m256i cospi_p38_m26 = pair_set_w16_epi16(cospi[38], -cospi[26]);
+ __m256i cospi_p34_p30 = pair_set_w16_epi16(cospi[34], cospi[30]);
+ __m256i cospi_p30_m34 = pair_set_w16_epi16(cospi[30], -cospi[34]);
+ __m256i cospi_p42_p22 = pair_set_w16_epi16(cospi[42], cospi[22]);
+ __m256i cospi_p22_m42 = pair_set_w16_epi16(cospi[22], -cospi[42]);
+ __m256i cospi_p50_p14 = pair_set_w16_epi16(cospi[50], cospi[14]);
+ __m256i cospi_p14_m50 = pair_set_w16_epi16(cospi[14], -cospi[50]);
+ __m256i cospi_p58_p06 = pair_set_w16_epi16(cospi[58], cospi[6]);
+ __m256i cospi_p06_m58 = pair_set_w16_epi16(cospi[6], -cospi[58]);
+
+ // stage 1
+ __m256i x1[16];
+ x1[0] = input[15];
+ x1[1] = input[0];
+ x1[2] = input[13];
+ x1[3] = input[2];
+ x1[4] = input[11];
+ x1[5] = input[4];
+ x1[6] = input[9];
+ x1[7] = input[6];
+ x1[8] = input[7];
+ x1[9] = input[8];
+ x1[10] = input[5];
+ x1[11] = input[10];
+ x1[12] = input[3];
+ x1[13] = input[12];
+ x1[14] = input[1];
+ x1[15] = input[14];
+
+ // stage 2
+ btf_16_w16_avx2(cospi_p02_p62, cospi_p62_m02, x1[0], x1[1], x1[0], x1[1]);
+ btf_16_w16_avx2(cospi_p10_p54, cospi_p54_m10, x1[2], x1[3], x1[2], x1[3]);
+ btf_16_w16_avx2(cospi_p18_p46, cospi_p46_m18, x1[4], x1[5], x1[4], x1[5]);
+ btf_16_w16_avx2(cospi_p26_p38, cospi_p38_m26, x1[6], x1[7], x1[6], x1[7]);
+ btf_16_w16_avx2(cospi_p34_p30, cospi_p30_m34, x1[8], x1[9], x1[8], x1[9]);
+ btf_16_w16_avx2(cospi_p42_p22, cospi_p22_m42, x1[10], x1[11], x1[10], x1[11]);
+ btf_16_w16_avx2(cospi_p50_p14, cospi_p14_m50, x1[12], x1[13], x1[12], x1[13]);
+ btf_16_w16_avx2(cospi_p58_p06, cospi_p06_m58, x1[14], x1[15], x1[14], x1[15]);
+
+ iadst16_stage3_avx2(x1);
+ iadst16_stage4_avx2(x1, cospi, __rounding, cos_bit);
+ iadst16_stage5_avx2(x1);
+ iadst16_stage6_avx2(x1, cospi, __rounding, cos_bit);
+ iadst16_stage7_avx2(x1);
+ iadst16_stage8_avx2(x1, cospi, __rounding, cos_bit);
+ iadst16_stage9_avx2(output, x1);
+}
+
+static void iadst16_low8_new_avx2(const __m256i *input, __m256i *output,
+ int8_t cos_bit) {
+ (void)(cos_bit);
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m256i __rounding = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ // stage 1
+ __m256i x1[16];
+ x1[1] = input[0];
+ x1[3] = input[2];
+ x1[5] = input[4];
+ x1[7] = input[6];
+ x1[8] = input[7];
+ x1[10] = input[5];
+ x1[12] = input[3];
+ x1[14] = input[1];
+
+ // stage 2
+ btf_16_w16_0_avx2(cospi[62], -cospi[2], x1[1], x1[0], x1[1]);
+ btf_16_w16_0_avx2(cospi[54], -cospi[10], x1[3], x1[2], x1[3]);
+ btf_16_w16_0_avx2(cospi[46], -cospi[18], x1[5], x1[4], x1[5]);
+ btf_16_w16_0_avx2(cospi[38], -cospi[26], x1[7], x1[6], x1[7]);
+ btf_16_w16_0_avx2(cospi[34], cospi[30], x1[8], x1[8], x1[9]);
+ btf_16_w16_0_avx2(cospi[42], cospi[22], x1[10], x1[10], x1[11]);
+ btf_16_w16_0_avx2(cospi[50], cospi[14], x1[12], x1[12], x1[13]);
+  btf_16_w16_0_avx2(cospi[58], cospi[6], x1[14], x1[14], x1[15]);
+
+ iadst16_stage3_avx2(x1);
+ iadst16_stage4_avx2(x1, cospi, __rounding, cos_bit);
+ iadst16_stage5_avx2(x1);
+ iadst16_stage6_avx2(x1, cospi, __rounding, cos_bit);
+ iadst16_stage7_avx2(x1);
+ iadst16_stage8_avx2(x1, cospi, __rounding, cos_bit);
+ iadst16_stage9_avx2(output, x1);
+}
+
+static void iadst16_low1_new_avx2(const __m256i *input, __m256i *output,
+ int8_t cos_bit) {
+ (void)(cos_bit);
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m256i __rounding = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m256i cospi_p08_p56 = pair_set_w16_epi16(cospi[8], cospi[56]);
+ const __m256i cospi_p56_m08 = pair_set_w16_epi16(cospi[56], -cospi[8]);
+ const __m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]);
+ const __m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]);
+
+ // stage 1
+ __m256i x1[16];
+ x1[1] = input[0];
+
+ // stage 2
+ btf_16_w16_0_avx2(cospi[62], -cospi[2], x1[1], x1[0], x1[1]);
+
+ // stage 3
+ x1[8] = x1[0];
+ x1[9] = x1[1];
+
+ // stage 4
+ btf_16_w16_avx2(cospi_p08_p56, cospi_p56_m08, x1[8], x1[9], x1[8], x1[9]);
+
+ // stage 5
+ x1[4] = x1[0];
+ x1[5] = x1[1];
+
+ x1[12] = x1[8];
+ x1[13] = x1[9];
+
+ // stage 6
+ btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, x1[4], x1[5], x1[4], x1[5]);
+ btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, x1[12], x1[13], x1[12], x1[13]);
+
+ // stage 7
+ x1[2] = x1[0];
+ x1[3] = x1[1];
+ x1[6] = x1[4];
+ x1[7] = x1[5];
+ x1[10] = x1[8];
+ x1[11] = x1[9];
+ x1[14] = x1[12];
+ x1[15] = x1[13];
+
+ iadst16_stage8_avx2(x1, cospi, __rounding, cos_bit);
+ iadst16_stage9_avx2(output, x1);
+}
+
+static INLINE void idct32_high16_stage3_avx2(__m256i *x) {
+ btf_16_adds_subs_avx2(x[16], x[17]);
+ btf_16_subs_adds_avx2(x[19], x[18]);
+ btf_16_adds_subs_avx2(x[20], x[21]);
+ btf_16_subs_adds_avx2(x[23], x[22]);
+ btf_16_adds_subs_avx2(x[24], x[25]);
+ btf_16_subs_adds_avx2(x[27], x[26]);
+ btf_16_adds_subs_avx2(x[28], x[29]);
+ btf_16_subs_adds_avx2(x[31], x[30]);
+}
+
+static INLINE void idct32_high16_stage4_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ const __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]);
+ const __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]);
+ const __m256i cospi_m56_m08 = pair_set_w16_epi16(-cospi[56], -cospi[8]);
+ const __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]);
+ const __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]);
+ const __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]);
+ btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, x[17], x[30], x[17], x[30]);
+ btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, x[18], x[29], x[18], x[29]);
+ btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, x[21], x[26], x[21], x[26]);
+ btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, x[22], x[25], x[22], x[25]);
+}
+
+static INLINE void idct32_high24_stage5_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
+ const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
+ const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
+ btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
+ btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]);
+ btf_16_adds_subs_avx2(x[16], x[19]);
+ btf_16_adds_subs_avx2(x[17], x[18]);
+ btf_16_subs_adds_avx2(x[23], x[20]);
+ btf_16_subs_adds_avx2(x[22], x[21]);
+ btf_16_adds_subs_avx2(x[24], x[27]);
+ btf_16_adds_subs_avx2(x[25], x[26]);
+ btf_16_subs_adds_avx2(x[31], x[28]);
+ btf_16_subs_adds_avx2(x[30], x[29]);
+}
+
+static INLINE void idct32_high28_stage6_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
+ const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
+ const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
+ const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
+ const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
+ btf_16_adds_subs_avx2(x[8], x[11]);
+ btf_16_adds_subs_avx2(x[9], x[10]);
+ btf_16_subs_adds_avx2(x[15], x[12]);
+ btf_16_subs_adds_avx2(x[14], x[13]);
+ btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, x[18], x[29], x[18], x[29]);
+ btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, x[19], x[28], x[19], x[28]);
+ btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, x[20], x[27], x[20], x[27]);
+ btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, x[21], x[26], x[21], x[26]);
+}
+
+static INLINE void idct32_stage7_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
+ const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
+ btf_16_adds_subs_avx2(x[0], x[7]);
+ btf_16_adds_subs_avx2(x[1], x[6]);
+ btf_16_adds_subs_avx2(x[2], x[5]);
+ btf_16_adds_subs_avx2(x[3], x[4]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
+ btf_16_adds_subs_avx2(x[16], x[23]);
+ btf_16_adds_subs_avx2(x[17], x[22]);
+ btf_16_adds_subs_avx2(x[18], x[21]);
+ btf_16_adds_subs_avx2(x[19], x[20]);
+ btf_16_subs_adds_avx2(x[31], x[24]);
+ btf_16_subs_adds_avx2(x[30], x[25]);
+ btf_16_subs_adds_avx2(x[29], x[26]);
+ btf_16_subs_adds_avx2(x[28], x[27]);
+}
+
+static INLINE void idct32_stage8_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
+ const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
+ btf_16_adds_subs_avx2(x[0], x[15]);
+ btf_16_adds_subs_avx2(x[1], x[14]);
+ btf_16_adds_subs_avx2(x[2], x[13]);
+ btf_16_adds_subs_avx2(x[3], x[12]);
+ btf_16_adds_subs_avx2(x[4], x[11]);
+ btf_16_adds_subs_avx2(x[5], x[10]);
+ btf_16_adds_subs_avx2(x[6], x[9]);
+ btf_16_adds_subs_avx2(x[7], x[8]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[20], x[27], x[20], x[27]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[21], x[26], x[21], x[26]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[22], x[25], x[22], x[25]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[23], x[24], x[23], x[24]);
+}
+
+static INLINE void idct32_stage9_avx2(__m256i *output, __m256i *x) {
+ btf_16_adds_subs_out_avx2(output[0], output[31], x[0], x[31]);
+ btf_16_adds_subs_out_avx2(output[1], output[30], x[1], x[30]);
+ btf_16_adds_subs_out_avx2(output[2], output[29], x[2], x[29]);
+ btf_16_adds_subs_out_avx2(output[3], output[28], x[3], x[28]);
+ btf_16_adds_subs_out_avx2(output[4], output[27], x[4], x[27]);
+ btf_16_adds_subs_out_avx2(output[5], output[26], x[5], x[26]);
+ btf_16_adds_subs_out_avx2(output[6], output[25], x[6], x[25]);
+ btf_16_adds_subs_out_avx2(output[7], output[24], x[7], x[24]);
+ btf_16_adds_subs_out_avx2(output[8], output[23], x[8], x[23]);
+ btf_16_adds_subs_out_avx2(output[9], output[22], x[9], x[22]);
+ btf_16_adds_subs_out_avx2(output[10], output[21], x[10], x[21]);
+ btf_16_adds_subs_out_avx2(output[11], output[20], x[11], x[20]);
+ btf_16_adds_subs_out_avx2(output[12], output[19], x[12], x[19]);
+ btf_16_adds_subs_out_avx2(output[13], output[18], x[13], x[18]);
+ btf_16_adds_subs_out_avx2(output[14], output[17], x[14], x[17]);
+ btf_16_adds_subs_out_avx2(output[15], output[16], x[15], x[16]);
+}
+
+static void idct32_low1_new_avx2(const __m256i *input, __m256i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+
+ // stage 1
+ __m256i x[2];
+ x[0] = input[0];
+
+ // stage 2
+ // stage 3
+ // stage 4
+ // stage 5
+ btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]);
+
+ // stage 6
+ // stage 7
+ // stage 8
+ // stage 9
+ output[0] = x[0];
+ output[31] = x[0];
+ output[1] = x[1];
+ output[30] = x[1];
+ output[2] = x[1];
+ output[29] = x[1];
+ output[3] = x[0];
+ output[28] = x[0];
+ output[4] = x[0];
+ output[27] = x[0];
+ output[5] = x[1];
+ output[26] = x[1];
+ output[6] = x[1];
+ output[25] = x[1];
+ output[7] = x[0];
+ output[24] = x[0];
+ output[8] = x[0];
+ output[23] = x[0];
+ output[9] = x[1];
+ output[22] = x[1];
+ output[10] = x[1];
+ output[21] = x[1];
+ output[11] = x[0];
+ output[20] = x[0];
+ output[12] = x[0];
+ output[19] = x[0];
+ output[13] = x[1];
+ output[18] = x[1];
+ output[14] = x[1];
+ output[17] = x[1];
+ output[15] = x[0];
+ output[16] = x[0];
+}
+
+static void idct32_low8_new_avx2(const __m256i *input, __m256i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m256i __rounding = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ // stage 1
+ __m256i x[32];
+ x[0] = input[0];
+ x[4] = input[4];
+ x[8] = input[2];
+ x[12] = input[6];
+ x[16] = input[1];
+ x[20] = input[5];
+ x[24] = input[3];
+ x[28] = input[7];
+
+ // stage 2
+ btf_16_w16_0_avx2(cospi[62], cospi[2], x[16], x[16], x[31]);
+ btf_16_w16_0_avx2(-cospi[50], cospi[14], x[28], x[19], x[28]);
+ btf_16_w16_0_avx2(cospi[54], cospi[10], x[20], x[20], x[27]);
+ btf_16_w16_0_avx2(-cospi[58], cospi[6], x[24], x[23], x[24]);
+
+ // stage 3
+ btf_16_w16_0_avx2(cospi[60], cospi[4], x[8], x[8], x[15]);
+ btf_16_w16_0_avx2(-cospi[52], cospi[12], x[12], x[11], x[12]);
+ x[17] = x[16];
+ x[18] = x[19];
+ x[21] = x[20];
+ x[22] = x[23];
+ x[25] = x[24];
+ x[26] = x[27];
+ x[29] = x[28];
+ x[30] = x[31];
+
+ // stage 4
+ btf_16_w16_0_avx2(cospi[56], cospi[8], x[4], x[4], x[7]);
+ x[9] = x[8];
+ x[10] = x[11];
+ x[13] = x[12];
+ x[14] = x[15];
+ idct32_high16_stage4_avx2(x, cospi, __rounding, cos_bit);
+
+ // stage 5
+ btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]);
+ x[5] = x[4];
+ x[6] = x[7];
+ idct32_high24_stage5_avx2(x, cospi, __rounding, cos_bit);
+ // stage 6
+ x[3] = x[0];
+ x[2] = x[1];
+ idct32_high28_stage6_avx2(x, cospi, __rounding, cos_bit);
+
+ idct32_stage7_avx2(x, cospi, __rounding, cos_bit);
+ idct32_stage8_avx2(x, cospi, __rounding, cos_bit);
+ idct32_stage9_avx2(output, x);
+}
+
+static void idct32_low16_new_avx2(const __m256i *input, __m256i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m256i __rounding = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ // stage 1
+ __m256i x[32];
+ x[0] = input[0];
+ x[2] = input[8];
+ x[4] = input[4];
+ x[6] = input[12];
+ x[8] = input[2];
+ x[10] = input[10];
+ x[12] = input[6];
+ x[14] = input[14];
+ x[16] = input[1];
+ x[18] = input[9];
+ x[20] = input[5];
+ x[22] = input[13];
+ x[24] = input[3];
+ x[26] = input[11];
+ x[28] = input[7];
+ x[30] = input[15];
+
+ // stage 2
+ btf_16_w16_0_avx2(cospi[62], cospi[2], x[16], x[16], x[31]);
+ btf_16_w16_0_avx2(-cospi[34], cospi[30], x[30], x[17], x[30]);
+ btf_16_w16_0_avx2(cospi[46], cospi[18], x[18], x[18], x[29]);
+ btf_16_w16_0_avx2(-cospi[50], cospi[14], x[28], x[19], x[28]);
+ btf_16_w16_0_avx2(cospi[54], cospi[10], x[20], x[20], x[27]);
+ btf_16_w16_0_avx2(-cospi[42], cospi[22], x[26], x[21], x[26]);
+ btf_16_w16_0_avx2(cospi[38], cospi[26], x[22], x[22], x[25]);
+ btf_16_w16_0_avx2(-cospi[58], cospi[6], x[24], x[23], x[24]);
+
+ // stage 3
+ btf_16_w16_0_avx2(cospi[60], cospi[4], x[8], x[8], x[15]);
+ btf_16_w16_0_avx2(-cospi[36], cospi[28], x[14], x[9], x[14]);
+ btf_16_w16_0_avx2(cospi[44], cospi[20], x[10], x[10], x[13]);
+ btf_16_w16_0_avx2(-cospi[52], cospi[12], x[12], x[11], x[12]);
+ idct32_high16_stage3_avx2(x);
+
+ // stage 4
+ btf_16_w16_0_avx2(cospi[56], cospi[8], x[4], x[4], x[7]);
+ btf_16_w16_0_avx2(-cospi[40], cospi[24], x[6], x[5], x[6]);
+ btf_16_adds_subs_avx2(x[8], x[9]);
+ btf_16_subs_adds_avx2(x[11], x[10]);
+ btf_16_adds_subs_avx2(x[12], x[13]);
+ btf_16_subs_adds_avx2(x[15], x[14]);
+ idct32_high16_stage4_avx2(x, cospi, __rounding, cos_bit);
+
+ // stage 5
+ btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]);
+ btf_16_w16_0_avx2(cospi[48], cospi[16], x[2], x[2], x[3]);
+ btf_16_adds_subs_avx2(x[4], x[5]);
+ btf_16_subs_adds_avx2(x[7], x[6]);
+ idct32_high24_stage5_avx2(x, cospi, __rounding, cos_bit);
+
+  // stage 6
+  btf_16_adds_subs_avx2(x[0], x[3]);
+ btf_16_adds_subs_avx2(x[1], x[2]);
+ idct32_high28_stage6_avx2(x, cospi, __rounding, cos_bit);
+
+ idct32_stage7_avx2(x, cospi, __rounding, cos_bit);
+ idct32_stage8_avx2(x, cospi, __rounding, cos_bit);
+ idct32_stage9_avx2(output, x);
+}
+
+static void idct32_new_avx2(const __m256i *input, __m256i *output,
+ int8_t cos_bit) {
+ (void)(cos_bit);
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m256i __rounding = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ __m256i cospi_p62_m02 = pair_set_w16_epi16(cospi[62], -cospi[2]);
+ __m256i cospi_p02_p62 = pair_set_w16_epi16(cospi[2], cospi[62]);
+ __m256i cospi_p30_m34 = pair_set_w16_epi16(cospi[30], -cospi[34]);
+ __m256i cospi_p34_p30 = pair_set_w16_epi16(cospi[34], cospi[30]);
+ __m256i cospi_p46_m18 = pair_set_w16_epi16(cospi[46], -cospi[18]);
+ __m256i cospi_p18_p46 = pair_set_w16_epi16(cospi[18], cospi[46]);
+ __m256i cospi_p14_m50 = pair_set_w16_epi16(cospi[14], -cospi[50]);
+ __m256i cospi_p50_p14 = pair_set_w16_epi16(cospi[50], cospi[14]);
+ __m256i cospi_p54_m10 = pair_set_w16_epi16(cospi[54], -cospi[10]);
+ __m256i cospi_p10_p54 = pair_set_w16_epi16(cospi[10], cospi[54]);
+ __m256i cospi_p22_m42 = pair_set_w16_epi16(cospi[22], -cospi[42]);
+ __m256i cospi_p42_p22 = pair_set_w16_epi16(cospi[42], cospi[22]);
+ __m256i cospi_p38_m26 = pair_set_w16_epi16(cospi[38], -cospi[26]);
+ __m256i cospi_p26_p38 = pair_set_w16_epi16(cospi[26], cospi[38]);
+ __m256i cospi_p06_m58 = pair_set_w16_epi16(cospi[6], -cospi[58]);
+ __m256i cospi_p58_p06 = pair_set_w16_epi16(cospi[58], cospi[6]);
+ __m256i cospi_p60_m04 = pair_set_w16_epi16(cospi[60], -cospi[4]);
+ __m256i cospi_p04_p60 = pair_set_w16_epi16(cospi[4], cospi[60]);
+ __m256i cospi_p28_m36 = pair_set_w16_epi16(cospi[28], -cospi[36]);
+ __m256i cospi_p36_p28 = pair_set_w16_epi16(cospi[36], cospi[28]);
+ __m256i cospi_p44_m20 = pair_set_w16_epi16(cospi[44], -cospi[20]);
+ __m256i cospi_p20_p44 = pair_set_w16_epi16(cospi[20], cospi[44]);
+ __m256i cospi_p12_m52 = pair_set_w16_epi16(cospi[12], -cospi[52]);
+ __m256i cospi_p52_p12 = pair_set_w16_epi16(cospi[52], cospi[12]);
+ __m256i cospi_p56_m08 = pair_set_w16_epi16(cospi[56], -cospi[8]);
+ __m256i cospi_p08_p56 = pair_set_w16_epi16(cospi[8], cospi[56]);
+ __m256i cospi_p24_m40 = pair_set_w16_epi16(cospi[24], -cospi[40]);
+ __m256i cospi_p40_p24 = pair_set_w16_epi16(cospi[40], cospi[24]);
+ __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
+ __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
+ __m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]);
+ __m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]);
+
+ // stage 1
+ __m256i x1[32];
+ x1[0] = input[0];
+ x1[1] = input[16];
+ x1[2] = input[8];
+ x1[3] = input[24];
+ x1[4] = input[4];
+ x1[5] = input[20];
+ x1[6] = input[12];
+ x1[7] = input[28];
+ x1[8] = input[2];
+ x1[9] = input[18];
+ x1[10] = input[10];
+ x1[11] = input[26];
+ x1[12] = input[6];
+ x1[13] = input[22];
+ x1[14] = input[14];
+ x1[15] = input[30];
+ x1[16] = input[1];
+ x1[17] = input[17];
+ x1[18] = input[9];
+ x1[19] = input[25];
+ x1[20] = input[5];
+ x1[21] = input[21];
+ x1[22] = input[13];
+ x1[23] = input[29];
+ x1[24] = input[3];
+ x1[25] = input[19];
+ x1[26] = input[11];
+ x1[27] = input[27];
+ x1[28] = input[7];
+ x1[29] = input[23];
+ x1[30] = input[15];
+ x1[31] = input[31];
+
+ // stage 2
+ btf_16_w16_avx2(cospi_p62_m02, cospi_p02_p62, x1[16], x1[31], x1[16], x1[31]);
+ btf_16_w16_avx2(cospi_p30_m34, cospi_p34_p30, x1[17], x1[30], x1[17], x1[30]);
+ btf_16_w16_avx2(cospi_p46_m18, cospi_p18_p46, x1[18], x1[29], x1[18], x1[29]);
+ btf_16_w16_avx2(cospi_p14_m50, cospi_p50_p14, x1[19], x1[28], x1[19], x1[28]);
+ btf_16_w16_avx2(cospi_p54_m10, cospi_p10_p54, x1[20], x1[27], x1[20], x1[27]);
+ btf_16_w16_avx2(cospi_p22_m42, cospi_p42_p22, x1[21], x1[26], x1[21], x1[26]);
+ btf_16_w16_avx2(cospi_p38_m26, cospi_p26_p38, x1[22], x1[25], x1[22], x1[25]);
+ btf_16_w16_avx2(cospi_p06_m58, cospi_p58_p06, x1[23], x1[24], x1[23], x1[24]);
+
+ // stage 3
+ btf_16_w16_avx2(cospi_p60_m04, cospi_p04_p60, x1[8], x1[15], x1[8], x1[15]);
+ btf_16_w16_avx2(cospi_p28_m36, cospi_p36_p28, x1[9], x1[14], x1[9], x1[14]);
+ btf_16_w16_avx2(cospi_p44_m20, cospi_p20_p44, x1[10], x1[13], x1[10], x1[13]);
+ btf_16_w16_avx2(cospi_p12_m52, cospi_p52_p12, x1[11], x1[12], x1[11], x1[12]);
+ idct32_high16_stage3_avx2(x1);
+
+ // stage 4
+ btf_16_w16_avx2(cospi_p56_m08, cospi_p08_p56, x1[4], x1[7], x1[4], x1[7]);
+ btf_16_w16_avx2(cospi_p24_m40, cospi_p40_p24, x1[5], x1[6], x1[5], x1[6]);
+ btf_16_adds_subs_avx2(x1[8], x1[9]);
+ btf_16_subs_adds_avx2(x1[11], x1[10]);
+ btf_16_adds_subs_avx2(x1[12], x1[13]);
+ btf_16_subs_adds_avx2(x1[15], x1[14]);
+ idct32_high16_stage4_avx2(x1, cospi, __rounding, cos_bit);
+
+ // stage 5
+ btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, x1[0], x1[1], x1[0], x1[1]);
+ btf_16_w16_avx2(cospi_p48_m16, cospi_p16_p48, x1[2], x1[3], x1[2], x1[3]);
+ btf_16_adds_subs_avx2(x1[4], x1[5]);
+ btf_16_subs_adds_avx2(x1[7], x1[6]);
+ idct32_high24_stage5_avx2(x1, cospi, __rounding, cos_bit);
+
+ // stage 6
+ btf_16_adds_subs_avx2(x1[0], x1[3]);
+ btf_16_adds_subs_avx2(x1[1], x1[2]);
+ idct32_high28_stage6_avx2(x1, cospi, __rounding, cos_bit);
+
+ idct32_stage7_avx2(x1, cospi, __rounding, cos_bit);
+ idct32_stage8_avx2(x1, cospi, __rounding, cos_bit);
+ idct32_stage9_avx2(output, x1);
+}
+
+static INLINE void idct64_stage4_high32_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const __m256i cospi_m04_p60 = pair_set_w16_epi16(-cospi[4], cospi[60]);
+ const __m256i cospi_p60_p04 = pair_set_w16_epi16(cospi[60], cospi[4]);
+ const __m256i cospi_m60_m04 = pair_set_w16_epi16(-cospi[60], -cospi[4]);
+ const __m256i cospi_m36_p28 = pair_set_w16_epi16(-cospi[36], cospi[28]);
+ const __m256i cospi_p28_p36 = pair_set_w16_epi16(cospi[28], cospi[36]);
+ const __m256i cospi_m28_m36 = pair_set_w16_epi16(-cospi[28], -cospi[36]);
+ const __m256i cospi_m20_p44 = pair_set_w16_epi16(-cospi[20], cospi[44]);
+ const __m256i cospi_p44_p20 = pair_set_w16_epi16(cospi[44], cospi[20]);
+ const __m256i cospi_m44_m20 = pair_set_w16_epi16(-cospi[44], -cospi[20]);
+ const __m256i cospi_m52_p12 = pair_set_w16_epi16(-cospi[52], cospi[12]);
+ const __m256i cospi_p12_p52 = pair_set_w16_epi16(cospi[12], cospi[52]);
+ const __m256i cospi_m12_m52 = pair_set_w16_epi16(-cospi[12], -cospi[52]);
+ btf_16_w16_avx2(cospi_m04_p60, cospi_p60_p04, x[33], x[62], x[33], x[62]);
+ btf_16_w16_avx2(cospi_m60_m04, cospi_m04_p60, x[34], x[61], x[34], x[61]);
+ btf_16_w16_avx2(cospi_m36_p28, cospi_p28_p36, x[37], x[58], x[37], x[58]);
+ btf_16_w16_avx2(cospi_m28_m36, cospi_m36_p28, x[38], x[57], x[38], x[57]);
+ btf_16_w16_avx2(cospi_m20_p44, cospi_p44_p20, x[41], x[54], x[41], x[54]);
+ btf_16_w16_avx2(cospi_m44_m20, cospi_m20_p44, x[42], x[53], x[42], x[53]);
+ btf_16_w16_avx2(cospi_m52_p12, cospi_p12_p52, x[45], x[50], x[45], x[50]);
+ btf_16_w16_avx2(cospi_m12_m52, cospi_m52_p12, x[46], x[49], x[46], x[49]);
+}
+
+static INLINE void idct64_stage5_high48_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]);
+ const __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]);
+ const __m256i cospi_m56_m08 = pair_set_w16_epi16(-cospi[56], -cospi[8]);
+ const __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]);
+ const __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]);
+ const __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]);
+ btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, x[17], x[30], x[17], x[30]);
+ btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, x[18], x[29], x[18], x[29]);
+ btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, x[21], x[26], x[21], x[26]);
+ btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, x[22], x[25], x[22], x[25]);
+ btf_16_adds_subs_avx2(x[32], x[35]);
+ btf_16_adds_subs_avx2(x[33], x[34]);
+ btf_16_subs_adds_avx2(x[39], x[36]);
+ btf_16_subs_adds_avx2(x[38], x[37]);
+ btf_16_adds_subs_avx2(x[40], x[43]);
+ btf_16_adds_subs_avx2(x[41], x[42]);
+ btf_16_subs_adds_avx2(x[47], x[44]);
+ btf_16_subs_adds_avx2(x[46], x[45]);
+ btf_16_adds_subs_avx2(x[48], x[51]);
+ btf_16_adds_subs_avx2(x[49], x[50]);
+ btf_16_subs_adds_avx2(x[55], x[52]);
+ btf_16_subs_adds_avx2(x[54], x[53]);
+ btf_16_adds_subs_avx2(x[56], x[59]);
+ btf_16_adds_subs_avx2(x[57], x[58]);
+ btf_16_subs_adds_avx2(x[63], x[60]);
+ btf_16_subs_adds_avx2(x[62], x[61]);
+}
+
+static INLINE void idct64_stage6_high32_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]);
+ const __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]);
+ const __m256i cospi_m56_m08 = pair_set_w16_epi16(-cospi[56], -cospi[8]);
+ const __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]);
+ const __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]);
+ const __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]);
+ btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, x[34], x[61], x[34], x[61]);
+ btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, x[35], x[60], x[35], x[60]);
+ btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, x[36], x[59], x[36], x[59]);
+ btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, x[37], x[58], x[37], x[58]);
+ btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, x[42], x[53], x[42], x[53]);
+ btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, x[43], x[52], x[43], x[52]);
+ btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, x[44], x[51], x[44], x[51]);
+ btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, x[45], x[50], x[45], x[50]);
+}
+
+static INLINE void idct64_stage6_high48_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ btf_16_adds_subs_avx2(x[16], x[19]);
+ btf_16_adds_subs_avx2(x[17], x[18]);
+ btf_16_subs_adds_avx2(x[23], x[20]);
+ btf_16_subs_adds_avx2(x[22], x[21]);
+ btf_16_adds_subs_avx2(x[24], x[27]);
+ btf_16_adds_subs_avx2(x[25], x[26]);
+ btf_16_subs_adds_avx2(x[31], x[28]);
+ btf_16_subs_adds_avx2(x[30], x[29]);
+ idct64_stage6_high32_avx2(x, cospi, __rounding, cos_bit);
+}
+
+static INLINE void idct64_stage7_high48_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
+ const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
+ const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
+ btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, x[18], x[29], x[18], x[29]);
+ btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, x[19], x[28], x[19], x[28]);
+ btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, x[20], x[27], x[20], x[27]);
+ btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, x[21], x[26], x[21], x[26]);
+ btf_16_adds_subs_avx2(x[32], x[39]);
+ btf_16_adds_subs_avx2(x[33], x[38]);
+ btf_16_adds_subs_avx2(x[34], x[37]);
+ btf_16_adds_subs_avx2(x[35], x[36]);
+ btf_16_subs_adds_avx2(x[47], x[40]);
+ btf_16_subs_adds_avx2(x[46], x[41]);
+ btf_16_subs_adds_avx2(x[45], x[42]);
+ btf_16_subs_adds_avx2(x[44], x[43]);
+ btf_16_adds_subs_avx2(x[48], x[55]);
+ btf_16_adds_subs_avx2(x[49], x[54]);
+ btf_16_adds_subs_avx2(x[50], x[53]);
+ btf_16_adds_subs_avx2(x[51], x[52]);
+ btf_16_subs_adds_avx2(x[63], x[56]);
+ btf_16_subs_adds_avx2(x[62], x[57]);
+ btf_16_subs_adds_avx2(x[61], x[58]);
+ btf_16_subs_adds_avx2(x[60], x[59]);
+}
+
+static INLINE void idct64_stage8_high48_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
+ const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
+ const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
+ btf_16_adds_subs_avx2(x[16], x[23]);
+ btf_16_adds_subs_avx2(x[17], x[22]);
+ btf_16_adds_subs_avx2(x[18], x[21]);
+ btf_16_adds_subs_avx2(x[19], x[20]);
+ btf_16_subs_adds_avx2(x[31], x[24]);
+ btf_16_subs_adds_avx2(x[30], x[25]);
+ btf_16_subs_adds_avx2(x[29], x[26]);
+ btf_16_subs_adds_avx2(x[28], x[27]);
+ btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, x[36], x[59], x[36], x[59]);
+ btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, x[37], x[58], x[37], x[58]);
+ btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, x[38], x[57], x[38], x[57]);
+ btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, x[39], x[56], x[39], x[56]);
+ btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, x[40], x[55], x[40], x[55]);
+ btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, x[41], x[54], x[41], x[54]);
+ btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, x[42], x[53], x[42], x[53]);
+ btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, x[43], x[52], x[43], x[52]);
+}
+
+static INLINE void idct64_stage9_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
+ const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
+ btf_16_adds_subs_avx2(x[0], x[15]);
+ btf_16_adds_subs_avx2(x[1], x[14]);
+ btf_16_adds_subs_avx2(x[2], x[13]);
+ btf_16_adds_subs_avx2(x[3], x[12]);
+ btf_16_adds_subs_avx2(x[4], x[11]);
+ btf_16_adds_subs_avx2(x[5], x[10]);
+ btf_16_adds_subs_avx2(x[6], x[9]);
+ btf_16_adds_subs_avx2(x[7], x[8]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[20], x[27], x[20], x[27]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[21], x[26], x[21], x[26]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[22], x[25], x[22], x[25]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[23], x[24], x[23], x[24]);
+ btf_16_adds_subs_avx2(x[32], x[47]);
+ btf_16_adds_subs_avx2(x[33], x[46]);
+ btf_16_adds_subs_avx2(x[34], x[45]);
+ btf_16_adds_subs_avx2(x[35], x[44]);
+ btf_16_adds_subs_avx2(x[36], x[43]);
+ btf_16_adds_subs_avx2(x[37], x[42]);
+ btf_16_adds_subs_avx2(x[38], x[41]);
+ btf_16_adds_subs_avx2(x[39], x[40]);
+ btf_16_subs_adds_avx2(x[63], x[48]);
+ btf_16_subs_adds_avx2(x[62], x[49]);
+ btf_16_subs_adds_avx2(x[61], x[50]);
+ btf_16_subs_adds_avx2(x[60], x[51]);
+ btf_16_subs_adds_avx2(x[59], x[52]);
+ btf_16_subs_adds_avx2(x[58], x[53]);
+ btf_16_subs_adds_avx2(x[57], x[54]);
+ btf_16_subs_adds_avx2(x[56], x[55]);
+}
+
+static INLINE void idct64_stage10_avx2(__m256i *x, const int32_t *cospi,
+ const __m256i __rounding,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
+ const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
+ btf_16_adds_subs_avx2(x[0], x[31]);
+ btf_16_adds_subs_avx2(x[1], x[30]);
+ btf_16_adds_subs_avx2(x[2], x[29]);
+ btf_16_adds_subs_avx2(x[3], x[28]);
+ btf_16_adds_subs_avx2(x[4], x[27]);
+ btf_16_adds_subs_avx2(x[5], x[26]);
+ btf_16_adds_subs_avx2(x[6], x[25]);
+ btf_16_adds_subs_avx2(x[7], x[24]);
+ btf_16_adds_subs_avx2(x[8], x[23]);
+ btf_16_adds_subs_avx2(x[9], x[22]);
+ btf_16_adds_subs_avx2(x[10], x[21]);
+ btf_16_adds_subs_avx2(x[11], x[20]);
+ btf_16_adds_subs_avx2(x[12], x[19]);
+ btf_16_adds_subs_avx2(x[13], x[18]);
+ btf_16_adds_subs_avx2(x[14], x[17]);
+ btf_16_adds_subs_avx2(x[15], x[16]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[40], x[55], x[40], x[55]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[41], x[54], x[41], x[54]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[42], x[53], x[42], x[53]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[43], x[52], x[43], x[52]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[44], x[51], x[44], x[51]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[45], x[50], x[45], x[50]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[46], x[49], x[46], x[49]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[47], x[48], x[47], x[48]);
+}
+
+static INLINE void idct64_stage11_avx2(__m256i *output, __m256i *x) {
+ btf_16_adds_subs_out_avx2(output[0], output[63], x[0], x[63]);
+ btf_16_adds_subs_out_avx2(output[1], output[62], x[1], x[62]);
+ btf_16_adds_subs_out_avx2(output[2], output[61], x[2], x[61]);
+ btf_16_adds_subs_out_avx2(output[3], output[60], x[3], x[60]);
+ btf_16_adds_subs_out_avx2(output[4], output[59], x[4], x[59]);
+ btf_16_adds_subs_out_avx2(output[5], output[58], x[5], x[58]);
+ btf_16_adds_subs_out_avx2(output[6], output[57], x[6], x[57]);
+ btf_16_adds_subs_out_avx2(output[7], output[56], x[7], x[56]);
+ btf_16_adds_subs_out_avx2(output[8], output[55], x[8], x[55]);
+ btf_16_adds_subs_out_avx2(output[9], output[54], x[9], x[54]);
+ btf_16_adds_subs_out_avx2(output[10], output[53], x[10], x[53]);
+ btf_16_adds_subs_out_avx2(output[11], output[52], x[11], x[52]);
+ btf_16_adds_subs_out_avx2(output[12], output[51], x[12], x[51]);
+ btf_16_adds_subs_out_avx2(output[13], output[50], x[13], x[50]);
+ btf_16_adds_subs_out_avx2(output[14], output[49], x[14], x[49]);
+ btf_16_adds_subs_out_avx2(output[15], output[48], x[15], x[48]);
+ btf_16_adds_subs_out_avx2(output[16], output[47], x[16], x[47]);
+ btf_16_adds_subs_out_avx2(output[17], output[46], x[17], x[46]);
+ btf_16_adds_subs_out_avx2(output[18], output[45], x[18], x[45]);
+ btf_16_adds_subs_out_avx2(output[19], output[44], x[19], x[44]);
+ btf_16_adds_subs_out_avx2(output[20], output[43], x[20], x[43]);
+ btf_16_adds_subs_out_avx2(output[21], output[42], x[21], x[42]);
+ btf_16_adds_subs_out_avx2(output[22], output[41], x[22], x[41]);
+ btf_16_adds_subs_out_avx2(output[23], output[40], x[23], x[40]);
+ btf_16_adds_subs_out_avx2(output[24], output[39], x[24], x[39]);
+ btf_16_adds_subs_out_avx2(output[25], output[38], x[25], x[38]);
+ btf_16_adds_subs_out_avx2(output[26], output[37], x[26], x[37]);
+ btf_16_adds_subs_out_avx2(output[27], output[36], x[27], x[36]);
+ btf_16_adds_subs_out_avx2(output[28], output[35], x[28], x[35]);
+ btf_16_adds_subs_out_avx2(output[29], output[34], x[29], x[34]);
+ btf_16_adds_subs_out_avx2(output[30], output[33], x[30], x[33]);
+ btf_16_adds_subs_out_avx2(output[31], output[32], x[31], x[32]);
+}
+
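+// The idct64_lowN variants below are specialized for small eob values: only
+// the lowest-frequency input rows (1, 8, 16 or 32) are assumed nonzero, so
+// butterflies whose inputs are known to be zero are skipped; the low8/low16/
+// low32 paths then fall into the shared idct64_stage8..11 helpers above.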
+static void idct64_low1_new_avx2(const __m256i *input, __m256i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+
+ // stage 1
+ __m256i x[32];
+ x[0] = input[0];
+
+ // stage 2
+ // stage 3
+ // stage 4
+ // stage 5
+ // stage 6
+ btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]);
+
+ // stage 7
+ // stage 8
+ // stage 9
+ // stage 10
+ // stage 11
+ output[0] = x[0];
+ output[63] = x[0];
+ output[1] = x[1];
+ output[62] = x[1];
+ output[2] = x[1];
+ output[61] = x[1];
+ output[3] = x[0];
+ output[60] = x[0];
+ output[4] = x[0];
+ output[59] = x[0];
+ output[5] = x[1];
+ output[58] = x[1];
+ output[6] = x[1];
+ output[57] = x[1];
+ output[7] = x[0];
+ output[56] = x[0];
+ output[8] = x[0];
+ output[55] = x[0];
+ output[9] = x[1];
+ output[54] = x[1];
+ output[10] = x[1];
+ output[53] = x[1];
+ output[11] = x[0];
+ output[52] = x[0];
+ output[12] = x[0];
+ output[51] = x[0];
+ output[13] = x[1];
+ output[50] = x[1];
+ output[14] = x[1];
+ output[49] = x[1];
+ output[15] = x[0];
+ output[48] = x[0];
+ output[16] = x[0];
+ output[47] = x[0];
+ output[17] = x[1];
+ output[46] = x[1];
+ output[18] = x[1];
+ output[45] = x[1];
+ output[19] = x[0];
+ output[44] = x[0];
+ output[20] = x[0];
+ output[43] = x[0];
+ output[21] = x[1];
+ output[42] = x[1];
+ output[22] = x[1];
+ output[41] = x[1];
+ output[23] = x[0];
+ output[40] = x[0];
+ output[24] = x[0];
+ output[39] = x[0];
+ output[25] = x[1];
+ output[38] = x[1];
+ output[26] = x[1];
+ output[37] = x[1];
+ output[27] = x[0];
+ output[36] = x[0];
+ output[28] = x[0];
+ output[35] = x[0];
+ output[29] = x[1];
+ output[34] = x[1];
+ output[30] = x[1];
+ output[33] = x[1];
+ output[31] = x[0];
+ output[32] = x[0];
+}
+
+static void idct64_low8_new_avx2(const __m256i *input, __m256i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m256i __rounding = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
+ const __m256i cospi_m04_p60 = pair_set_w16_epi16(-cospi[4], cospi[60]);
+ const __m256i cospi_p60_p04 = pair_set_w16_epi16(cospi[60], cospi[4]);
+ const __m256i cospi_m36_p28 = pair_set_w16_epi16(-cospi[36], cospi[28]);
+ const __m256i cospi_m28_m36 = pair_set_w16_epi16(-cospi[28], -cospi[36]);
+ const __m256i cospi_m20_p44 = pair_set_w16_epi16(-cospi[20], cospi[44]);
+ const __m256i cospi_p44_p20 = pair_set_w16_epi16(cospi[44], cospi[20]);
+ const __m256i cospi_m52_p12 = pair_set_w16_epi16(-cospi[52], cospi[12]);
+ const __m256i cospi_m12_m52 = pair_set_w16_epi16(-cospi[12], -cospi[52]);
+ const __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]);
+ const __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]);
+ const __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]);
+ const __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]);
+ const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
+ const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
+ const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
+ const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
+
+ // stage 1
+ __m256i x[64];
+ x[0] = input[0];
+ x[8] = input[4];
+ x[16] = input[2];
+ x[24] = input[6];
+ x[32] = input[1];
+ x[40] = input[5];
+ x[48] = input[3];
+ x[56] = input[7];
+
+ // stage 2
+ btf_16_w16_0_avx2(cospi[63], cospi[1], x[32], x[32], x[63]);
+ btf_16_w16_0_avx2(-cospi[57], cospi[7], x[56], x[39], x[56]);
+ btf_16_w16_0_avx2(cospi[59], cospi[5], x[40], x[40], x[55]);
+ btf_16_w16_0_avx2(-cospi[61], cospi[3], x[48], x[47], x[48]);
+
+ // stage 3
+ btf_16_w16_0_avx2(cospi[62], cospi[2], x[16], x[16], x[31]);
+ btf_16_w16_0_avx2(-cospi[58], cospi[6], x[24], x[23], x[24]);
+ x[33] = x[32];
+ x[38] = x[39];
+ x[41] = x[40];
+ x[46] = x[47];
+ x[49] = x[48];
+ x[54] = x[55];
+ x[57] = x[56];
+ x[62] = x[63];
+
+ // stage 4
+ btf_16_w16_0_avx2(cospi[60], cospi[4], x[8], x[8], x[15]);
+ x[17] = x[16];
+ x[22] = x[23];
+ x[25] = x[24];
+ x[30] = x[31];
+ btf_16_w16_avx2(cospi_m04_p60, cospi_p60_p04, x[33], x[62], x[33], x[62]);
+ btf_16_w16_avx2(cospi_m28_m36, cospi_m36_p28, x[38], x[57], x[38], x[57]);
+ btf_16_w16_avx2(cospi_m20_p44, cospi_p44_p20, x[41], x[54], x[41], x[54]);
+ btf_16_w16_avx2(cospi_m12_m52, cospi_m52_p12, x[46], x[49], x[46], x[49]);
+
+ // stage 5
+ x[9] = x[8];
+ x[14] = x[15];
+ btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, x[17], x[30], x[17], x[30]);
+ btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, x[22], x[25], x[22], x[25]);
+ x[35] = x[32];
+ x[34] = x[33];
+ x[36] = x[39];
+ x[37] = x[38];
+ x[43] = x[40];
+ x[42] = x[41];
+ x[44] = x[47];
+ x[45] = x[46];
+ x[51] = x[48];
+ x[50] = x[49];
+ x[52] = x[55];
+ x[53] = x[54];
+ x[59] = x[56];
+ x[58] = x[57];
+ x[60] = x[63];
+ x[61] = x[62];
+
+ // stage 6
+ btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]);
+ btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
+ x[19] = x[16];
+ x[18] = x[17];
+ x[20] = x[23];
+ x[21] = x[22];
+ x[27] = x[24];
+ x[26] = x[25];
+ x[28] = x[31];
+ x[29] = x[30];
+ idct64_stage6_high32_avx2(x, cospi, __rounding, cos_bit);
+
+ // stage 7
+ x[3] = x[0];
+ x[2] = x[1];
+ x[11] = x[8];
+ x[10] = x[9];
+ x[12] = x[15];
+ x[13] = x[14];
+ idct64_stage7_high48_avx2(x, cospi, __rounding, cos_bit);
+
+ // stage 8
+ x[7] = x[0];
+ x[6] = x[1];
+ x[5] = x[2];
+ x[4] = x[3];
+ x[9] = x[9];  // no-op
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
+ idct64_stage8_high48_avx2(x, cospi, __rounding, cos_bit);
+
+ idct64_stage9_avx2(x, cospi, __rounding, cos_bit);
+ idct64_stage10_avx2(x, cospi, __rounding, cos_bit);
+ idct64_stage11_avx2(output, x);
+}
+
+static void idct64_low16_new_avx2(const __m256i *input, __m256i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m256i __rounding = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
+ const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
+ const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
+ const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
+ const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
+
+ // stage 1
+ __m256i x[64];
+ x[0] = input[0];
+ x[4] = input[8];
+ x[8] = input[4];
+ x[12] = input[12];
+ x[16] = input[2];
+ x[20] = input[10];
+ x[24] = input[6];
+ x[28] = input[14];
+ x[32] = input[1];
+ x[36] = input[9];
+ x[40] = input[5];
+ x[44] = input[13];
+ x[48] = input[3];
+ x[52] = input[11];
+ x[56] = input[7];
+ x[60] = input[15];
+
+ // stage 2
+ btf_16_w16_0_avx2(cospi[63], cospi[1], x[32], x[32], x[63]);
+ btf_16_w16_0_avx2(-cospi[49], cospi[15], x[60], x[35], x[60]);
+ btf_16_w16_0_avx2(cospi[55], cospi[9], x[36], x[36], x[59]);
+ btf_16_w16_0_avx2(-cospi[57], cospi[7], x[56], x[39], x[56]);
+ btf_16_w16_0_avx2(cospi[59], cospi[5], x[40], x[40], x[55]);
+ btf_16_w16_0_avx2(-cospi[53], cospi[11], x[52], x[43], x[52]);
+ btf_16_w16_0_avx2(cospi[51], cospi[13], x[44], x[44], x[51]);
+ btf_16_w16_0_avx2(-cospi[61], cospi[3], x[48], x[47], x[48]);
+
+ // stage 3
+ btf_16_w16_0_avx2(cospi[62], cospi[2], x[16], x[16], x[31]);
+ btf_16_w16_0_avx2(-cospi[50], cospi[14], x[28], x[19], x[28]);
+ btf_16_w16_0_avx2(cospi[54], cospi[10], x[20], x[20], x[27]);
+ btf_16_w16_0_avx2(-cospi[58], cospi[6], x[24], x[23], x[24]);
+ x[33] = x[32];
+ x[34] = x[35];
+ x[37] = x[36];
+ x[38] = x[39];
+ x[41] = x[40];
+ x[42] = x[43];
+ x[45] = x[44];
+ x[46] = x[47];
+ x[49] = x[48];
+ x[50] = x[51];
+ x[53] = x[52];
+ x[54] = x[55];
+ x[57] = x[56];
+ x[58] = x[59];
+ x[61] = x[60];
+ x[62] = x[63];
+
+ // stage 4
+ btf_16_w16_0_avx2(cospi[60], cospi[4], x[8], x[8], x[15]);
+ btf_16_w16_0_avx2(-cospi[52], cospi[12], x[12], x[11], x[12]);
+ x[17] = x[16];
+ x[18] = x[19];
+ x[21] = x[20];
+ x[22] = x[23];
+ x[25] = x[24];
+ x[26] = x[27];
+ x[29] = x[28];
+ x[30] = x[31];
+ idct64_stage4_high32_avx2(x, cospi, __rounding, cos_bit);
+
+ // stage 5
+ btf_16_w16_0_avx2(cospi[56], cospi[8], x[4], x[4], x[7]);
+ x[9] = x[8];
+ x[10] = x[11];
+ x[13] = x[12];
+ x[14] = x[15];
+ idct64_stage5_high48_avx2(x, cospi, __rounding, cos_bit);
+
+ // stage 6
+ btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]);
+ x[5] = x[4];
+ x[6] = x[7];
+ btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
+ btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]);
+ idct64_stage6_high48_avx2(x, cospi, __rounding, cos_bit);
+
+ // stage 7
+ x[3] = x[0];
+ x[2] = x[1];
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
+ btf_16_adds_subs_avx2(x[8], x[11]);
+ btf_16_adds_subs_avx2(x[9], x[10]);
+ btf_16_subs_adds_avx2(x[15], x[12]);
+ btf_16_subs_adds_avx2(x[14], x[13]);
+ idct64_stage7_high48_avx2(x, cospi, __rounding, cos_bit);
+
+ // stage 8
+ btf_16_adds_subs_avx2(x[0], x[7]);
+ btf_16_adds_subs_avx2(x[1], x[6]);
+ btf_16_adds_subs_avx2(x[2], x[5]);
+ btf_16_adds_subs_avx2(x[3], x[4]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
+ idct64_stage8_high48_avx2(x, cospi, __rounding, cos_bit);
+
+ idct64_stage9_avx2(x, cospi, __rounding, cos_bit);
+ idct64_stage10_avx2(x, cospi, __rounding, cos_bit);
+ idct64_stage11_avx2(output, x);
+}
+
+static void idct64_low32_new_avx2(const __m256i *input, __m256i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m256i __rounding = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
+ const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
+ const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
+ const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
+ const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
+
+ // stage 1
+ __m256i x[64];
+ x[0] = input[0];
+ x[2] = input[16];
+ x[4] = input[8];
+ x[6] = input[24];
+ x[8] = input[4];
+ x[10] = input[20];
+ x[12] = input[12];
+ x[14] = input[28];
+ x[16] = input[2];
+ x[18] = input[18];
+ x[20] = input[10];
+ x[22] = input[26];
+ x[24] = input[6];
+ x[26] = input[22];
+ x[28] = input[14];
+ x[30] = input[30];
+ x[32] = input[1];
+ x[34] = input[17];
+ x[36] = input[9];
+ x[38] = input[25];
+ x[40] = input[5];
+ x[42] = input[21];
+ x[44] = input[13];
+ x[46] = input[29];
+ x[48] = input[3];
+ x[50] = input[19];
+ x[52] = input[11];
+ x[54] = input[27];
+ x[56] = input[7];
+ x[58] = input[23];
+ x[60] = input[15];
+ x[62] = input[31];
+
+ // stage 2
+ btf_16_w16_0_avx2(cospi[63], cospi[1], x[32], x[32], x[63]);
+ btf_16_w16_0_avx2(-cospi[33], cospi[31], x[62], x[33], x[62]);
+ btf_16_w16_0_avx2(cospi[47], cospi[17], x[34], x[34], x[61]);
+ btf_16_w16_0_avx2(-cospi[49], cospi[15], x[60], x[35], x[60]);
+ btf_16_w16_0_avx2(cospi[55], cospi[9], x[36], x[36], x[59]);
+ btf_16_w16_0_avx2(-cospi[41], cospi[23], x[58], x[37], x[58]);
+ btf_16_w16_0_avx2(cospi[39], cospi[25], x[38], x[38], x[57]);
+ btf_16_w16_0_avx2(-cospi[57], cospi[7], x[56], x[39], x[56]);
+ btf_16_w16_0_avx2(cospi[59], cospi[5], x[40], x[40], x[55]);
+ btf_16_w16_0_avx2(-cospi[37], cospi[27], x[54], x[41], x[54]);
+ btf_16_w16_0_avx2(cospi[43], cospi[21], x[42], x[42], x[53]);
+ btf_16_w16_0_avx2(-cospi[53], cospi[11], x[52], x[43], x[52]);
+ btf_16_w16_0_avx2(cospi[51], cospi[13], x[44], x[44], x[51]);
+ btf_16_w16_0_avx2(-cospi[45], cospi[19], x[50], x[45], x[50]);
+ btf_16_w16_0_avx2(cospi[35], cospi[29], x[46], x[46], x[49]);
+ btf_16_w16_0_avx2(-cospi[61], cospi[3], x[48], x[47], x[48]);
+
+ // stage 3
+ btf_16_w16_0_avx2(cospi[62], cospi[2], x[16], x[16], x[31]);
+ btf_16_w16_0_avx2(-cospi[34], cospi[30], x[30], x[17], x[30]);
+ btf_16_w16_0_avx2(cospi[46], cospi[18], x[18], x[18], x[29]);
+ btf_16_w16_0_avx2(-cospi[50], cospi[14], x[28], x[19], x[28]);
+ btf_16_w16_0_avx2(cospi[54], cospi[10], x[20], x[20], x[27]);
+ btf_16_w16_0_avx2(-cospi[42], cospi[22], x[26], x[21], x[26]);
+ btf_16_w16_0_avx2(cospi[38], cospi[26], x[22], x[22], x[25]);
+ btf_16_w16_0_avx2(-cospi[58], cospi[6], x[24], x[23], x[24]);
+ btf_16_adds_subs_avx2(x[32], x[33]);
+ btf_16_subs_adds_avx2(x[35], x[34]);
+ btf_16_adds_subs_avx2(x[36], x[37]);
+ btf_16_subs_adds_avx2(x[39], x[38]);
+ btf_16_adds_subs_avx2(x[40], x[41]);
+ btf_16_subs_adds_avx2(x[43], x[42]);
+ btf_16_adds_subs_avx2(x[44], x[45]);
+ btf_16_subs_adds_avx2(x[47], x[46]);
+ btf_16_adds_subs_avx2(x[48], x[49]);
+ btf_16_subs_adds_avx2(x[51], x[50]);
+ btf_16_adds_subs_avx2(x[52], x[53]);
+ btf_16_subs_adds_avx2(x[55], x[54]);
+ btf_16_adds_subs_avx2(x[56], x[57]);
+ btf_16_subs_adds_avx2(x[59], x[58]);
+ btf_16_adds_subs_avx2(x[60], x[61]);
+ btf_16_subs_adds_avx2(x[63], x[62]);
+
+ // stage 4
+ btf_16_w16_0_avx2(cospi[60], cospi[4], x[8], x[8], x[15]);
+ btf_16_w16_0_avx2(-cospi[36], cospi[28], x[14], x[9], x[14]);
+ btf_16_w16_0_avx2(cospi[44], cospi[20], x[10], x[10], x[13]);
+ btf_16_w16_0_avx2(-cospi[52], cospi[12], x[12], x[11], x[12]);
+ btf_16_adds_subs_avx2(x[16], x[17]);
+ btf_16_subs_adds_avx2(x[19], x[18]);
+ btf_16_adds_subs_avx2(x[20], x[21]);
+ btf_16_subs_adds_avx2(x[23], x[22]);
+ btf_16_adds_subs_avx2(x[24], x[25]);
+ btf_16_subs_adds_avx2(x[27], x[26]);
+ btf_16_adds_subs_avx2(x[28], x[29]);
+ btf_16_subs_adds_avx2(x[31], x[30]);
+ idct64_stage4_high32_avx2(x, cospi, __rounding, cos_bit);
+
+ // stage 5
+ btf_16_w16_0_avx2(cospi[56], cospi[8], x[4], x[4], x[7]);
+ btf_16_w16_0_avx2(-cospi[40], cospi[24], x[6], x[5], x[6]);
+ btf_16_adds_subs_avx2(x[8], x[9]);
+ btf_16_subs_adds_avx2(x[11], x[10]);
+ btf_16_adds_subs_avx2(x[12], x[13]);
+ btf_16_subs_adds_avx2(x[15], x[14]);
+ idct64_stage5_high48_avx2(x, cospi, __rounding, cos_bit);
+
+ // stage 6
+ btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]);
+ btf_16_w16_0_avx2(cospi[48], cospi[16], x[2], x[2], x[3]);
+ btf_16_adds_subs_avx2(x[4], x[5]);
+ btf_16_subs_adds_avx2(x[7], x[6]);
+ btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
+ btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]);
+ idct64_stage6_high48_avx2(x, cospi, __rounding, cos_bit);
+
+ // stage 7
+ btf_16_adds_subs_avx2(x[0], x[3]);
+ btf_16_adds_subs_avx2(x[1], x[2]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
+ btf_16_adds_subs_avx2(x[8], x[11]);
+ btf_16_adds_subs_avx2(x[9], x[10]);
+ btf_16_subs_adds_avx2(x[15], x[12]);
+ btf_16_subs_adds_avx2(x[14], x[13]);
+ idct64_stage7_high48_avx2(x, cospi, __rounding, cos_bit);
+
+ // stage 8
+ btf_16_adds_subs_avx2(x[0], x[7]);
+ btf_16_adds_subs_avx2(x[1], x[6]);
+ btf_16_adds_subs_avx2(x[2], x[5]);
+ btf_16_adds_subs_avx2(x[3], x[4]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
+ btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
+ idct64_stage8_high48_avx2(x, cospi, __rounding, cos_bit);
+
+ // stage 9~11
+ idct64_stage9_avx2(x, cospi, __rounding, cos_bit);
+ idct64_stage10_avx2(x, cospi, __rounding, cos_bit);
+ idct64_stage11_avx2(output, x);
+}
+
+// 1D inverse transform functions that process 16 pixels at a time.
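+// Indexed as [transform length][1D transform type][nonzero-coefficient
+// bucket]; the last index is derived from the eob via
+// lowbd_txfm_all_1d_zeros_idx and selects the low1/low8/low16/full variant.
+// NULL entries are combinations handled elsewhere (identity paths or the
+// SSSE3 fallback).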
+static const transform_1d_avx2
+ lowbd_txfm_all_1d_zeros_w16_arr[TX_SIZES][ITX_TYPES_1D][4] = {
+ {
+ { NULL, NULL, NULL, NULL },
+ { NULL, NULL, NULL, NULL },
+ { NULL, NULL, NULL, NULL },
+ },
+ { { NULL, NULL, NULL, NULL },
+ { NULL, NULL, NULL, NULL },
+ { NULL, NULL, NULL, NULL } },
+ {
+ { idct16_low1_new_avx2, idct16_low8_new_avx2, idct16_new_avx2, NULL },
+ { iadst16_low1_new_avx2, iadst16_low8_new_avx2, iadst16_new_avx2,
+ NULL },
+ { NULL, NULL, NULL, NULL },
+ },
+ { { idct32_low1_new_avx2, idct32_low8_new_avx2, idct32_low16_new_avx2,
+ idct32_new_avx2 },
+ { NULL, NULL, NULL, NULL },
+ { NULL, NULL, NULL, NULL } },
+ { { idct64_low1_new_avx2, idct64_low8_new_avx2, idct64_low16_new_avx2,
+ idct64_low32_new_avx2 },
+ { NULL, NULL, NULL, NULL },
+ { NULL, NULL, NULL, NULL } }
+ };
+
+// Only handles transforms with width >= 16 and height >= 16.
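+// Row pass first: for each 16-row band up to eoby, 16x16 tiles of 32-bit
+// coefficients are loaded as 16-bit and transposed, scaled by 1/sqrt(2) for
+// rectangular sizes, run through the row transform and shift[0], then
+// transposed into buf1. The column pass applies the column transform and
+// shift[1] to each 16-wide column and adds the result to the prediction.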
+static INLINE void lowbd_inv_txfm2d_add_no_identity_avx2(
+ const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
+ TX_SIZE tx_size, int eob) {
+ __m256i buf1[64 * 16];
+ int eobx, eoby;
+ get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob);
+ const int8_t *shift = inv_txfm_shift_ls[tx_size];
+ const int txw_idx = get_txw_idx(tx_size);
+ const int txh_idx = get_txh_idx(tx_size);
+ const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
+ const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
+ const int txfm_size_col = tx_size_wide[tx_size];
+ const int txfm_size_row = tx_size_high[tx_size];
+ const int buf_size_w_div16 = txfm_size_col >> 4;
+ const int buf_size_nonzero_w_div16 = (eobx + 16) >> 4;
+ const int buf_size_nonzero_h_div16 = (eoby + 16) >> 4;
+ const int input_stride = AOMMIN(32, txfm_size_col);
+ const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
+
+ const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
+ const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
+ const transform_1d_avx2 row_txfm =
+ lowbd_txfm_all_1d_zeros_w16_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
+ const transform_1d_avx2 col_txfm =
+ lowbd_txfm_all_1d_zeros_w16_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
+
+ assert(col_txfm != NULL);
+ assert(row_txfm != NULL);
+ int ud_flip, lr_flip;
+ get_flip_cfg(tx_type, &ud_flip, &lr_flip);
+ for (int i = 0; i < buf_size_nonzero_h_div16; i++) {
+ __m256i buf0[64];
+ const int32_t *input_row = input + (i << 4) * input_stride;
+ for (int j = 0; j < buf_size_nonzero_w_div16; ++j) {
+ __m256i *buf0_cur = buf0 + j * 16;
+ const int32_t *input_cur = input_row + j * 16;
+ load_buffer_32bit_to_16bit_w16_avx2(input_cur, input_stride, buf0_cur,
+ 16);
+ transpose_16bit_16x16_avx2(buf0_cur, buf0_cur);
+ }
+ if (rect_type == 1 || rect_type == -1) {
+ round_shift_avx2(buf0, buf0, input_stride); // 1/sqrt(2) scaling for rect sizes
+ }
+ row_txfm(buf0, buf0, cos_bit_row);
+ round_shift_16bit_w16_avx2(buf0, txfm_size_col, shift[0]);
+
+ __m256i *buf1_cur = buf1 + (i << 4);
+ if (lr_flip) {
+ for (int j = 0; j < buf_size_w_div16; ++j) {
+ __m256i temp[16];
+ flip_buf_av2(buf0 + 16 * j, temp, 16);
+ int offset = txfm_size_row * (buf_size_w_div16 - 1 - j);
+ transpose_16bit_16x16_avx2(temp, buf1_cur + offset);
+ }
+ } else {
+ for (int j = 0; j < buf_size_w_div16; ++j) {
+ transpose_16bit_16x16_avx2(buf0 + 16 * j, buf1_cur + txfm_size_row * j);
+ }
+ }
+ }
+ for (int i = 0; i < buf_size_w_div16; i++) {
+ __m256i *buf1_cur = buf1 + i * txfm_size_row;
+ col_txfm(buf1_cur, buf1_cur, cos_bit_col);
+ round_shift_16bit_w16_avx2(buf1_cur, txfm_size_row, shift[1]);
+ }
+ for (int i = 0; i < buf_size_w_div16; i++) {
+ lowbd_write_buffer_16xn_avx2(buf1 + i * txfm_size_row, output + 16 * i,
+ stride, ud_flip, txfm_size_row);
+ }
+}
+
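+// Identity row transform: scales each coefficient by the NewSqrt2 factor for
+// this transform width and applies shift[0], folding both roundings into one
+// right shift; rectangular sizes first get an extra 1/sqrt(2) scaling via
+// _mm256_mulhrs_epi16.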
+static INLINE void iidentity_row_16xn_avx2(__m256i *out, const int32_t *input,
+ int stride, int shift, int height,
+ int txw_idx, int rect_type) {
+ const int32_t *input_row = input;
+ const __m256i scale = _mm256_set1_epi16(NewSqrt2list[txw_idx]);
+ const __m256i rounding = _mm256_set1_epi16((1 << (NewSqrt2Bits - 1)) +
+ (1 << (NewSqrt2Bits - shift - 1)));
+ const __m256i one = _mm256_set1_epi16(1);
+ const __m256i scale_rounding = _mm256_unpacklo_epi16(scale, rounding);
+ if (rect_type != 1 && rect_type != -1) {
+ for (int i = 0; i < height; ++i) {
+ const __m256i src = load_32bit_to_16bit_w16_avx2(input_row);
+ input_row += stride;
+ __m256i lo = _mm256_unpacklo_epi16(src, one);
+ __m256i hi = _mm256_unpackhi_epi16(src, one);
+ lo = _mm256_madd_epi16(lo, scale_rounding);
+ hi = _mm256_madd_epi16(hi, scale_rounding);
+ lo = _mm256_srai_epi32(lo, NewSqrt2Bits - shift);
+ hi = _mm256_srai_epi32(hi, NewSqrt2Bits - shift);
+ out[i] = _mm256_packs_epi32(lo, hi);
+ }
+ } else {
+ const __m256i rect_scale =
+ _mm256_set1_epi16(NewInvSqrt2 << (15 - NewSqrt2Bits));
+ for (int i = 0; i < height; ++i) {
+ __m256i src = load_32bit_to_16bit_w16_avx2(input_row);
+ src = _mm256_mulhrs_epi16(src, rect_scale);
+ input_row += stride;
+ __m256i lo = _mm256_unpacklo_epi16(src, one);
+ __m256i hi = _mm256_unpackhi_epi16(src, one);
+ lo = _mm256_madd_epi16(lo, scale_rounding);
+ hi = _mm256_madd_epi16(hi, scale_rounding);
+ lo = _mm256_srai_epi32(lo, NewSqrt2Bits - shift);
+ hi = _mm256_srai_epi32(hi, NewSqrt2Bits - shift);
+ out[i] = _mm256_packs_epi32(lo, hi);
+ }
+ }
+}
+
+static INLINE void iidentity_col_16xn_avx2(uint8_t *output, int stride,
+ __m256i *buf, int shift, int height,
+ int txh_idx) {
+ const __m256i scale = _mm256_set1_epi16(NewSqrt2list[txh_idx]);
+ const __m256i scale_rounding = _mm256_set1_epi16(1 << (NewSqrt2Bits - 1));
+ const __m256i shift_rounding = _mm256_set1_epi32(1 << (-shift - 1));
+ const __m256i one = _mm256_set1_epi16(1);
+ const __m256i scale_coeff = _mm256_unpacklo_epi16(scale, scale_rounding);
+ for (int h = 0; h < height; ++h) {
+ __m256i lo = _mm256_unpacklo_epi16(buf[h], one);
+ __m256i hi = _mm256_unpackhi_epi16(buf[h], one);
+ lo = _mm256_madd_epi16(lo, scale_coeff);
+ hi = _mm256_madd_epi16(hi, scale_coeff);
+ lo = _mm256_srai_epi32(lo, NewSqrt2Bits);
+ hi = _mm256_srai_epi32(hi, NewSqrt2Bits);
+ lo = _mm256_add_epi32(lo, shift_rounding);
+ hi = _mm256_add_epi32(hi, shift_rounding);
+ lo = _mm256_srai_epi32(lo, -shift);
+ hi = _mm256_srai_epi32(hi, -shift);
+ const __m256i x = _mm256_packs_epi32(lo, hi);
+ write_recon_w16_avx2(x, output);
+ output += stride;
+ }
+}
+
+static INLINE void lowbd_inv_txfm2d_add_idtx_avx2(const int32_t *input,
+ uint8_t *output, int stride,
+ TX_SIZE tx_size,
+ int32_t eob) {
+ (void)eob;
+ const int8_t *shift = inv_txfm_shift_ls[tx_size];
+ const int txw_idx = get_txw_idx(tx_size);
+ const int txh_idx = get_txh_idx(tx_size);
+ const int txfm_size_col = tx_size_wide[tx_size];
+ const int txfm_size_row = tx_size_high[tx_size];
+ const int input_stride = AOMMIN(32, txfm_size_col);
+ const int row_max = AOMMIN(32, txfm_size_row);
+ const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
+ __m256i buf[32];
+ for (int i = 0; i < input_stride; i += 16) {
+ iidentity_row_16xn_avx2(buf, input + i, input_stride, shift[0], row_max,
+ txw_idx, rect_type);
+ iidentity_col_16xn_avx2(output + i, stride, buf, shift[1], row_max,
+ txh_idx);
+ }
+}
+
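+// Horizontal identity: handles V_DCT/V_ADST/V_FLIPADST, where the 1D
+// transform runs down the columns and the rows are only rescaled.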
+static INLINE void lowbd_inv_txfm2d_add_h_identity_avx2(
+ const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
+ TX_SIZE tx_size, int eob) {
+ int eobx, eoby;
+ get_eobx_eoby_scan_h_identity(&eobx, &eoby, tx_size, eob);
+ const int8_t *shift = inv_txfm_shift_ls[tx_size];
+ const int txw_idx = get_txw_idx(tx_size);
+ const int txh_idx = get_txh_idx(tx_size);
+ const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
+ const int txfm_size_col = tx_size_wide[tx_size];
+ const int txfm_size_row = tx_size_high[tx_size];
+ const int txfm_size_col_notzero = AOMMIN(32, txfm_size_col);
+ const int input_stride = txfm_size_col_notzero;
+ const int buf_size_w_div16 = (eobx + 16) >> 4;
+ const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
+
+ const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
+ const transform_1d_avx2 col_txfm =
+ lowbd_txfm_all_1d_zeros_w16_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
+
+ assert(col_txfm != NULL);
+
+ int ud_flip, lr_flip;
+ get_flip_cfg(tx_type, &ud_flip, &lr_flip);
+ for (int i = 0; i < buf_size_w_div16; i++) {
+ __m256i buf0[64];
+ iidentity_row_16xn_avx2(buf0, input + (i << 4), input_stride, shift[0],
+ eoby + 1, txw_idx, rect_type);
+ col_txfm(buf0, buf0, cos_bit_col);
+ __m256i mshift = _mm256_set1_epi16(1 << (15 + shift[1]));
+ int k = ud_flip ? (txfm_size_row - 1) : 0;
+ const int step = ud_flip ? -1 : 1;
+ for (int j = 0; j < txfm_size_row; ++j, k += step) {
+ __m256i res = _mm256_mulhrs_epi16(buf0[k], mshift);
+ write_recon_w16_avx2(res, output + (i << 4) + j * stride);
+ }
+ }
+}
+
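+// Vertical identity: handles H_DCT/H_ADST/H_FLIPADST, where the 1D transform
+// runs along the rows and the columns are only rescaled.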
+static INLINE void lowbd_inv_txfm2d_add_v_identity_avx2(
+ const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
+ TX_SIZE tx_size, int eob) {
+ __m256i buf1[64];
+ int eobx, eoby;
+ get_eobx_eoby_scan_v_identity(&eobx, &eoby, tx_size, eob);
+ const int8_t *shift = inv_txfm_shift_ls[tx_size];
+ const int txw_idx = get_txw_idx(tx_size);
+ const int txh_idx = get_txh_idx(tx_size);
+ const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
+ const int txfm_size_col = tx_size_wide[tx_size];
+ const int txfm_size_row = tx_size_high[tx_size];
+ const int buf_size_w_div16 = txfm_size_col >> 4;
+ const int buf_size_h_div16 = (eoby + 16) >> 4;
+ const int input_stride = AOMMIN(32, txfm_size_col);
+ const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
+
+ const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
+ const transform_1d_avx2 row_txfm =
+ lowbd_txfm_all_1d_zeros_w16_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
+
+ assert(row_txfm != NULL);
+
+ int ud_flip, lr_flip;
+ get_flip_cfg(tx_type, &ud_flip, &lr_flip);
+ for (int i = 0; i < buf_size_h_div16; i++) {
+ __m256i buf0[64];
+ const int32_t *input_row = input + i * input_stride * 16;
+ for (int j = 0; j < AOMMIN(4, buf_size_w_div16); ++j) {
+ __m256i *buf0_cur = buf0 + j * 16;
+ load_buffer_32bit_to_16bit_w16_avx2(input_row + j * 16, input_stride,
+ buf0_cur, 16);
+ transpose_16bit_16x16_avx2(buf0_cur, buf0_cur);
+ }
+ if (rect_type == 1 || rect_type == -1) {
+ round_shift_avx2(buf0, buf0, input_stride); // 1/sqrt(2) scaling for rect sizes
+ }
+ row_txfm(buf0, buf0, cos_bit_row);
+ round_shift_16bit_w16_avx2(buf0, txfm_size_col, shift[0]);
+ __m256i *_buf1 = buf1;
+ if (lr_flip) {
+ for (int j = 0; j < buf_size_w_div16; ++j) {
+ __m256i temp[16];
+ flip_buf_av2(buf0 + 16 * j, temp, 16);
+ transpose_16bit_16x16_avx2(temp,
+ _buf1 + 16 * (buf_size_w_div16 - 1 - j));
+ }
+ } else {
+ for (int j = 0; j < buf_size_w_div16; ++j) {
+ transpose_16bit_16x16_avx2(buf0 + 16 * j, _buf1 + 16 * j);
+ }
+ }
+ for (int j = 0; j < buf_size_w_div16; ++j) {
+ iidentity_col_16xn_avx2(output + i * 16 * stride + j * 16, stride,
+ buf1 + j * 16, shift[1], 16, txh_idx);
+ }
+ }
+}
+
+// Handles 16x16 and all larger sizes: 16x32, 32x16, 32x32, 16x64, 64x16,
+// 32x64, 64x32 and 64x64.
+static INLINE void lowbd_inv_txfm2d_add_universe_avx2(
+ const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
+ TX_SIZE tx_size, int eob) {
+ (void)eob;
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT: // ADST in vertical, DCT in horizontal
+ case DCT_ADST: // DCT in vertical, ADST in horizontal
+ case ADST_ADST: // ADST in both directions
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ lowbd_inv_txfm2d_add_no_identity_avx2(input, output, stride, tx_type,
+ tx_size, eob);
+ break;
+ case IDTX:
+ lowbd_inv_txfm2d_add_idtx_avx2(input, output, stride, tx_size, eob);
+ break;
+ case V_DCT:
+ case V_ADST:
+ case V_FLIPADST:
+ lowbd_inv_txfm2d_add_h_identity_avx2(input, output, stride, tx_type,
+ tx_size, eob);
+ break;
+ case H_DCT:
+ case H_ADST:
+ case H_FLIPADST:
+ lowbd_inv_txfm2d_add_v_identity_avx2(input, output, stride, tx_type,
+ tx_size, eob);
+ break;
+ default:
+ av1_lowbd_inv_txfm2d_add_ssse3(input, output, stride, tx_type, tx_size,
+ eob);
+ break;
+ }
+}
+
+void av1_lowbd_inv_txfm2d_add_avx2(const int32_t *input, uint8_t *output,
+ int stride, TX_TYPE tx_type, TX_SIZE tx_size,
+ int eob) {
+ switch (tx_size) {
+ case TX_4X4:
+ case TX_8X8:
+ case TX_4X8:
+ case TX_8X4:
+ case TX_8X16:
+ case TX_16X8:
+ case TX_4X16:
+ case TX_16X4:
+ case TX_8X32:
+ case TX_32X8:
+ av1_lowbd_inv_txfm2d_add_ssse3(input, output, stride, tx_type, tx_size,
+ eob);
+ break;
+ case TX_16X16:
+ case TX_32X32:
+ case TX_64X64:
+ case TX_16X32:
+ case TX_32X16:
+ case TX_32X64:
+ case TX_64X32:
+ case TX_16X64:
+ case TX_64X16:
+ default:
+ lowbd_inv_txfm2d_add_universe_avx2(input, output, stride, tx_type,
+ tx_size, eob);
+ break;
+ }
+}
+
+void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
+ const TxfmParam *txfm_param) {
+ const TX_TYPE tx_type = txfm_param->tx_type;
+ if (!txfm_param->lossless) {
+ av1_lowbd_inv_txfm2d_add_avx2(dqcoeff, dst, stride, tx_type,
+ txfm_param->tx_size, txfm_param->eob);
+ } else {
+ av1_inv_txfm_add_c(dqcoeff, dst, stride, txfm_param);
+ }
+}
diff --git a/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.h b/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.h
new file mode 100644
index 000000000..c17f655c5
--- /dev/null
+++ b/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef AV1_COMMON_X86_AV1_INV_TXFM_AVX2_H_
+#define AV1_COMMON_X86_AV1_INV_TXFM_AVX2_H_
+
+#include <immintrin.h>
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_integer.h"
+#include "aom_dsp/x86/transpose_sse2.h"
+#include "aom_dsp/x86/txfm_common_sse2.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define pair_set_w16_epi16(a, b) \
+ _mm256_set1_epi32((int32_t)(((uint16_t)(a)) | (((uint32_t)(b)) << 16)))
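+// Replicates the 16-bit pair (a, b) into every 32-bit lane so that a
+// _mm256_madd_epi16 against interleaved inputs yields a * in0 + b * in1 per
+// 32-bit lane.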
+
+#define btf_16_w16_avx2(w0, w1, in0, in1, out0, out1) \
+ { \
+ __m256i t0 = _mm256_unpacklo_epi16(in0, in1); \
+ __m256i t1 = _mm256_unpackhi_epi16(in0, in1); \
+ __m256i u0 = _mm256_madd_epi16(t0, w0); \
+ __m256i u1 = _mm256_madd_epi16(t1, w0); \
+ __m256i v0 = _mm256_madd_epi16(t0, w1); \
+ __m256i v1 = _mm256_madd_epi16(t1, w1); \
+ \
+ __m256i a0 = _mm256_add_epi32(u0, __rounding); \
+ __m256i a1 = _mm256_add_epi32(u1, __rounding); \
+ __m256i b0 = _mm256_add_epi32(v0, __rounding); \
+ __m256i b1 = _mm256_add_epi32(v1, __rounding); \
+ \
+ __m256i c0 = _mm256_srai_epi32(a0, cos_bit); \
+ __m256i c1 = _mm256_srai_epi32(a1, cos_bit); \
+ __m256i d0 = _mm256_srai_epi32(b0, cos_bit); \
+ __m256i d1 = _mm256_srai_epi32(b1, cos_bit); \
+ \
+ out0 = _mm256_packs_epi32(c0, c1); \
+ out1 = _mm256_packs_epi32(d0, d1); \
+ }
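+// Per 16-bit element, the butterfly above computes (saturated to 16 bits):
+//   out0 = (in0 * w0.lo + in1 * w0.hi + rounding) >> cos_bit
+//   out1 = (in0 * w1.lo + in1 * w1.hi + rounding) >> cos_bit
+// Note that __rounding and cos_bit are picked up from the enclosing scope.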
+
+// Butterfly where the second input is zero; only `in` is nonzero.
+#define btf_16_w16_0_avx2(w0, w1, in, out0, out1) \
+ { \
+ const __m256i _w0 = _mm256_set1_epi16(w0 * 8); \
+ const __m256i _w1 = _mm256_set1_epi16(w1 * 8); \
+ const __m256i _in = in; \
+ out0 = _mm256_mulhrs_epi16(_in, _w0); \
+ out1 = _mm256_mulhrs_epi16(_in, _w1); \
+ }
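+// With the second butterfly input zero the 32-bit path is unnecessary:
+// _mm256_mulhrs_epi16(in, w * 8) computes (in * w * 8 + (1 << 14)) >> 15,
+// which equals (in * w + (1 << 11)) >> 12, matching the full butterfly when
+// cos_bit is INV_COS_BIT (12).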
+
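+// Saturating butterflies: btf_16_adds_subs_avx2 and btf_16_subs_adds_avx2
+// both replace (in0, in1) with (in0 + in1, in0 - in1) in place (they differ
+// only in statement order), while btf_16_adds_subs_out_avx2 writes the
+// sum/difference pair to separate outputs.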
+#define btf_16_adds_subs_avx2(in0, in1) \
+ { \
+ const __m256i _in0 = in0; \
+ const __m256i _in1 = in1; \
+ in0 = _mm256_adds_epi16(_in0, _in1); \
+ in1 = _mm256_subs_epi16(_in0, _in1); \
+ }
+
+#define btf_16_subs_adds_avx2(in0, in1) \
+ { \
+ const __m256i _in0 = in0; \
+ const __m256i _in1 = in1; \
+ in1 = _mm256_subs_epi16(_in0, _in1); \
+ in0 = _mm256_adds_epi16(_in0, _in1); \
+ }
+
+#define btf_16_adds_subs_out_avx2(out0, out1, in0, in1) \
+ { \
+ const __m256i _in0 = in0; \
+ const __m256i _in1 = in1; \
+ out0 = _mm256_adds_epi16(_in0, _in1); \
+ out1 = _mm256_subs_epi16(_in0, _in1); \
+ }
+
+static INLINE __m256i load_32bit_to_16bit_w16_avx2(const int32_t *a) {
+ const __m256i a_low = _mm256_lddqu_si256((const __m256i *)a);
+ const __m256i b = _mm256_packs_epi32(a_low, *(const __m256i *)(a + 8));
+ return _mm256_permute4x64_epi64(b, 0xD8);
+}
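+// Packs 16 consecutive 32-bit coefficients into one vector of saturated
+// 16-bit values; the final permute undoes the per-lane interleaving of
+// _mm256_packs_epi32 so the result stays in coefficient order.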
+
+static INLINE void load_buffer_32bit_to_16bit_w16_avx2(const int32_t *in,
+ int stride, __m256i *out,
+ int out_size) {
+ for (int i = 0; i < out_size; ++i) {
+ out[i] = load_32bit_to_16bit_w16_avx2(in + i * stride);
+ }
+}
+
+static INLINE void transpose_16bit_16x16_avx2(const __m256i *const in,
+ __m256i *const out) {
+ // Unpack 16 bit elements. Goes from:
+ // in[0]: 00 01 02 03 08 09 0a 0b 04 05 06 07 0c 0d 0e 0f
+ // in[1]: 10 11 12 13 18 19 1a 1b 14 15 16 17 1c 1d 1e 1f
+ // in[2]: 20 21 22 23 28 29 2a 2b 24 25 26 27 2c 2d 2e 2f
+ // in[3]: 30 31 32 33 38 39 3a 3b 34 35 36 37 3c 3d 3e 3f
+ // in[4]: 40 41 42 43 48 49 4a 4b 44 45 46 47 4c 4d 4e 4f
+ // in[5]: 50 51 52 53 58 59 5a 5b 54 55 56 57 5c 5d 5e 5f
+ // in[6]: 60 61 62 63 68 69 6a 6b 64 65 66 67 6c 6d 6e 6f
+ // in[7]: 70 71 72 73 78 79 7a 7b 74 75 76 77 7c 7d 7e 7f
+ // in[8]: 80 81 82 83 88 89 8a 8b 84 85 86 87 8c 8d 8e 8f
+ // to:
+ // a0: 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17
+ // a1: 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37
+ // a2: 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57
+ // a3: 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77
+ // ...
+ __m256i a[16];
+ for (int i = 0; i < 16; i += 2) {
+ a[i / 2 + 0] = _mm256_unpacklo_epi16(in[i], in[i + 1]);
+ a[i / 2 + 8] = _mm256_unpackhi_epi16(in[i], in[i + 1]);
+ }
+ __m256i b[16];
+ for (int i = 0; i < 16; i += 2) {
+ b[i / 2 + 0] = _mm256_unpacklo_epi32(a[i], a[i + 1]);
+ b[i / 2 + 8] = _mm256_unpackhi_epi32(a[i], a[i + 1]);
+ }
+ __m256i c[16];
+ for (int i = 0; i < 16; i += 2) {
+ c[i / 2 + 0] = _mm256_unpacklo_epi64(b[i], b[i + 1]);
+ c[i / 2 + 8] = _mm256_unpackhi_epi64(b[i], b[i + 1]);
+ }
+ out[0 + 0] = _mm256_permute2x128_si256(c[0], c[1], 0x20);
+ out[1 + 0] = _mm256_permute2x128_si256(c[8], c[9], 0x20);
+ out[2 + 0] = _mm256_permute2x128_si256(c[4], c[5], 0x20);
+ out[3 + 0] = _mm256_permute2x128_si256(c[12], c[13], 0x20);
+
+ out[0 + 8] = _mm256_permute2x128_si256(c[0], c[1], 0x31);
+ out[1 + 8] = _mm256_permute2x128_si256(c[8], c[9], 0x31);
+ out[2 + 8] = _mm256_permute2x128_si256(c[4], c[5], 0x31);
+ out[3 + 8] = _mm256_permute2x128_si256(c[12], c[13], 0x31);
+
+ out[4 + 0] = _mm256_permute2x128_si256(c[0 + 2], c[1 + 2], 0x20);
+ out[5 + 0] = _mm256_permute2x128_si256(c[8 + 2], c[9 + 2], 0x20);
+ out[6 + 0] = _mm256_permute2x128_si256(c[4 + 2], c[5 + 2], 0x20);
+ out[7 + 0] = _mm256_permute2x128_si256(c[12 + 2], c[13 + 2], 0x20);
+
+ out[4 + 8] = _mm256_permute2x128_si256(c[0 + 2], c[1 + 2], 0x31);
+ out[5 + 8] = _mm256_permute2x128_si256(c[8 + 2], c[9 + 2], 0x31);
+ out[6 + 8] = _mm256_permute2x128_si256(c[4 + 2], c[5 + 2], 0x31);
+ out[7 + 8] = _mm256_permute2x128_si256(c[12 + 2], c[13 + 2], 0x31);
+}
+
+static INLINE void round_shift_16bit_w16_avx2(__m256i *in, int size, int bit) {
+ if (bit < 0) {
+ __m256i scale = _mm256_set1_epi16(1 << (bit + 15));
+ for (int i = 0; i < size; ++i) {
+ in[i] = _mm256_mulhrs_epi16(in[i], scale);
+ }
+ } else if (bit > 0) {
+ for (int i = 0; i < size; ++i) {
+ in[i] = _mm256_slli_epi16(in[i], bit);
+ }
+ }
+}
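+// Rounding shift by |bit|: for negative bit, multiplying by 2^(15 + bit) and
+// taking _mm256_mulhrs_epi16 yields round(in * 2^bit) without widening to
+// 32 bits; a positive bit is a plain left shift.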
+
+static INLINE void round_shift_avx2(const __m256i *input, __m256i *output,
+ int size) {
+ const __m256i scale = _mm256_set1_epi16(NewInvSqrt2 * 8);
+ for (int i = 0; i < size; ++i) {
+ output[i] = _mm256_mulhrs_epi16(input[i], scale);
+ }
+}
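+// Scales the buffer by NewInvSqrt2 / 2^12 (roughly 1/sqrt(2)), the extra
+// normalization used for rectangular transform sizes; the factor of 8 folds
+// the >> 12 into the mulhrs >> 15.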
+
+static INLINE void flip_buf_av2(__m256i *in, __m256i *out, int size) {
+ for (int i = 0; i < size; ++i) {
+ out[size - i - 1] = in[i];
+ }
+}
+
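+// Adds sixteen 16-bit residuals to the 8-bit prediction and stores the
+// result clipped to [0, 255]; the permute restores the order of the two
+// packed 128-bit halves after _mm256_packus_epi16.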
+static INLINE void write_recon_w16_avx2(__m256i res, uint8_t *output) {
+ __m128i pred = _mm_loadu_si128((__m128i const *)(output));
+ __m256i u = _mm256_adds_epi16(_mm256_cvtepu8_epi16(pred), res);
+ __m128i y = _mm256_castsi256_si128(
+ _mm256_permute4x64_epi64(_mm256_packus_epi16(u, u), 168));
+ _mm_storeu_si128((__m128i *)(output), y);
+}
+
+static INLINE void lowbd_write_buffer_16xn_avx2(__m256i *in, uint8_t *output,
+ int stride, int flipud,
+ int height) {
+ int j = flipud ? (height - 1) : 0;
+ const int step = flipud ? -1 : 1;
+ for (int i = 0; i < height; ++i, j += step) {
+ write_recon_w16_avx2(in[j], output + i * stride);
+ }
+}
+
+typedef void (*transform_1d_avx2)(const __m256i *input, __m256i *output,
+ int8_t cos_bit);
+
+void av1_lowbd_inv_txfm2d_add_avx2(const int32_t *input, uint8_t *output,
+ int stride, TX_TYPE tx_type, TX_SIZE tx_size,
+ int eob);
+#ifdef __cplusplus
+}
+#endif
+
+#endif // AV1_COMMON_X86_AV1_INV_TXFM_AVX2_H_
diff --git a/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.c b/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.c
new file mode 100644
index 000000000..dd7cee24c
--- /dev/null
+++ b/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.c
@@ -0,0 +1,2917 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "av1/common/av1_inv_txfm1d_cfg.h"
+#include "av1/common/x86/av1_inv_txfm_ssse3.h"
+#include "av1/common/x86/av1_txfm_sse2.h"
+
+// TODO(binpengsmail@gmail.com): replace some for loops with do {} while loops
+
+static void idct4_new_sse2(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
+ const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
+ const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
+
+ // stage 1
+ __m128i x[4];
+ x[0] = input[0];
+ x[1] = input[2];
+ x[2] = input[1];
+ x[3] = input[3];
+
+ // stage 2
+ btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]);
+ btf_16_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]);
+
+ // stage 3
+ btf_16_adds_subs_out_sse2(output[0], output[3], x[0], x[3]);
+ btf_16_adds_subs_out_sse2(output[1], output[2], x[1], x[2]);
+}
+
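+// The _w4 variants mirror the full-width functions but use btf_16_4p_sse2,
+// the butterfly for buffers that hold only four pixels per row.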
+void idct4_w4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
+ const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
+ const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
+
+ // stage 1
+ __m128i x[4];
+ x[0] = input[0];
+ x[1] = input[2];
+ x[2] = input[1];
+ x[3] = input[3];
+
+ // stage 2
+ btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]);
+ btf_16_4p_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]);
+
+ // stage 3
+ btf_16_adds_subs_out_sse2(output[0], output[3], x[0], x[3]);
+ btf_16_adds_subs_out_sse2(output[1], output[2], x[1], x[2]);
+}
+
+void idct8_low1_new_ssse3(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+
+ // stage 1
+ __m128i x[2];
+ x[0] = input[0];
+
+ // stage 2
+ // stage 3
+ btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
+
+ // stage 4
+ // stage 5
+ output[0] = x[0];
+ output[7] = x[0];
+ output[1] = x[1];
+ output[6] = x[1];
+ output[2] = x[1];
+ output[5] = x[1];
+ output[3] = x[0];
+ output[4] = x[0];
+}
+
+void idct8_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
+ const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
+ const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
+ const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
+ const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
+ const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
+ const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
+
+ // stage 1
+ __m128i x[8];
+ x[0] = input[0];
+ x[1] = input[4];
+ x[2] = input[2];
+ x[3] = input[6];
+ x[4] = input[1];
+ x[5] = input[5];
+ x[6] = input[3];
+ x[7] = input[7];
+
+ // stage 2
+ btf_16_sse2(cospi_p56_m08, cospi_p08_p56, x[4], x[7], x[4], x[7]);
+ btf_16_sse2(cospi_p24_m40, cospi_p40_p24, x[5], x[6], x[5], x[6]);
+
+ // stage 3
+ btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]);
+ btf_16_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]);
+ btf_16_adds_subs_sse2(x[4], x[5]);
+ btf_16_subs_adds_sse2(x[7], x[6]);
+
+ // stage 4
+ btf_16_adds_subs_sse2(x[0], x[3]);
+ btf_16_adds_subs_sse2(x[1], x[2]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
+
+ // stage 5
+ btf_16_adds_subs_out_sse2(output[0], output[7], x[0], x[7]);
+ btf_16_adds_subs_out_sse2(output[1], output[6], x[1], x[6]);
+ btf_16_adds_subs_out_sse2(output[2], output[5], x[2], x[5]);
+ btf_16_adds_subs_out_sse2(output[3], output[4], x[3], x[4]);
+}
+
+void idct8_w4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
+ const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
+ const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
+ const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
+ const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
+ const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
+ const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
+
+ // stage 1
+ __m128i x[8];
+ x[0] = input[0];
+ x[1] = input[4];
+ x[2] = input[2];
+ x[3] = input[6];
+ x[4] = input[1];
+ x[5] = input[5];
+ x[6] = input[3];
+ x[7] = input[7];
+
+ // stage 2
+ btf_16_4p_sse2(cospi_p56_m08, cospi_p08_p56, x[4], x[7], x[4], x[7]);
+ btf_16_4p_sse2(cospi_p24_m40, cospi_p40_p24, x[5], x[6], x[5], x[6]);
+
+ // stage 3
+ btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]);
+ btf_16_4p_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]);
+ btf_16_adds_subs_sse2(x[4], x[5]);
+ btf_16_subs_adds_sse2(x[7], x[6]);
+
+ // stage 4
+ btf_16_adds_subs_sse2(x[0], x[3]);
+ btf_16_adds_subs_sse2(x[1], x[2]);
+ btf_16_4p_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
+
+ // stage 5
+ btf_16_adds_subs_out_sse2(output[0], output[7], x[0], x[7]);
+ btf_16_adds_subs_out_sse2(output[1], output[6], x[1], x[6]);
+ btf_16_adds_subs_out_sse2(output[2], output[5], x[2], x[5]);
+ btf_16_adds_subs_out_sse2(output[3], output[4], x[3], x[4]);
+}
+
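+// Stage helpers shared by idct16_low8_new_ssse3 and idct16_new_sse2; the w4
+// variant inlines stages 5 and 6 with the 4-pixel butterflies.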
+static INLINE void idct16_stage5_sse2(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ btf_16_adds_subs_sse2(x[0], x[3]);
+ btf_16_adds_subs_sse2(x[1], x[2]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
+ btf_16_adds_subs_sse2(x[8], x[11]);
+ btf_16_adds_subs_sse2(x[9], x[10]);
+ btf_16_subs_adds_sse2(x[15], x[12]);
+ btf_16_subs_adds_sse2(x[14], x[13]);
+}
+
+static INLINE void idct16_stage6_sse2(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ btf_16_adds_subs_sse2(x[0], x[7]);
+ btf_16_adds_subs_sse2(x[1], x[6]);
+ btf_16_adds_subs_sse2(x[2], x[5]);
+ btf_16_adds_subs_sse2(x[3], x[4]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
+}
+
+static INLINE void idct16_stage7_sse2(__m128i *output, __m128i *x) {
+ btf_16_adds_subs_out_sse2(output[0], output[15], x[0], x[15]);
+ btf_16_adds_subs_out_sse2(output[1], output[14], x[1], x[14]);
+ btf_16_adds_subs_out_sse2(output[2], output[13], x[2], x[13]);
+ btf_16_adds_subs_out_sse2(output[3], output[12], x[3], x[12]);
+ btf_16_adds_subs_out_sse2(output[4], output[11], x[4], x[11]);
+ btf_16_adds_subs_out_sse2(output[5], output[10], x[5], x[10]);
+ btf_16_adds_subs_out_sse2(output[6], output[9], x[6], x[9]);
+ btf_16_adds_subs_out_sse2(output[7], output[8], x[7], x[8]);
+}
+
+static void idct16_low1_new_ssse3(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+
+ // stage 1
+ __m128i x[2];
+ x[0] = input[0];
+
+ // stage 2
+ // stage 3
+ // stage 4
+ btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
+
+ // stage 5
+ // stage 6
+ // stage 7
+ output[0] = x[0];
+ output[15] = x[0];
+ output[1] = x[1];
+ output[14] = x[1];
+ output[2] = x[1];
+ output[13] = x[1];
+ output[3] = x[0];
+ output[12] = x[0];
+ output[4] = x[0];
+ output[11] = x[0];
+ output[5] = x[1];
+ output[10] = x[1];
+ output[6] = x[1];
+ output[9] = x[1];
+ output[7] = x[0];
+ output[8] = x[0];
+}
+
+static void idct16_low8_new_ssse3(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+ const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
+ const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
+ const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
+
+ // stage 1
+ __m128i x[16];
+ x[0] = input[0];
+ x[2] = input[4];
+ x[4] = input[2];
+ x[6] = input[6];
+ x[8] = input[1];
+ x[10] = input[5];
+ x[12] = input[3];
+ x[14] = input[7];
+
+ // stage 2
+ btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]);
+ btf_16_ssse3(-cospi[36], cospi[28], x[14], x[9], x[14]);
+ btf_16_ssse3(cospi[44], cospi[20], x[10], x[10], x[13]);
+ btf_16_ssse3(-cospi[52], cospi[12], x[12], x[11], x[12]);
+
+ // stage 3
+ btf_16_ssse3(cospi[56], cospi[8], x[4], x[4], x[7]);
+ btf_16_ssse3(-cospi[40], cospi[24], x[6], x[5], x[6]);
+ btf_16_adds_subs_sse2(x[8], x[9]);
+ btf_16_subs_adds_sse2(x[11], x[10]);
+ btf_16_adds_subs_sse2(x[12], x[13]);
+ btf_16_subs_adds_sse2(x[15], x[14]);
+
+ // stage 4
+ btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
+ btf_16_ssse3(cospi[48], cospi[16], x[2], x[2], x[3]);
+ btf_16_adds_subs_sse2(x[4], x[5]);
+ btf_16_subs_adds_sse2(x[7], x[6]);
+ btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
+ btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]);
+
+ idct16_stage5_sse2(x, cospi, __rounding, cos_bit);
+ idct16_stage6_sse2(x, cospi, __rounding, cos_bit);
+ idct16_stage7_sse2(output, x);
+}
+
+void idct16_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]);
+ const __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]);
+ const __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]);
+ const __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]);
+ const __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]);
+ const __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]);
+ const __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]);
+ const __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]);
+ const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
+ const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
+ const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
+ const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
+ const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
+ const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
+ const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
+ const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
+ const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
+
+ // stage 1
+ __m128i x[16];
+ x[0] = input[0];
+ x[1] = input[8];
+ x[2] = input[4];
+ x[3] = input[12];
+ x[4] = input[2];
+ x[5] = input[10];
+ x[6] = input[6];
+ x[7] = input[14];
+ x[8] = input[1];
+ x[9] = input[9];
+ x[10] = input[5];
+ x[11] = input[13];
+ x[12] = input[3];
+ x[13] = input[11];
+ x[14] = input[7];
+ x[15] = input[15];
+
+ // stage 2
+ btf_16_sse2(cospi_p60_m04, cospi_p04_p60, x[8], x[15], x[8], x[15]);
+ btf_16_sse2(cospi_p28_m36, cospi_p36_p28, x[9], x[14], x[9], x[14]);
+ btf_16_sse2(cospi_p44_m20, cospi_p20_p44, x[10], x[13], x[10], x[13]);
+ btf_16_sse2(cospi_p12_m52, cospi_p52_p12, x[11], x[12], x[11], x[12]);
+
+ // stage 3
+ btf_16_sse2(cospi_p56_m08, cospi_p08_p56, x[4], x[7], x[4], x[7]);
+ btf_16_sse2(cospi_p24_m40, cospi_p40_p24, x[5], x[6], x[5], x[6]);
+ btf_16_adds_subs_sse2(x[8], x[9]);
+ btf_16_subs_adds_sse2(x[11], x[10]);
+ btf_16_adds_subs_sse2(x[12], x[13]);
+ btf_16_subs_adds_sse2(x[15], x[14]);
+
+ // stage 4
+ btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]);
+ btf_16_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]);
+ btf_16_adds_subs_sse2(x[4], x[5]);
+ btf_16_subs_adds_sse2(x[7], x[6]);
+ btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
+ btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]);
+
+ // stage 5~7
+ idct16_stage5_sse2(x, cospi, __rounding, cos_bit);
+ idct16_stage6_sse2(x, cospi, __rounding, cos_bit);
+ idct16_stage7_sse2(output, x);
+}
+
+void idct16_w4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]);
+ const __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]);
+ const __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]);
+ const __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]);
+ const __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]);
+ const __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]);
+ const __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]);
+ const __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]);
+ const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
+ const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
+ const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
+ const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
+ const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
+ const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
+ const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
+ const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
+ const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
+ const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
+
+ // stage 1
+ __m128i x[16];
+ x[0] = input[0];
+ x[1] = input[8];
+ x[2] = input[4];
+ x[3] = input[12];
+ x[4] = input[2];
+ x[5] = input[10];
+ x[6] = input[6];
+ x[7] = input[14];
+ x[8] = input[1];
+ x[9] = input[9];
+ x[10] = input[5];
+ x[11] = input[13];
+ x[12] = input[3];
+ x[13] = input[11];
+ x[14] = input[7];
+ x[15] = input[15];
+
+ // stage 2
+ btf_16_4p_sse2(cospi_p60_m04, cospi_p04_p60, x[8], x[15], x[8], x[15]);
+ btf_16_4p_sse2(cospi_p28_m36, cospi_p36_p28, x[9], x[14], x[9], x[14]);
+ btf_16_4p_sse2(cospi_p44_m20, cospi_p20_p44, x[10], x[13], x[10], x[13]);
+ btf_16_4p_sse2(cospi_p12_m52, cospi_p52_p12, x[11], x[12], x[11], x[12]);
+
+ // stage 3
+ btf_16_4p_sse2(cospi_p56_m08, cospi_p08_p56, x[4], x[7], x[4], x[7]);
+ btf_16_4p_sse2(cospi_p24_m40, cospi_p40_p24, x[5], x[6], x[5], x[6]);
+ btf_16_adds_subs_sse2(x[8], x[9]);
+ btf_16_subs_adds_sse2(x[11], x[10]);
+ btf_16_adds_subs_sse2(x[12], x[13]);
+ btf_16_subs_adds_sse2(x[15], x[14]);
+
+ // stage 4
+ btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]);
+ btf_16_4p_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]);
+ btf_16_adds_subs_sse2(x[4], x[5]);
+ btf_16_subs_adds_sse2(x[7], x[6]);
+ btf_16_4p_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
+ btf_16_4p_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]);
+
+ // stage 5
+ btf_16_adds_subs_sse2(x[0], x[3]);
+ btf_16_adds_subs_sse2(x[1], x[2]);
+ btf_16_4p_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
+ btf_16_adds_subs_sse2(x[8], x[11]);
+ btf_16_adds_subs_sse2(x[9], x[10]);
+ btf_16_subs_adds_sse2(x[15], x[12]);
+ btf_16_subs_adds_sse2(x[14], x[13]);
+
+ // stage 6
+ btf_16_adds_subs_sse2(x[0], x[7]);
+ btf_16_adds_subs_sse2(x[1], x[6]);
+ btf_16_adds_subs_sse2(x[2], x[5]);
+ btf_16_adds_subs_sse2(x[3], x[4]);
+ btf_16_4p_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
+ btf_16_4p_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
+
+ // stage 7
+ idct16_stage7_sse2(output, x);
+}
+
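+// Informal reference for the 16-bit butterfly helpers used by these
+// idct/iadst routines (their definitions live in the shared x86 transform
+// headers; this summary is a sketch, not the authoritative source):
+//   btf_16_adds_subs_sse2(a, b) : a' = sat(a + b), b' = sat(a - b)
+//   btf_16_subs_adds_sse2(a, b) : same results, difference computed first
+//   btf_16_sse2(w0, w1, a, b, o0, o1), with w0 = {c0, c1}, w1 = {c2, c3}:
+//     o0 = round_shift(a*c0 + b*c1, cos_bit)
+//     o1 = round_shift(a*c2 + b*c3, cos_bit)
+//     (the enclosing __rounding and cos_bit names are picked up by the macro)
+//   btf_16_4p_sse2(...) : the same rotation on a half register, used by the
+//                         4-column (*_w4) variants.
+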
+static INLINE void idct32_high16_stage3_sse2(__m128i *x) {
+ btf_16_adds_subs_sse2(x[16], x[17]);
+ btf_16_subs_adds_sse2(x[19], x[18]);
+ btf_16_adds_subs_sse2(x[20], x[21]);
+ btf_16_subs_adds_sse2(x[23], x[22]);
+ btf_16_adds_subs_sse2(x[24], x[25]);
+ btf_16_subs_adds_sse2(x[27], x[26]);
+ btf_16_adds_subs_sse2(x[28], x[29]);
+ btf_16_subs_adds_sse2(x[31], x[30]);
+}
+
+static INLINE void idct32_high16_stage4_sse2(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
+ const __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
+ const __m128i cospi_m56_m08 = pair_set_epi16(-cospi[56], -cospi[8]);
+ const __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
+ const __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
+ const __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]);
+ btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x[17], x[30], x[17], x[30]);
+ btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x[18], x[29], x[18], x[29]);
+ btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x[21], x[26], x[21], x[26]);
+ btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x[22], x[25], x[22], x[25]);
+}
+
+static INLINE void idct32_high24_stage5_sse2(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
+ const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
+ const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
+ btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
+ btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]);
+ btf_16_adds_subs_sse2(x[16], x[19]);
+ btf_16_adds_subs_sse2(x[17], x[18]);
+ btf_16_subs_adds_sse2(x[23], x[20]);
+ btf_16_subs_adds_sse2(x[22], x[21]);
+ btf_16_adds_subs_sse2(x[24], x[27]);
+ btf_16_adds_subs_sse2(x[25], x[26]);
+ btf_16_subs_adds_sse2(x[31], x[28]);
+ btf_16_subs_adds_sse2(x[30], x[29]);
+}
+
+static INLINE void idct32_high28_stage6_sse2(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
+ const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
+ const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
+ btf_16_adds_subs_sse2(x[8], x[11]);
+ btf_16_adds_subs_sse2(x[9], x[10]);
+ btf_16_subs_adds_sse2(x[15], x[12]);
+ btf_16_subs_adds_sse2(x[14], x[13]);
+ btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[18], x[29], x[18], x[29]);
+ btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[19], x[28], x[19], x[28]);
+ btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[20], x[27], x[20], x[27]);
+ btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[21], x[26], x[21], x[26]);
+}
+
+static INLINE void idct32_stage7_sse2(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ btf_16_adds_subs_sse2(x[0], x[7]);
+ btf_16_adds_subs_sse2(x[1], x[6]);
+ btf_16_adds_subs_sse2(x[2], x[5]);
+ btf_16_adds_subs_sse2(x[3], x[4]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
+ btf_16_adds_subs_sse2(x[16], x[23]);
+ btf_16_adds_subs_sse2(x[17], x[22]);
+ btf_16_adds_subs_sse2(x[18], x[21]);
+ btf_16_adds_subs_sse2(x[19], x[20]);
+ btf_16_subs_adds_sse2(x[31], x[24]);
+ btf_16_subs_adds_sse2(x[30], x[25]);
+ btf_16_subs_adds_sse2(x[29], x[26]);
+ btf_16_subs_adds_sse2(x[28], x[27]);
+}
+
+static INLINE void idct32_stage8_sse2(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ btf_16_adds_subs_sse2(x[0], x[15]);
+ btf_16_adds_subs_sse2(x[1], x[14]);
+ btf_16_adds_subs_sse2(x[2], x[13]);
+ btf_16_adds_subs_sse2(x[3], x[12]);
+ btf_16_adds_subs_sse2(x[4], x[11]);
+ btf_16_adds_subs_sse2(x[5], x[10]);
+ btf_16_adds_subs_sse2(x[6], x[9]);
+ btf_16_adds_subs_sse2(x[7], x[8]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[20], x[27], x[20], x[27]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[21], x[26], x[21], x[26]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[22], x[25], x[22], x[25]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[23], x[24], x[23], x[24]);
+}
+
+static INLINE void idct32_stage9_sse2(__m128i *output, __m128i *x) {
+ btf_16_adds_subs_out_sse2(output[0], output[31], x[0], x[31]);
+ btf_16_adds_subs_out_sse2(output[1], output[30], x[1], x[30]);
+ btf_16_adds_subs_out_sse2(output[2], output[29], x[2], x[29]);
+ btf_16_adds_subs_out_sse2(output[3], output[28], x[3], x[28]);
+ btf_16_adds_subs_out_sse2(output[4], output[27], x[4], x[27]);
+ btf_16_adds_subs_out_sse2(output[5], output[26], x[5], x[26]);
+ btf_16_adds_subs_out_sse2(output[6], output[25], x[6], x[25]);
+ btf_16_adds_subs_out_sse2(output[7], output[24], x[7], x[24]);
+ btf_16_adds_subs_out_sse2(output[8], output[23], x[8], x[23]);
+ btf_16_adds_subs_out_sse2(output[9], output[22], x[9], x[22]);
+ btf_16_adds_subs_out_sse2(output[10], output[21], x[10], x[21]);
+ btf_16_adds_subs_out_sse2(output[11], output[20], x[11], x[20]);
+ btf_16_adds_subs_out_sse2(output[12], output[19], x[12], x[19]);
+ btf_16_adds_subs_out_sse2(output[13], output[18], x[13], x[18]);
+ btf_16_adds_subs_out_sse2(output[14], output[17], x[14], x[17]);
+ btf_16_adds_subs_out_sse2(output[15], output[16], x[15], x[16]);
+}
+
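+// The idct32_lowN_new_ssse3 variants below are reduced-input fast paths used
+// when only the first N entries of input[] can be non-zero (small eob). Every
+// butterfly whose second operand is known to be zero then collapses to either
+// a copy or a single scaling, done with btf_16_ssse3(w0, w1, in, out0, out1),
+// which (per the shared SSSE3 helper) computes out0 = round(in * w0) and
+// out1 = round(in * w1) at INV_COS_BIT precision. In the DC-only path below
+// both weights are cospi[32], so x[0] and x[1] hold the same value and the
+// alternating output assignments simply replicate it across all 32 outputs.
+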
+static void idct32_low1_new_ssse3(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+
+ // stage 1
+ __m128i x[2];
+ x[0] = input[0];
+
+ // stage 2
+ // stage 3
+ // stage 4
+ // stage 5
+ btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
+
+ // stage 6
+ // stage 7
+ // stage 8
+ // stage 9
+ output[0] = x[0];
+ output[31] = x[0];
+ output[1] = x[1];
+ output[30] = x[1];
+ output[2] = x[1];
+ output[29] = x[1];
+ output[3] = x[0];
+ output[28] = x[0];
+ output[4] = x[0];
+ output[27] = x[0];
+ output[5] = x[1];
+ output[26] = x[1];
+ output[6] = x[1];
+ output[25] = x[1];
+ output[7] = x[0];
+ output[24] = x[0];
+ output[8] = x[0];
+ output[23] = x[0];
+ output[9] = x[1];
+ output[22] = x[1];
+ output[10] = x[1];
+ output[21] = x[1];
+ output[11] = x[0];
+ output[20] = x[0];
+ output[12] = x[0];
+ output[19] = x[0];
+ output[13] = x[1];
+ output[18] = x[1];
+ output[14] = x[1];
+ output[17] = x[1];
+ output[15] = x[0];
+ output[16] = x[0];
+}
+
+static void idct32_low8_new_ssse3(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ // stage 1
+ __m128i x[32];
+ x[0] = input[0];
+ x[4] = input[4];
+ x[8] = input[2];
+ x[12] = input[6];
+ x[16] = input[1];
+ x[20] = input[5];
+ x[24] = input[3];
+ x[28] = input[7];
+
+ // stage 2
+ btf_16_ssse3(cospi[62], cospi[2], x[16], x[16], x[31]);
+ btf_16_ssse3(-cospi[50], cospi[14], x[28], x[19], x[28]);
+ btf_16_ssse3(cospi[54], cospi[10], x[20], x[20], x[27]);
+ btf_16_ssse3(-cospi[58], cospi[6], x[24], x[23], x[24]);
+
+ // stage 3
+ btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]);
+ btf_16_ssse3(-cospi[52], cospi[12], x[12], x[11], x[12]);
+ x[17] = x[16];
+ x[18] = x[19];
+ x[21] = x[20];
+ x[22] = x[23];
+ x[25] = x[24];
+ x[26] = x[27];
+ x[29] = x[28];
+ x[30] = x[31];
+
+ // stage 4
+ btf_16_ssse3(cospi[56], cospi[8], x[4], x[4], x[7]);
+ x[9] = x[8];
+ x[10] = x[11];
+ x[13] = x[12];
+ x[14] = x[15];
+ idct32_high16_stage4_sse2(x, cospi, __rounding, cos_bit);
+
+ // stage 5
+ btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
+ x[5] = x[4];
+ x[6] = x[7];
+ idct32_high24_stage5_sse2(x, cospi, __rounding, cos_bit);
+
+  // stage 6
+ x[3] = x[0];
+ x[2] = x[1];
+ idct32_high28_stage6_sse2(x, cospi, __rounding, cos_bit);
+
+ idct32_stage7_sse2(x, cospi, __rounding, cos_bit);
+ idct32_stage8_sse2(x, cospi, __rounding, cos_bit);
+ idct32_stage9_sse2(output, x);
+}
+
+static void idct32_low16_new_ssse3(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ // stage 1
+ __m128i x[32];
+ x[0] = input[0];
+ x[2] = input[8];
+ x[4] = input[4];
+ x[6] = input[12];
+ x[8] = input[2];
+ x[10] = input[10];
+ x[12] = input[6];
+ x[14] = input[14];
+ x[16] = input[1];
+ x[18] = input[9];
+ x[20] = input[5];
+ x[22] = input[13];
+ x[24] = input[3];
+ x[26] = input[11];
+ x[28] = input[7];
+ x[30] = input[15];
+
+ // stage 2
+ btf_16_ssse3(cospi[62], cospi[2], x[16], x[16], x[31]);
+ btf_16_ssse3(-cospi[34], cospi[30], x[30], x[17], x[30]);
+ btf_16_ssse3(cospi[46], cospi[18], x[18], x[18], x[29]);
+ btf_16_ssse3(-cospi[50], cospi[14], x[28], x[19], x[28]);
+ btf_16_ssse3(cospi[54], cospi[10], x[20], x[20], x[27]);
+ btf_16_ssse3(-cospi[42], cospi[22], x[26], x[21], x[26]);
+ btf_16_ssse3(cospi[38], cospi[26], x[22], x[22], x[25]);
+ btf_16_ssse3(-cospi[58], cospi[6], x[24], x[23], x[24]);
+
+ // stage 3
+ btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]);
+ btf_16_ssse3(-cospi[36], cospi[28], x[14], x[9], x[14]);
+ btf_16_ssse3(cospi[44], cospi[20], x[10], x[10], x[13]);
+ btf_16_ssse3(-cospi[52], cospi[12], x[12], x[11], x[12]);
+ idct32_high16_stage3_sse2(x);
+
+ // stage 4
+ btf_16_ssse3(cospi[56], cospi[8], x[4], x[4], x[7]);
+ btf_16_ssse3(-cospi[40], cospi[24], x[6], x[5], x[6]);
+ btf_16_adds_subs_sse2(x[8], x[9]);
+ btf_16_subs_adds_sse2(x[11], x[10]);
+ btf_16_adds_subs_sse2(x[12], x[13]);
+ btf_16_subs_adds_sse2(x[15], x[14]);
+ idct32_high16_stage4_sse2(x, cospi, __rounding, cos_bit);
+
+ // stage 5
+ btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
+ btf_16_ssse3(cospi[48], cospi[16], x[2], x[2], x[3]);
+ btf_16_adds_subs_sse2(x[4], x[5]);
+ btf_16_subs_adds_sse2(x[7], x[6]);
+ idct32_high24_stage5_sse2(x, cospi, __rounding, cos_bit);
+
+  // stage 6
+  btf_16_adds_subs_sse2(x[0], x[3]);
+ btf_16_adds_subs_sse2(x[1], x[2]);
+ idct32_high28_stage6_sse2(x, cospi, __rounding, cos_bit);
+
+ idct32_stage7_sse2(x, cospi, __rounding, cos_bit);
+ idct32_stage8_sse2(x, cospi, __rounding, cos_bit);
+ idct32_stage9_sse2(output, x);
+}
+
+static void idct32_new_sse2(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m128i cospi_p62_m02 = pair_set_epi16(cospi[62], -cospi[2]);
+ const __m128i cospi_p02_p62 = pair_set_epi16(cospi[2], cospi[62]);
+ const __m128i cospi_p30_m34 = pair_set_epi16(cospi[30], -cospi[34]);
+ const __m128i cospi_p34_p30 = pair_set_epi16(cospi[34], cospi[30]);
+ const __m128i cospi_p46_m18 = pair_set_epi16(cospi[46], -cospi[18]);
+ const __m128i cospi_p18_p46 = pair_set_epi16(cospi[18], cospi[46]);
+ const __m128i cospi_p14_m50 = pair_set_epi16(cospi[14], -cospi[50]);
+ const __m128i cospi_p50_p14 = pair_set_epi16(cospi[50], cospi[14]);
+ const __m128i cospi_p54_m10 = pair_set_epi16(cospi[54], -cospi[10]);
+ const __m128i cospi_p10_p54 = pair_set_epi16(cospi[10], cospi[54]);
+ const __m128i cospi_p22_m42 = pair_set_epi16(cospi[22], -cospi[42]);
+ const __m128i cospi_p42_p22 = pair_set_epi16(cospi[42], cospi[22]);
+ const __m128i cospi_p38_m26 = pair_set_epi16(cospi[38], -cospi[26]);
+ const __m128i cospi_p26_p38 = pair_set_epi16(cospi[26], cospi[38]);
+ const __m128i cospi_p06_m58 = pair_set_epi16(cospi[6], -cospi[58]);
+ const __m128i cospi_p58_p06 = pair_set_epi16(cospi[58], cospi[6]);
+ const __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]);
+ const __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]);
+ const __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]);
+ const __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]);
+ const __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]);
+ const __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]);
+ const __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]);
+ const __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]);
+ const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
+ const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
+ const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
+ const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
+ const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
+ const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
+
+ // stage 1
+ __m128i x[32];
+ x[0] = input[0];
+ x[1] = input[16];
+ x[2] = input[8];
+ x[3] = input[24];
+ x[4] = input[4];
+ x[5] = input[20];
+ x[6] = input[12];
+ x[7] = input[28];
+ x[8] = input[2];
+ x[9] = input[18];
+ x[10] = input[10];
+ x[11] = input[26];
+ x[12] = input[6];
+ x[13] = input[22];
+ x[14] = input[14];
+ x[15] = input[30];
+ x[16] = input[1];
+ x[17] = input[17];
+ x[18] = input[9];
+ x[19] = input[25];
+ x[20] = input[5];
+ x[21] = input[21];
+ x[22] = input[13];
+ x[23] = input[29];
+ x[24] = input[3];
+ x[25] = input[19];
+ x[26] = input[11];
+ x[27] = input[27];
+ x[28] = input[7];
+ x[29] = input[23];
+ x[30] = input[15];
+ x[31] = input[31];
+
+ // stage 2
+ btf_16_sse2(cospi_p62_m02, cospi_p02_p62, x[16], x[31], x[16], x[31]);
+ btf_16_sse2(cospi_p30_m34, cospi_p34_p30, x[17], x[30], x[17], x[30]);
+ btf_16_sse2(cospi_p46_m18, cospi_p18_p46, x[18], x[29], x[18], x[29]);
+ btf_16_sse2(cospi_p14_m50, cospi_p50_p14, x[19], x[28], x[19], x[28]);
+ btf_16_sse2(cospi_p54_m10, cospi_p10_p54, x[20], x[27], x[20], x[27]);
+ btf_16_sse2(cospi_p22_m42, cospi_p42_p22, x[21], x[26], x[21], x[26]);
+ btf_16_sse2(cospi_p38_m26, cospi_p26_p38, x[22], x[25], x[22], x[25]);
+ btf_16_sse2(cospi_p06_m58, cospi_p58_p06, x[23], x[24], x[23], x[24]);
+
+ // stage 3
+ btf_16_sse2(cospi_p60_m04, cospi_p04_p60, x[8], x[15], x[8], x[15]);
+ btf_16_sse2(cospi_p28_m36, cospi_p36_p28, x[9], x[14], x[9], x[14]);
+ btf_16_sse2(cospi_p44_m20, cospi_p20_p44, x[10], x[13], x[10], x[13]);
+ btf_16_sse2(cospi_p12_m52, cospi_p52_p12, x[11], x[12], x[11], x[12]);
+ idct32_high16_stage3_sse2(x);
+
+ // stage 4
+ btf_16_sse2(cospi_p56_m08, cospi_p08_p56, x[4], x[7], x[4], x[7]);
+ btf_16_sse2(cospi_p24_m40, cospi_p40_p24, x[5], x[6], x[5], x[6]);
+ btf_16_adds_subs_sse2(x[8], x[9]);
+ btf_16_subs_adds_sse2(x[11], x[10]);
+ btf_16_adds_subs_sse2(x[12], x[13]);
+ btf_16_subs_adds_sse2(x[15], x[14]);
+ idct32_high16_stage4_sse2(x, cospi, __rounding, cos_bit);
+
+ // stage 5
+ btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]);
+ btf_16_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]);
+ btf_16_adds_subs_sse2(x[4], x[5]);
+ btf_16_adds_subs_sse2(x[7], x[6]);
+ idct32_high24_stage5_sse2(x, cospi, __rounding, cos_bit);
+
+ // stage 6
+ btf_16_adds_subs_sse2(x[0], x[3]);
+ btf_16_adds_subs_sse2(x[1], x[2]);
+ idct32_high28_stage6_sse2(x, cospi, __rounding, cos_bit);
+
+  // stage 7~9
+ idct32_stage7_sse2(x, cospi, __rounding, cos_bit);
+ idct32_stage8_sse2(x, cospi, __rounding, cos_bit);
+ idct32_stage9_sse2(output, x);
+}
+
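+// The idct64 stage helpers below mirror the idct32 ones. The "highN" suffix
+// means the helper only updates the top N of the 64 intermediate values
+// (e.g. high32 touches x[32..63], high48 touches x[16..63]); the remaining
+// low entries for those stages are handled inline by each idct64_lowN entry
+// point, so the reduced-input variants can share the bulk of the flow graph.
+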
+static INLINE void idct64_stage4_high32_sse2(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]);
+ const __m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]);
+ const __m128i cospi_m60_m04 = pair_set_epi16(-cospi[60], -cospi[4]);
+ const __m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]);
+ const __m128i cospi_p28_p36 = pair_set_epi16(cospi[28], cospi[36]);
+ const __m128i cospi_m28_m36 = pair_set_epi16(-cospi[28], -cospi[36]);
+ const __m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]);
+ const __m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]);
+ const __m128i cospi_m44_m20 = pair_set_epi16(-cospi[44], -cospi[20]);
+ const __m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]);
+ const __m128i cospi_p12_p52 = pair_set_epi16(cospi[12], cospi[52]);
+ const __m128i cospi_m12_m52 = pair_set_epi16(-cospi[12], -cospi[52]);
+ btf_16_sse2(cospi_m04_p60, cospi_p60_p04, x[33], x[62], x[33], x[62]);
+ btf_16_sse2(cospi_m60_m04, cospi_m04_p60, x[34], x[61], x[34], x[61]);
+ btf_16_sse2(cospi_m36_p28, cospi_p28_p36, x[37], x[58], x[37], x[58]);
+ btf_16_sse2(cospi_m28_m36, cospi_m36_p28, x[38], x[57], x[38], x[57]);
+ btf_16_sse2(cospi_m20_p44, cospi_p44_p20, x[41], x[54], x[41], x[54]);
+ btf_16_sse2(cospi_m44_m20, cospi_m20_p44, x[42], x[53], x[42], x[53]);
+ btf_16_sse2(cospi_m52_p12, cospi_p12_p52, x[45], x[50], x[45], x[50]);
+ btf_16_sse2(cospi_m12_m52, cospi_m52_p12, x[46], x[49], x[46], x[49]);
+}
+
+static INLINE void idct64_stage5_high48_sse2(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
+ const __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
+ const __m128i cospi_m56_m08 = pair_set_epi16(-cospi[56], -cospi[8]);
+ const __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
+ const __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
+ const __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]);
+ btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x[17], x[30], x[17], x[30]);
+ btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x[18], x[29], x[18], x[29]);
+ btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x[21], x[26], x[21], x[26]);
+ btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x[22], x[25], x[22], x[25]);
+ btf_16_adds_subs_sse2(x[32], x[35]);
+ btf_16_adds_subs_sse2(x[33], x[34]);
+ btf_16_subs_adds_sse2(x[39], x[36]);
+ btf_16_subs_adds_sse2(x[38], x[37]);
+ btf_16_adds_subs_sse2(x[40], x[43]);
+ btf_16_adds_subs_sse2(x[41], x[42]);
+ btf_16_subs_adds_sse2(x[47], x[44]);
+ btf_16_subs_adds_sse2(x[46], x[45]);
+ btf_16_adds_subs_sse2(x[48], x[51]);
+ btf_16_adds_subs_sse2(x[49], x[50]);
+ btf_16_subs_adds_sse2(x[55], x[52]);
+ btf_16_subs_adds_sse2(x[54], x[53]);
+ btf_16_adds_subs_sse2(x[56], x[59]);
+ btf_16_adds_subs_sse2(x[57], x[58]);
+ btf_16_subs_adds_sse2(x[63], x[60]);
+ btf_16_subs_adds_sse2(x[62], x[61]);
+}
+
+static INLINE void idct64_stage6_high32_sse2(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
+ const __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
+ const __m128i cospi_m56_m08 = pair_set_epi16(-cospi[56], -cospi[8]);
+ const __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
+ const __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
+ const __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]);
+ btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x[34], x[61], x[34], x[61]);
+ btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x[35], x[60], x[35], x[60]);
+ btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x[36], x[59], x[36], x[59]);
+ btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x[37], x[58], x[37], x[58]);
+ btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x[42], x[53], x[42], x[53]);
+ btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x[43], x[52], x[43], x[52]);
+ btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x[44], x[51], x[44], x[51]);
+ btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x[45], x[50], x[45], x[50]);
+}
+
+static INLINE void idct64_stage6_high48_sse2(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ btf_16_adds_subs_sse2(x[16], x[19]);
+ btf_16_adds_subs_sse2(x[17], x[18]);
+ btf_16_subs_adds_sse2(x[23], x[20]);
+ btf_16_subs_adds_sse2(x[22], x[21]);
+ btf_16_adds_subs_sse2(x[24], x[27]);
+ btf_16_adds_subs_sse2(x[25], x[26]);
+ btf_16_subs_adds_sse2(x[31], x[28]);
+ btf_16_subs_adds_sse2(x[30], x[29]);
+ idct64_stage6_high32_sse2(x, cospi, __rounding, cos_bit);
+}
+
+static INLINE void idct64_stage7_high48_sse2(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
+ const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
+ const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
+ btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[18], x[29], x[18], x[29]);
+ btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[19], x[28], x[19], x[28]);
+ btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[20], x[27], x[20], x[27]);
+ btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[21], x[26], x[21], x[26]);
+ btf_16_adds_subs_sse2(x[32], x[39]);
+ btf_16_adds_subs_sse2(x[33], x[38]);
+ btf_16_adds_subs_sse2(x[34], x[37]);
+ btf_16_adds_subs_sse2(x[35], x[36]);
+ btf_16_subs_adds_sse2(x[47], x[40]);
+ btf_16_subs_adds_sse2(x[46], x[41]);
+ btf_16_subs_adds_sse2(x[45], x[42]);
+ btf_16_subs_adds_sse2(x[44], x[43]);
+ btf_16_adds_subs_sse2(x[48], x[55]);
+ btf_16_adds_subs_sse2(x[49], x[54]);
+ btf_16_adds_subs_sse2(x[50], x[53]);
+ btf_16_adds_subs_sse2(x[51], x[52]);
+ btf_16_subs_adds_sse2(x[63], x[56]);
+ btf_16_subs_adds_sse2(x[62], x[57]);
+ btf_16_subs_adds_sse2(x[61], x[58]);
+ btf_16_subs_adds_sse2(x[60], x[59]);
+}
+
+static INLINE void idct64_stage8_high48_sse2(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
+ const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
+ const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
+ btf_16_adds_subs_sse2(x[16], x[23]);
+ btf_16_adds_subs_sse2(x[17], x[22]);
+ btf_16_adds_subs_sse2(x[18], x[21]);
+ btf_16_adds_subs_sse2(x[19], x[20]);
+ btf_16_subs_adds_sse2(x[31], x[24]);
+ btf_16_subs_adds_sse2(x[30], x[25]);
+ btf_16_subs_adds_sse2(x[29], x[26]);
+ btf_16_subs_adds_sse2(x[28], x[27]);
+ btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[36], x[59], x[36], x[59]);
+ btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[37], x[58], x[37], x[58]);
+ btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[38], x[57], x[38], x[57]);
+ btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[39], x[56], x[39], x[56]);
+ btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[40], x[55], x[40], x[55]);
+ btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[41], x[54], x[41], x[54]);
+ btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[42], x[53], x[42], x[53]);
+ btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[43], x[52], x[43], x[52]);
+}
+
+static INLINE void idct64_stage9_sse2(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ btf_16_adds_subs_sse2(x[0], x[15]);
+ btf_16_adds_subs_sse2(x[1], x[14]);
+ btf_16_adds_subs_sse2(x[2], x[13]);
+ btf_16_adds_subs_sse2(x[3], x[12]);
+ btf_16_adds_subs_sse2(x[4], x[11]);
+ btf_16_adds_subs_sse2(x[5], x[10]);
+ btf_16_adds_subs_sse2(x[6], x[9]);
+ btf_16_adds_subs_sse2(x[7], x[8]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[20], x[27], x[20], x[27]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[21], x[26], x[21], x[26]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[22], x[25], x[22], x[25]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[23], x[24], x[23], x[24]);
+ btf_16_adds_subs_sse2(x[32], x[47]);
+ btf_16_adds_subs_sse2(x[33], x[46]);
+ btf_16_adds_subs_sse2(x[34], x[45]);
+ btf_16_adds_subs_sse2(x[35], x[44]);
+ btf_16_adds_subs_sse2(x[36], x[43]);
+ btf_16_adds_subs_sse2(x[37], x[42]);
+ btf_16_adds_subs_sse2(x[38], x[41]);
+ btf_16_adds_subs_sse2(x[39], x[40]);
+ btf_16_subs_adds_sse2(x[63], x[48]);
+ btf_16_subs_adds_sse2(x[62], x[49]);
+ btf_16_subs_adds_sse2(x[61], x[50]);
+ btf_16_subs_adds_sse2(x[60], x[51]);
+ btf_16_subs_adds_sse2(x[59], x[52]);
+ btf_16_subs_adds_sse2(x[58], x[53]);
+ btf_16_subs_adds_sse2(x[57], x[54]);
+ btf_16_subs_adds_sse2(x[56], x[55]);
+}
+
+static INLINE void idct64_stage10_sse2(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ btf_16_adds_subs_sse2(x[0], x[31]);
+ btf_16_adds_subs_sse2(x[1], x[30]);
+ btf_16_adds_subs_sse2(x[2], x[29]);
+ btf_16_adds_subs_sse2(x[3], x[28]);
+ btf_16_adds_subs_sse2(x[4], x[27]);
+ btf_16_adds_subs_sse2(x[5], x[26]);
+ btf_16_adds_subs_sse2(x[6], x[25]);
+ btf_16_adds_subs_sse2(x[7], x[24]);
+ btf_16_adds_subs_sse2(x[8], x[23]);
+ btf_16_adds_subs_sse2(x[9], x[22]);
+ btf_16_adds_subs_sse2(x[10], x[21]);
+ btf_16_adds_subs_sse2(x[11], x[20]);
+ btf_16_adds_subs_sse2(x[12], x[19]);
+ btf_16_adds_subs_sse2(x[13], x[18]);
+ btf_16_adds_subs_sse2(x[14], x[17]);
+ btf_16_adds_subs_sse2(x[15], x[16]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[40], x[55], x[40], x[55]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[41], x[54], x[41], x[54]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[42], x[53], x[42], x[53]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[43], x[52], x[43], x[52]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[44], x[51], x[44], x[51]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[45], x[50], x[45], x[50]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[46], x[49], x[46], x[49]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[47], x[48], x[47], x[48]);
+}
+
+static INLINE void idct64_stage11_sse2(__m128i *output, __m128i *x) {
+ btf_16_adds_subs_out_sse2(output[0], output[63], x[0], x[63]);
+ btf_16_adds_subs_out_sse2(output[1], output[62], x[1], x[62]);
+ btf_16_adds_subs_out_sse2(output[2], output[61], x[2], x[61]);
+ btf_16_adds_subs_out_sse2(output[3], output[60], x[3], x[60]);
+ btf_16_adds_subs_out_sse2(output[4], output[59], x[4], x[59]);
+ btf_16_adds_subs_out_sse2(output[5], output[58], x[5], x[58]);
+ btf_16_adds_subs_out_sse2(output[6], output[57], x[6], x[57]);
+ btf_16_adds_subs_out_sse2(output[7], output[56], x[7], x[56]);
+ btf_16_adds_subs_out_sse2(output[8], output[55], x[8], x[55]);
+ btf_16_adds_subs_out_sse2(output[9], output[54], x[9], x[54]);
+ btf_16_adds_subs_out_sse2(output[10], output[53], x[10], x[53]);
+ btf_16_adds_subs_out_sse2(output[11], output[52], x[11], x[52]);
+ btf_16_adds_subs_out_sse2(output[12], output[51], x[12], x[51]);
+ btf_16_adds_subs_out_sse2(output[13], output[50], x[13], x[50]);
+ btf_16_adds_subs_out_sse2(output[14], output[49], x[14], x[49]);
+ btf_16_adds_subs_out_sse2(output[15], output[48], x[15], x[48]);
+ btf_16_adds_subs_out_sse2(output[16], output[47], x[16], x[47]);
+ btf_16_adds_subs_out_sse2(output[17], output[46], x[17], x[46]);
+ btf_16_adds_subs_out_sse2(output[18], output[45], x[18], x[45]);
+ btf_16_adds_subs_out_sse2(output[19], output[44], x[19], x[44]);
+ btf_16_adds_subs_out_sse2(output[20], output[43], x[20], x[43]);
+ btf_16_adds_subs_out_sse2(output[21], output[42], x[21], x[42]);
+ btf_16_adds_subs_out_sse2(output[22], output[41], x[22], x[41]);
+ btf_16_adds_subs_out_sse2(output[23], output[40], x[23], x[40]);
+ btf_16_adds_subs_out_sse2(output[24], output[39], x[24], x[39]);
+ btf_16_adds_subs_out_sse2(output[25], output[38], x[25], x[38]);
+ btf_16_adds_subs_out_sse2(output[26], output[37], x[26], x[37]);
+ btf_16_adds_subs_out_sse2(output[27], output[36], x[27], x[36]);
+ btf_16_adds_subs_out_sse2(output[28], output[35], x[28], x[35]);
+ btf_16_adds_subs_out_sse2(output[29], output[34], x[29], x[34]);
+ btf_16_adds_subs_out_sse2(output[30], output[33], x[30], x[33]);
+ btf_16_adds_subs_out_sse2(output[31], output[32], x[31], x[32]);
+}
+
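+// DC-only path: as in idct32_low1_new_ssse3 above, the single non-zero input
+// is scaled once by cospi[32] and the resulting (identical) x[0]/x[1] values
+// are replicated across all 64 outputs.
+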
+static void idct64_low1_new_ssse3(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+
+ // stage 1
+ __m128i x[32];
+ x[0] = input[0];
+
+ // stage 2
+ // stage 3
+ // stage 4
+ // stage 5
+ // stage 6
+ btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
+
+ // stage 7
+ // stage 8
+ // stage 9
+ // stage 10
+ // stage 11
+ output[0] = x[0];
+ output[63] = x[0];
+ output[1] = x[1];
+ output[62] = x[1];
+ output[2] = x[1];
+ output[61] = x[1];
+ output[3] = x[0];
+ output[60] = x[0];
+ output[4] = x[0];
+ output[59] = x[0];
+ output[5] = x[1];
+ output[58] = x[1];
+ output[6] = x[1];
+ output[57] = x[1];
+ output[7] = x[0];
+ output[56] = x[0];
+ output[8] = x[0];
+ output[55] = x[0];
+ output[9] = x[1];
+ output[54] = x[1];
+ output[10] = x[1];
+ output[53] = x[1];
+ output[11] = x[0];
+ output[52] = x[0];
+ output[12] = x[0];
+ output[51] = x[0];
+ output[13] = x[1];
+ output[50] = x[1];
+ output[14] = x[1];
+ output[49] = x[1];
+ output[15] = x[0];
+ output[48] = x[0];
+ output[16] = x[0];
+ output[47] = x[0];
+ output[17] = x[1];
+ output[46] = x[1];
+ output[18] = x[1];
+ output[45] = x[1];
+ output[19] = x[0];
+ output[44] = x[0];
+ output[20] = x[0];
+ output[43] = x[0];
+ output[21] = x[1];
+ output[42] = x[1];
+ output[22] = x[1];
+ output[41] = x[1];
+ output[23] = x[0];
+ output[40] = x[0];
+ output[24] = x[0];
+ output[39] = x[0];
+ output[25] = x[1];
+ output[38] = x[1];
+ output[26] = x[1];
+ output[37] = x[1];
+ output[27] = x[0];
+ output[36] = x[0];
+ output[28] = x[0];
+ output[35] = x[0];
+ output[29] = x[1];
+ output[34] = x[1];
+ output[30] = x[1];
+ output[33] = x[1];
+ output[31] = x[0];
+ output[32] = x[0];
+}
+
+static void idct64_low8_new_ssse3(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+ const __m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]);
+ const __m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]);
+ const __m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]);
+ const __m128i cospi_m28_m36 = pair_set_epi16(-cospi[28], -cospi[36]);
+ const __m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]);
+ const __m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]);
+ const __m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]);
+ const __m128i cospi_m12_m52 = pair_set_epi16(-cospi[12], -cospi[52]);
+ const __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
+ const __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
+ const __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
+ const __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
+ const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
+ const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
+
+ // stage 1
+ __m128i x[64];
+ x[0] = input[0];
+ x[8] = input[4];
+ x[16] = input[2];
+ x[24] = input[6];
+ x[32] = input[1];
+ x[40] = input[5];
+ x[48] = input[3];
+ x[56] = input[7];
+
+ // stage 2
+ btf_16_ssse3(cospi[63], cospi[1], x[32], x[32], x[63]);
+ btf_16_ssse3(-cospi[57], cospi[7], x[56], x[39], x[56]);
+ btf_16_ssse3(cospi[59], cospi[5], x[40], x[40], x[55]);
+ btf_16_ssse3(-cospi[61], cospi[3], x[48], x[47], x[48]);
+
+ // stage 3
+ btf_16_ssse3(cospi[62], cospi[2], x[16], x[16], x[31]);
+ btf_16_ssse3(-cospi[58], cospi[6], x[24], x[23], x[24]);
+ x[33] = x[32];
+ x[38] = x[39];
+ x[41] = x[40];
+ x[46] = x[47];
+ x[49] = x[48];
+ x[54] = x[55];
+ x[57] = x[56];
+ x[62] = x[63];
+
+ // stage 4
+ btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]);
+ x[17] = x[16];
+ x[22] = x[23];
+ x[25] = x[24];
+ x[30] = x[31];
+ btf_16_sse2(cospi_m04_p60, cospi_p60_p04, x[33], x[62], x[33], x[62]);
+ btf_16_sse2(cospi_m28_m36, cospi_m36_p28, x[38], x[57], x[38], x[57]);
+ btf_16_sse2(cospi_m20_p44, cospi_p44_p20, x[41], x[54], x[41], x[54]);
+ btf_16_sse2(cospi_m12_m52, cospi_m52_p12, x[46], x[49], x[46], x[49]);
+
+ // stage 5
+ x[9] = x[8];
+ x[14] = x[15];
+ btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x[17], x[30], x[17], x[30]);
+ btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x[22], x[25], x[22], x[25]);
+ x[35] = x[32];
+ x[34] = x[33];
+ x[36] = x[39];
+ x[37] = x[38];
+ x[43] = x[40];
+ x[42] = x[41];
+ x[44] = x[47];
+ x[45] = x[46];
+ x[51] = x[48];
+ x[50] = x[49];
+ x[52] = x[55];
+ x[53] = x[54];
+ x[59] = x[56];
+ x[58] = x[57];
+ x[60] = x[63];
+ x[61] = x[62];
+
+ // stage 6
+ btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
+ btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
+ x[19] = x[16];
+ x[18] = x[17];
+ x[20] = x[23];
+ x[21] = x[22];
+ x[27] = x[24];
+ x[26] = x[25];
+ x[28] = x[31];
+ x[29] = x[30];
+ idct64_stage6_high32_sse2(x, cospi, __rounding, cos_bit);
+
+ // stage 7
+ x[3] = x[0];
+ x[2] = x[1];
+ x[11] = x[8];
+ x[10] = x[9];
+ x[12] = x[15];
+ x[13] = x[14];
+ idct64_stage7_high48_sse2(x, cospi, __rounding, cos_bit);
+
+ // stage 8
+ x[7] = x[0];
+ x[6] = x[1];
+ x[5] = x[2];
+ x[4] = x[3];
+  // x[9] is left unchanged at this stage.
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
+ idct64_stage8_high48_sse2(x, cospi, __rounding, cos_bit);
+
+ idct64_stage9_sse2(x, cospi, __rounding, cos_bit);
+ idct64_stage10_sse2(x, cospi, __rounding, cos_bit);
+ idct64_stage11_sse2(output, x);
+}
+
+static void idct64_low16_new_ssse3(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
+ const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
+ const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
+ const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
+
+ // stage 1
+ __m128i x[64];
+ x[0] = input[0];
+ x[4] = input[8];
+ x[8] = input[4];
+ x[12] = input[12];
+ x[16] = input[2];
+ x[20] = input[10];
+ x[24] = input[6];
+ x[28] = input[14];
+ x[32] = input[1];
+ x[36] = input[9];
+ x[40] = input[5];
+ x[44] = input[13];
+ x[48] = input[3];
+ x[52] = input[11];
+ x[56] = input[7];
+ x[60] = input[15];
+
+ // stage 2
+ btf_16_ssse3(cospi[63], cospi[1], x[32], x[32], x[63]);
+ btf_16_ssse3(-cospi[49], cospi[15], x[60], x[35], x[60]);
+ btf_16_ssse3(cospi[55], cospi[9], x[36], x[36], x[59]);
+ btf_16_ssse3(-cospi[57], cospi[7], x[56], x[39], x[56]);
+ btf_16_ssse3(cospi[59], cospi[5], x[40], x[40], x[55]);
+ btf_16_ssse3(-cospi[53], cospi[11], x[52], x[43], x[52]);
+ btf_16_ssse3(cospi[51], cospi[13], x[44], x[44], x[51]);
+ btf_16_ssse3(-cospi[61], cospi[3], x[48], x[47], x[48]);
+
+ // stage 3
+ btf_16_ssse3(cospi[62], cospi[2], x[16], x[16], x[31]);
+ btf_16_ssse3(-cospi[50], cospi[14], x[28], x[19], x[28]);
+ btf_16_ssse3(cospi[54], cospi[10], x[20], x[20], x[27]);
+ btf_16_ssse3(-cospi[58], cospi[6], x[24], x[23], x[24]);
+ x[33] = x[32];
+ x[34] = x[35];
+ x[37] = x[36];
+ x[38] = x[39];
+ x[41] = x[40];
+ x[42] = x[43];
+ x[45] = x[44];
+ x[46] = x[47];
+ x[49] = x[48];
+ x[50] = x[51];
+ x[53] = x[52];
+ x[54] = x[55];
+ x[57] = x[56];
+ x[58] = x[59];
+ x[61] = x[60];
+ x[62] = x[63];
+
+ // stage 4
+ btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]);
+ btf_16_ssse3(-cospi[52], cospi[12], x[12], x[11], x[12]);
+ x[17] = x[16];
+ x[18] = x[19];
+ x[21] = x[20];
+ x[22] = x[23];
+ x[25] = x[24];
+ x[26] = x[27];
+ x[29] = x[28];
+ x[30] = x[31];
+ idct64_stage4_high32_sse2(x, cospi, __rounding, cos_bit);
+
+ // stage 5
+ btf_16_ssse3(cospi[56], cospi[8], x[4], x[4], x[7]);
+ x[9] = x[8];
+ x[10] = x[11];
+ x[13] = x[12];
+ x[14] = x[15];
+ idct64_stage5_high48_sse2(x, cospi, __rounding, cos_bit);
+
+ // stage 6
+ btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
+ x[5] = x[4];
+ x[6] = x[7];
+ btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
+ btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]);
+ idct64_stage6_high48_sse2(x, cospi, __rounding, cos_bit);
+
+ // stage 7
+ x[3] = x[0];
+ x[2] = x[1];
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
+ btf_16_adds_subs_sse2(x[8], x[11]);
+ btf_16_adds_subs_sse2(x[9], x[10]);
+ btf_16_subs_adds_sse2(x[15], x[12]);
+ btf_16_subs_adds_sse2(x[14], x[13]);
+ idct64_stage7_high48_sse2(x, cospi, __rounding, cos_bit);
+
+ // stage 8
+ btf_16_adds_subs_sse2(x[0], x[7]);
+ btf_16_adds_subs_sse2(x[1], x[6]);
+ btf_16_adds_subs_sse2(x[2], x[5]);
+ btf_16_adds_subs_sse2(x[3], x[4]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
+ idct64_stage8_high48_sse2(x, cospi, __rounding, cos_bit);
+
+ idct64_stage9_sse2(x, cospi, __rounding, cos_bit);
+ idct64_stage10_sse2(x, cospi, __rounding, cos_bit);
+ idct64_stage11_sse2(output, x);
+}
+
+static void idct64_low32_new_ssse3(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
+ const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
+ const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
+ const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
+
+ // stage 1
+ __m128i x[64];
+ x[0] = input[0];
+ x[2] = input[16];
+ x[4] = input[8];
+ x[6] = input[24];
+ x[8] = input[4];
+ x[10] = input[20];
+ x[12] = input[12];
+ x[14] = input[28];
+ x[16] = input[2];
+ x[18] = input[18];
+ x[20] = input[10];
+ x[22] = input[26];
+ x[24] = input[6];
+ x[26] = input[22];
+ x[28] = input[14];
+ x[30] = input[30];
+ x[32] = input[1];
+ x[34] = input[17];
+ x[36] = input[9];
+ x[38] = input[25];
+ x[40] = input[5];
+ x[42] = input[21];
+ x[44] = input[13];
+ x[46] = input[29];
+ x[48] = input[3];
+ x[50] = input[19];
+ x[52] = input[11];
+ x[54] = input[27];
+ x[56] = input[7];
+ x[58] = input[23];
+ x[60] = input[15];
+ x[62] = input[31];
+
+ // stage 2
+ btf_16_ssse3(cospi[63], cospi[1], x[32], x[32], x[63]);
+ btf_16_ssse3(-cospi[33], cospi[31], x[62], x[33], x[62]);
+ btf_16_ssse3(cospi[47], cospi[17], x[34], x[34], x[61]);
+ btf_16_ssse3(-cospi[49], cospi[15], x[60], x[35], x[60]);
+ btf_16_ssse3(cospi[55], cospi[9], x[36], x[36], x[59]);
+ btf_16_ssse3(-cospi[41], cospi[23], x[58], x[37], x[58]);
+ btf_16_ssse3(cospi[39], cospi[25], x[38], x[38], x[57]);
+ btf_16_ssse3(-cospi[57], cospi[7], x[56], x[39], x[56]);
+ btf_16_ssse3(cospi[59], cospi[5], x[40], x[40], x[55]);
+ btf_16_ssse3(-cospi[37], cospi[27], x[54], x[41], x[54]);
+ btf_16_ssse3(cospi[43], cospi[21], x[42], x[42], x[53]);
+ btf_16_ssse3(-cospi[53], cospi[11], x[52], x[43], x[52]);
+ btf_16_ssse3(cospi[51], cospi[13], x[44], x[44], x[51]);
+ btf_16_ssse3(-cospi[45], cospi[19], x[50], x[45], x[50]);
+ btf_16_ssse3(cospi[35], cospi[29], x[46], x[46], x[49]);
+ btf_16_ssse3(-cospi[61], cospi[3], x[48], x[47], x[48]);
+
+ // stage 3
+ btf_16_ssse3(cospi[62], cospi[2], x[16], x[16], x[31]);
+ btf_16_ssse3(-cospi[34], cospi[30], x[30], x[17], x[30]);
+ btf_16_ssse3(cospi[46], cospi[18], x[18], x[18], x[29]);
+ btf_16_ssse3(-cospi[50], cospi[14], x[28], x[19], x[28]);
+ btf_16_ssse3(cospi[54], cospi[10], x[20], x[20], x[27]);
+ btf_16_ssse3(-cospi[42], cospi[22], x[26], x[21], x[26]);
+ btf_16_ssse3(cospi[38], cospi[26], x[22], x[22], x[25]);
+ btf_16_ssse3(-cospi[58], cospi[6], x[24], x[23], x[24]);
+ btf_16_adds_subs_sse2(x[32], x[33]);
+ btf_16_subs_adds_sse2(x[35], x[34]);
+ btf_16_adds_subs_sse2(x[36], x[37]);
+ btf_16_subs_adds_sse2(x[39], x[38]);
+ btf_16_adds_subs_sse2(x[40], x[41]);
+ btf_16_subs_adds_sse2(x[43], x[42]);
+ btf_16_adds_subs_sse2(x[44], x[45]);
+ btf_16_subs_adds_sse2(x[47], x[46]);
+ btf_16_adds_subs_sse2(x[48], x[49]);
+ btf_16_subs_adds_sse2(x[51], x[50]);
+ btf_16_adds_subs_sse2(x[52], x[53]);
+ btf_16_subs_adds_sse2(x[55], x[54]);
+ btf_16_adds_subs_sse2(x[56], x[57]);
+ btf_16_subs_adds_sse2(x[59], x[58]);
+ btf_16_adds_subs_sse2(x[60], x[61]);
+ btf_16_subs_adds_sse2(x[63], x[62]);
+
+ // stage 4
+ btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]);
+ btf_16_ssse3(-cospi[36], cospi[28], x[14], x[9], x[14]);
+ btf_16_ssse3(cospi[44], cospi[20], x[10], x[10], x[13]);
+ btf_16_ssse3(-cospi[52], cospi[12], x[12], x[11], x[12]);
+ btf_16_adds_subs_sse2(x[16], x[17]);
+ btf_16_subs_adds_sse2(x[19], x[18]);
+ btf_16_adds_subs_sse2(x[20], x[21]);
+ btf_16_subs_adds_sse2(x[23], x[22]);
+ btf_16_adds_subs_sse2(x[24], x[25]);
+ btf_16_subs_adds_sse2(x[27], x[26]);
+ btf_16_adds_subs_sse2(x[28], x[29]);
+ btf_16_subs_adds_sse2(x[31], x[30]);
+ idct64_stage4_high32_sse2(x, cospi, __rounding, cos_bit);
+
+ // stage 5
+ btf_16_ssse3(cospi[56], cospi[8], x[4], x[4], x[7]);
+ btf_16_ssse3(-cospi[40], cospi[24], x[6], x[5], x[6]);
+ btf_16_adds_subs_sse2(x[8], x[9]);
+ btf_16_subs_adds_sse2(x[11], x[10]);
+ btf_16_adds_subs_sse2(x[12], x[13]);
+ btf_16_subs_adds_sse2(x[15], x[14]);
+ idct64_stage5_high48_sse2(x, cospi, __rounding, cos_bit);
+
+ // stage 6
+ btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
+ btf_16_ssse3(cospi[48], cospi[16], x[2], x[2], x[3]);
+ btf_16_adds_subs_sse2(x[4], x[5]);
+ btf_16_subs_adds_sse2(x[7], x[6]);
+ btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
+ btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]);
+ idct64_stage6_high48_sse2(x, cospi, __rounding, cos_bit);
+
+ // stage 7
+ btf_16_adds_subs_sse2(x[0], x[3]);
+ btf_16_adds_subs_sse2(x[1], x[2]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
+ btf_16_adds_subs_sse2(x[8], x[11]);
+ btf_16_adds_subs_sse2(x[9], x[10]);
+ btf_16_subs_adds_sse2(x[15], x[12]);
+ btf_16_subs_adds_sse2(x[14], x[13]);
+ idct64_stage7_high48_sse2(x, cospi, __rounding, cos_bit);
+
+ // stage 8
+ btf_16_adds_subs_sse2(x[0], x[7]);
+ btf_16_adds_subs_sse2(x[1], x[6]);
+ btf_16_adds_subs_sse2(x[2], x[5]);
+ btf_16_adds_subs_sse2(x[3], x[4]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
+ btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
+ idct64_stage8_high48_sse2(x, cospi, __rounding, cos_bit);
+
+ // stage 9~11
+ idct64_stage9_sse2(x, cospi, __rounding, cos_bit);
+ idct64_stage10_sse2(x, cospi, __rounding, cos_bit);
+ idct64_stage11_sse2(output, x);
+}
+
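+// ADST-4 is evaluated directly from the sinpi constants rather than as a
+// butterfly network. With s_k = sinpi[k] and x0..x3 the four inputs, the rows
+// assembled below are (a sketch matching the per-line comments that follow):
+//   out0 = s1*x0 + s3*x1 + s4*x2 + s2*x3
+//   out1 = s2*x0 + s3*x1 - s1*x2 - s4*x3
+//   out2 = s3*(x0 - x2 + x3)
+//   out3 = s4*x0 - s3*x1 + s2*x2 - s1*x3
+// The 32-bit partial products are summed at full precision and only rounded
+// and packed back to 16 bits once, in the final loop.
+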
+void iadst4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *sinpi = sinpi_arr(INV_COS_BIT);
+ const __m128i sinpi_p01_p04 = pair_set_epi16(sinpi[1], sinpi[4]);
+ const __m128i sinpi_p02_m01 = pair_set_epi16(sinpi[2], -sinpi[1]);
+ const __m128i sinpi_p03_p02 = pair_set_epi16(sinpi[3], sinpi[2]);
+ const __m128i sinpi_p03_m04 = pair_set_epi16(sinpi[3], -sinpi[4]);
+ const __m128i sinpi_p03_m03 = pair_set_epi16(sinpi[3], -sinpi[3]);
+ const __m128i sinpi_0_p03 = pair_set_epi16(0, sinpi[3]);
+ const __m128i sinpi_p04_p02 = pair_set_epi16(sinpi[4], sinpi[2]);
+ const __m128i sinpi_m03_m01 = pair_set_epi16(-sinpi[3], -sinpi[1]);
+ __m128i x0[4];
+ x0[0] = input[0];
+ x0[1] = input[1];
+ x0[2] = input[2];
+ x0[3] = input[3];
+
+ __m128i u[4];
+ u[0] = _mm_unpacklo_epi16(x0[0], x0[2]);
+ u[1] = _mm_unpackhi_epi16(x0[0], x0[2]);
+ u[2] = _mm_unpacklo_epi16(x0[1], x0[3]);
+ u[3] = _mm_unpackhi_epi16(x0[1], x0[3]);
+
+ __m128i x1[16];
+ x1[0] = _mm_madd_epi16(u[0], sinpi_p01_p04); // x0*sin1 + x2*sin4
+ x1[1] = _mm_madd_epi16(u[1], sinpi_p01_p04);
+ x1[2] = _mm_madd_epi16(u[0], sinpi_p02_m01); // x0*sin2 - x2*sin1
+ x1[3] = _mm_madd_epi16(u[1], sinpi_p02_m01);
+ x1[4] = _mm_madd_epi16(u[2], sinpi_p03_p02); // x1*sin3 + x3*sin2
+ x1[5] = _mm_madd_epi16(u[3], sinpi_p03_p02);
+ x1[6] = _mm_madd_epi16(u[2], sinpi_p03_m04); // x1*sin3 - x3*sin4
+ x1[7] = _mm_madd_epi16(u[3], sinpi_p03_m04);
+ x1[8] = _mm_madd_epi16(u[0], sinpi_p03_m03); // x0*sin3 - x2*sin3
+ x1[9] = _mm_madd_epi16(u[1], sinpi_p03_m03);
+  x1[10] = _mm_madd_epi16(u[2], sinpi_0_p03); // x3*sin3
+ x1[11] = _mm_madd_epi16(u[3], sinpi_0_p03);
+ x1[12] = _mm_madd_epi16(u[0], sinpi_p04_p02); // x0*sin4 + x2*sin2
+ x1[13] = _mm_madd_epi16(u[1], sinpi_p04_p02);
+ x1[14] = _mm_madd_epi16(u[2], sinpi_m03_m01); // -x1*sin3 - x3*sin1
+ x1[15] = _mm_madd_epi16(u[3], sinpi_m03_m01);
+
+ __m128i x2[8];
+ x2[0] = _mm_add_epi32(x1[0], x1[4]); // x0*sin1 +x2*sin4 +x1*sin3 +x3*sin2
+ x2[1] = _mm_add_epi32(x1[1], x1[5]);
+ x2[2] = _mm_add_epi32(x1[2], x1[6]); // x0*sin2 -x2*sin1 +x1*sin3 -x3*sin4
+ x2[3] = _mm_add_epi32(x1[3], x1[7]);
+ x2[4] = _mm_add_epi32(x1[8], x1[10]); // x0*sin3 -x2*sin3 +x3*sin3
+ x2[5] = _mm_add_epi32(x1[9], x1[11]);
+  x2[6] = _mm_add_epi32(x1[12], x1[14]); // x0*sin4 +x2*sin2 -x1*sin3 -x3*sin1
+ x2[7] = _mm_add_epi32(x1[13], x1[15]);
+
+ const __m128i rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+ for (int i = 0; i < 4; ++i) {
+ __m128i out0 = _mm_add_epi32(x2[2 * i], rounding);
+ __m128i out1 = _mm_add_epi32(x2[2 * i + 1], rounding);
+ out0 = _mm_srai_epi32(out0, INV_COS_BIT);
+ out1 = _mm_srai_epi32(out1, INV_COS_BIT);
+ output[i] = _mm_packs_epi32(out0, out1);
+ }
+}
+
+// TODO(binpengsmail@gmail.com):
+// Explore reusing the VP9 versions of the corresponding SSE2 functions and
+// evaluate whether they offer any further speedup.
+void iadst4_w4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *sinpi = sinpi_arr(INV_COS_BIT);
+ const __m128i sinpi_p01_p04 = pair_set_epi16(sinpi[1], sinpi[4]);
+ const __m128i sinpi_p02_m01 = pair_set_epi16(sinpi[2], -sinpi[1]);
+ const __m128i sinpi_p03_p02 = pair_set_epi16(sinpi[3], sinpi[2]);
+ const __m128i sinpi_p03_m04 = pair_set_epi16(sinpi[3], -sinpi[4]);
+ const __m128i sinpi_p03_m03 = pair_set_epi16(sinpi[3], -sinpi[3]);
+ const __m128i sinpi_0_p03 = pair_set_epi16(0, sinpi[3]);
+ const __m128i sinpi_p04_p02 = pair_set_epi16(sinpi[4], sinpi[2]);
+ const __m128i sinpi_m03_m01 = pair_set_epi16(-sinpi[3], -sinpi[1]);
+ __m128i x0[4];
+ x0[0] = input[0];
+ x0[1] = input[1];
+ x0[2] = input[2];
+ x0[3] = input[3];
+
+ __m128i u[2];
+ u[0] = _mm_unpacklo_epi16(x0[0], x0[2]);
+ u[1] = _mm_unpacklo_epi16(x0[1], x0[3]);
+
+ __m128i x1[8];
+ x1[0] = _mm_madd_epi16(u[0], sinpi_p01_p04); // x0*sin1 + x2*sin4
+ x1[1] = _mm_madd_epi16(u[0], sinpi_p02_m01); // x0*sin2 - x2*sin1
+ x1[2] = _mm_madd_epi16(u[1], sinpi_p03_p02); // x1*sin3 + x3*sin2
+ x1[3] = _mm_madd_epi16(u[1], sinpi_p03_m04); // x1*sin3 - x3*sin4
+ x1[4] = _mm_madd_epi16(u[0], sinpi_p03_m03); // x0*sin3 - x2*sin3
+  x1[5] = _mm_madd_epi16(u[1], sinpi_0_p03);   // x3*sin3
+ x1[6] = _mm_madd_epi16(u[0], sinpi_p04_p02); // x0*sin4 + x2*sin2
+ x1[7] = _mm_madd_epi16(u[1], sinpi_m03_m01); // -x1*sin3 - x3*sin1
+
+ __m128i x2[4];
+ x2[0] = _mm_add_epi32(x1[0], x1[2]); // x0*sin1 + x2*sin4 + x1*sin3 + x3*sin2
+ x2[1] = _mm_add_epi32(x1[1], x1[3]); // x0*sin2 - x2*sin1 + x1*sin3 - x3*sin4
+ x2[2] = _mm_add_epi32(x1[4], x1[5]); // x0*sin3 - x2*sin3 + x3*sin3
+ x2[3] = _mm_add_epi32(x1[6], x1[7]); // x0*sin4 + x2*sin2 - x1*sin3 - x3*sin1
+
+ const __m128i rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+ for (int i = 0; i < 4; ++i) {
+ __m128i out0 = _mm_add_epi32(x2[i], rounding);
+ out0 = _mm_srai_epi32(out0, INV_COS_BIT);
+ output[i] = _mm_packs_epi32(out0, out0);
+ }
+}
+
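+// The iadst8 variants follow the usual lattice: rotation stages alternate
+// with add/sub butterflies, and the last stage negates every other output
+// (via _mm_subs_epi16 against zero). iadst8_low1_new_ssse3 is the
+// single-coefficient fast path: the first rotation collapses to one
+// btf_16_ssse3 scaling and the add/sub stages become plain copies.
+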
+static void iadst8_low1_new_ssse3(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __zero = _mm_setzero_si128();
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
+ const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
+
+ // stage 1
+ __m128i x[8];
+ x[1] = input[0];
+
+ // stage 2
+ btf_16_ssse3(cospi[60], -cospi[4], x[1], x[0], x[1]);
+
+ // stage 3
+ x[4] = x[0];
+ x[5] = x[1];
+
+ // stage 4
+ btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]);
+
+ // stage 5
+ x[2] = x[0];
+ x[3] = x[1];
+ x[6] = x[4];
+ x[7] = x[5];
+
+ // stage 6
+ btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[2], x[3], x[2], x[3]);
+ btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[6], x[7], x[6], x[7]);
+
+ // stage 7
+ output[0] = x[0];
+ output[1] = _mm_subs_epi16(__zero, x[4]);
+ output[2] = x[6];
+ output[3] = _mm_subs_epi16(__zero, x[2]);
+ output[4] = x[3];
+ output[5] = _mm_subs_epi16(__zero, x[7]);
+ output[6] = x[5];
+ output[7] = _mm_subs_epi16(__zero, x[1]);
+}
+
+void iadst8_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __zero = _mm_setzero_si128();
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]);
+ const __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]);
+ const __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]);
+ const __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]);
+ const __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]);
+ const __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]);
+ const __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]);
+ const __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]);
+ const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
+ const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
+ const __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
+
+ // stage 1
+ __m128i x[8];
+ x[0] = input[7];
+ x[1] = input[0];
+ x[2] = input[5];
+ x[3] = input[2];
+ x[4] = input[3];
+ x[5] = input[4];
+ x[6] = input[1];
+ x[7] = input[6];
+
+ // stage 2
+ btf_16_sse2(cospi_p04_p60, cospi_p60_m04, x[0], x[1], x[0], x[1]);
+ btf_16_sse2(cospi_p20_p44, cospi_p44_m20, x[2], x[3], x[2], x[3]);
+ btf_16_sse2(cospi_p36_p28, cospi_p28_m36, x[4], x[5], x[4], x[5]);
+ btf_16_sse2(cospi_p52_p12, cospi_p12_m52, x[6], x[7], x[6], x[7]);
+
+ // stage 3
+ btf_16_adds_subs_sse2(x[0], x[4]);
+ btf_16_adds_subs_sse2(x[1], x[5]);
+ btf_16_adds_subs_sse2(x[2], x[6]);
+ btf_16_adds_subs_sse2(x[3], x[7]);
+
+ // stage 4
+ btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]);
+ btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x[6], x[7], x[6], x[7]);
+
+ // stage 5
+ btf_16_adds_subs_sse2(x[0], x[2]);
+ btf_16_adds_subs_sse2(x[1], x[3]);
+ btf_16_adds_subs_sse2(x[4], x[6]);
+ btf_16_adds_subs_sse2(x[5], x[7]);
+
+ // stage 6
+ btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[2], x[3], x[2], x[3]);
+ btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[6], x[7], x[6], x[7]);
+
+ // stage 7
+ output[0] = x[0];
+ output[1] = _mm_subs_epi16(__zero, x[4]);
+ output[2] = x[6];
+ output[3] = _mm_subs_epi16(__zero, x[2]);
+ output[4] = x[3];
+ output[5] = _mm_subs_epi16(__zero, x[7]);
+ output[6] = x[5];
+ output[7] = _mm_subs_epi16(__zero, x[1]);
+}
+
+void iadst8_w4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __zero = _mm_setzero_si128();
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]);
+ const __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]);
+ const __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]);
+ const __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]);
+ const __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]);
+ const __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]);
+ const __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]);
+ const __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]);
+ const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
+ const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
+ const __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
+
+ // stage 1
+ __m128i x[8];
+ x[0] = input[7];
+ x[1] = input[0];
+ x[2] = input[5];
+ x[3] = input[2];
+ x[4] = input[3];
+ x[5] = input[4];
+ x[6] = input[1];
+ x[7] = input[6];
+
+ // stage 2
+ btf_16_4p_sse2(cospi_p04_p60, cospi_p60_m04, x[0], x[1], x[0], x[1]);
+ btf_16_4p_sse2(cospi_p20_p44, cospi_p44_m20, x[2], x[3], x[2], x[3]);
+ btf_16_4p_sse2(cospi_p36_p28, cospi_p28_m36, x[4], x[5], x[4], x[5]);
+ btf_16_4p_sse2(cospi_p52_p12, cospi_p12_m52, x[6], x[7], x[6], x[7]);
+
+ // stage 3
+ btf_16_adds_subs_sse2(x[0], x[4]);
+ btf_16_adds_subs_sse2(x[1], x[5]);
+ btf_16_adds_subs_sse2(x[2], x[6]);
+ btf_16_adds_subs_sse2(x[3], x[7]);
+
+ // stage 4
+ btf_16_4p_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]);
+ btf_16_4p_sse2(cospi_m48_p16, cospi_p16_p48, x[6], x[7], x[6], x[7]);
+
+ // stage 5
+ btf_16_adds_subs_sse2(x[0], x[2]);
+ btf_16_adds_subs_sse2(x[1], x[3]);
+ btf_16_adds_subs_sse2(x[4], x[6]);
+ btf_16_adds_subs_sse2(x[5], x[7]);
+
+ // stage 6
+ btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[2], x[3], x[2], x[3]);
+ btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[6], x[7], x[6], x[7]);
+
+ // stage 7
+ output[0] = x[0];
+ output[1] = _mm_subs_epi16(__zero, x[4]);
+ output[2] = x[6];
+ output[3] = _mm_subs_epi16(__zero, x[2]);
+ output[4] = x[3];
+ output[5] = _mm_subs_epi16(__zero, x[7]);
+ output[6] = x[5];
+ output[7] = _mm_subs_epi16(__zero, x[1]);
+}
+
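+// The iadst16_stageN helpers below factor out the later stages of the
+// 16-point ADST so the reduced-input variants that follow can share them;
+// those variants differ mainly in how the early stages are specialized.
+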
+static INLINE void iadst16_stage3_ssse3(__m128i *x) {
+ btf_16_adds_subs_sse2(x[0], x[8]);
+ btf_16_adds_subs_sse2(x[1], x[9]);
+ btf_16_adds_subs_sse2(x[2], x[10]);
+ btf_16_adds_subs_sse2(x[3], x[11]);
+ btf_16_adds_subs_sse2(x[4], x[12]);
+ btf_16_adds_subs_sse2(x[5], x[13]);
+ btf_16_adds_subs_sse2(x[6], x[14]);
+ btf_16_adds_subs_sse2(x[7], x[15]);
+}
+
+static INLINE void iadst16_stage4_ssse3(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
+ const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
+ const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
+ const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
+ const __m128i cospi_m56_p08 = pair_set_epi16(-cospi[56], cospi[8]);
+ const __m128i cospi_m24_p40 = pair_set_epi16(-cospi[24], cospi[40]);
+ btf_16_sse2(cospi_p08_p56, cospi_p56_m08, x[8], x[9], x[8], x[9]);
+ btf_16_sse2(cospi_p40_p24, cospi_p24_m40, x[10], x[11], x[10], x[11]);
+ btf_16_sse2(cospi_m56_p08, cospi_p08_p56, x[12], x[13], x[12], x[13]);
+ btf_16_sse2(cospi_m24_p40, cospi_p40_p24, x[14], x[15], x[14], x[15]);
+}
+
+static INLINE void iadst16_stage5_ssse3(__m128i *x) {
+ btf_16_adds_subs_sse2(x[0], x[4]);
+ btf_16_adds_subs_sse2(x[1], x[5]);
+ btf_16_adds_subs_sse2(x[2], x[6]);
+ btf_16_adds_subs_sse2(x[3], x[7]);
+ btf_16_adds_subs_sse2(x[8], x[12]);
+ btf_16_adds_subs_sse2(x[9], x[13]);
+ btf_16_adds_subs_sse2(x[10], x[14]);
+ btf_16_adds_subs_sse2(x[11], x[15]);
+}
+
+static INLINE void iadst16_stage6_ssse3(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
+ const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
+ const __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
+ btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]);
+ btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x[6], x[7], x[6], x[7]);
+ btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[12], x[13], x[12], x[13]);
+ btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x[14], x[15], x[14], x[15]);
+}
+
+static INLINE void iadst16_stage7_ssse3(__m128i *x) {
+ btf_16_adds_subs_sse2(x[0], x[2]);
+ btf_16_adds_subs_sse2(x[1], x[3]);
+ btf_16_adds_subs_sse2(x[4], x[6]);
+ btf_16_adds_subs_sse2(x[5], x[7]);
+ btf_16_adds_subs_sse2(x[8], x[10]);
+ btf_16_adds_subs_sse2(x[9], x[11]);
+ btf_16_adds_subs_sse2(x[12], x[14]);
+ btf_16_adds_subs_sse2(x[13], x[15]);
+}
+
+static INLINE void iadst16_stage8_ssse3(__m128i *x, const int32_t *cospi,
+ const __m128i __rounding,
+ int8_t cos_bit) {
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
+ btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[2], x[3], x[2], x[3]);
+ btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[6], x[7], x[6], x[7]);
+ btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[10], x[11], x[10], x[11]);
+ btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[14], x[15], x[14], x[15]);
+}
+
+static INLINE void iadst16_stage9_ssse3(__m128i *output, __m128i *x) {
+ const __m128i __zero = _mm_setzero_si128();
+ output[0] = x[0];
+ output[1] = _mm_subs_epi16(__zero, x[8]);
+ output[2] = x[12];
+ output[3] = _mm_subs_epi16(__zero, x[4]);
+ output[4] = x[6];
+ output[5] = _mm_subs_epi16(__zero, x[14]);
+ output[6] = x[10];
+ output[7] = _mm_subs_epi16(__zero, x[2]);
+ output[8] = x[3];
+ output[9] = _mm_subs_epi16(__zero, x[11]);
+ output[10] = x[15];
+ output[11] = _mm_subs_epi16(__zero, x[7]);
+ output[12] = x[5];
+ output[13] = _mm_subs_epi16(__zero, x[13]);
+ output[14] = x[9];
+ output[15] = _mm_subs_epi16(__zero, x[1]);
+}
+
+static void iadst16_low1_new_ssse3(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
+ const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
+ const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
+ const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
+
+ // stage 1
+ __m128i x[16];
+ x[1] = input[0];
+
+ // stage 2
+ btf_16_ssse3(cospi[62], -cospi[2], x[1], x[0], x[1]);
+
+ // stage 3
+ x[8] = x[0];
+ x[9] = x[1];
+
+ // stage 4
+ btf_16_sse2(cospi_p08_p56, cospi_p56_m08, x[8], x[9], x[8], x[9]);
+
+ // stage 5
+ x[4] = x[0];
+ x[5] = x[1];
+ x[12] = x[8];
+ x[13] = x[9];
+
+ // stage 6
+ btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]);
+ btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[12], x[13], x[12], x[13]);
+
+ // stage 7
+ x[2] = x[0];
+ x[3] = x[1];
+ x[6] = x[4];
+ x[7] = x[5];
+ x[10] = x[8];
+ x[11] = x[9];
+ x[14] = x[12];
+ x[15] = x[13];
+
+ iadst16_stage8_ssse3(x, cospi, __rounding, cos_bit);
+ iadst16_stage9_ssse3(output, x);
+}
+
+static void iadst16_low8_new_ssse3(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ // stage 1
+ __m128i x[16];
+ x[1] = input[0];
+ x[3] = input[2];
+ x[5] = input[4];
+ x[7] = input[6];
+ x[8] = input[7];
+ x[10] = input[5];
+ x[12] = input[3];
+ x[14] = input[1];
+
+ // stage 2
+ btf_16_ssse3(cospi[62], -cospi[2], x[1], x[0], x[1]);
+ btf_16_ssse3(cospi[54], -cospi[10], x[3], x[2], x[3]);
+ btf_16_ssse3(cospi[46], -cospi[18], x[5], x[4], x[5]);
+ btf_16_ssse3(cospi[38], -cospi[26], x[7], x[6], x[7]);
+ btf_16_ssse3(cospi[34], cospi[30], x[8], x[8], x[9]);
+ btf_16_ssse3(cospi[42], cospi[22], x[10], x[10], x[11]);
+ btf_16_ssse3(cospi[50], cospi[14], x[12], x[12], x[13]);
+ btf_16_ssse3(cospi[58], cospi[6], x[14], x[14], x[15]);
+
+ // stage 3
+ iadst16_stage3_ssse3(x);
+ iadst16_stage4_ssse3(x, cospi, __rounding, cos_bit);
+ iadst16_stage5_ssse3(x);
+ iadst16_stage6_ssse3(x, cospi, __rounding, cos_bit);
+ iadst16_stage7_ssse3(x);
+ iadst16_stage8_ssse3(x, cospi, __rounding, cos_bit);
+ iadst16_stage9_ssse3(output, x);
+}
+void iadst16_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+ const __m128i cospi_p02_p62 = pair_set_epi16(cospi[2], cospi[62]);
+ const __m128i cospi_p62_m02 = pair_set_epi16(cospi[62], -cospi[2]);
+ const __m128i cospi_p10_p54 = pair_set_epi16(cospi[10], cospi[54]);
+ const __m128i cospi_p54_m10 = pair_set_epi16(cospi[54], -cospi[10]);
+ const __m128i cospi_p18_p46 = pair_set_epi16(cospi[18], cospi[46]);
+ const __m128i cospi_p46_m18 = pair_set_epi16(cospi[46], -cospi[18]);
+ const __m128i cospi_p26_p38 = pair_set_epi16(cospi[26], cospi[38]);
+ const __m128i cospi_p38_m26 = pair_set_epi16(cospi[38], -cospi[26]);
+ const __m128i cospi_p34_p30 = pair_set_epi16(cospi[34], cospi[30]);
+ const __m128i cospi_p30_m34 = pair_set_epi16(cospi[30], -cospi[34]);
+ const __m128i cospi_p42_p22 = pair_set_epi16(cospi[42], cospi[22]);
+ const __m128i cospi_p22_m42 = pair_set_epi16(cospi[22], -cospi[42]);
+ const __m128i cospi_p50_p14 = pair_set_epi16(cospi[50], cospi[14]);
+ const __m128i cospi_p14_m50 = pair_set_epi16(cospi[14], -cospi[50]);
+ const __m128i cospi_p58_p06 = pair_set_epi16(cospi[58], cospi[6]);
+ const __m128i cospi_p06_m58 = pair_set_epi16(cospi[6], -cospi[58]);
+
+ // stage 1
+ __m128i x[16];
+ x[0] = input[15];
+ x[1] = input[0];
+ x[2] = input[13];
+ x[3] = input[2];
+ x[4] = input[11];
+ x[5] = input[4];
+ x[6] = input[9];
+ x[7] = input[6];
+ x[8] = input[7];
+ x[9] = input[8];
+ x[10] = input[5];
+ x[11] = input[10];
+ x[12] = input[3];
+ x[13] = input[12];
+ x[14] = input[1];
+ x[15] = input[14];
+
+ // stage 2
+ btf_16_sse2(cospi_p02_p62, cospi_p62_m02, x[0], x[1], x[0], x[1]);
+ btf_16_sse2(cospi_p10_p54, cospi_p54_m10, x[2], x[3], x[2], x[3]);
+ btf_16_sse2(cospi_p18_p46, cospi_p46_m18, x[4], x[5], x[4], x[5]);
+ btf_16_sse2(cospi_p26_p38, cospi_p38_m26, x[6], x[7], x[6], x[7]);
+ btf_16_sse2(cospi_p34_p30, cospi_p30_m34, x[8], x[9], x[8], x[9]);
+ btf_16_sse2(cospi_p42_p22, cospi_p22_m42, x[10], x[11], x[10], x[11]);
+ btf_16_sse2(cospi_p50_p14, cospi_p14_m50, x[12], x[13], x[12], x[13]);
+ btf_16_sse2(cospi_p58_p06, cospi_p06_m58, x[14], x[15], x[14], x[15]);
+
+ // stages 3 to 9
+ iadst16_stage3_ssse3(x);
+ iadst16_stage4_ssse3(x, cospi, __rounding, cos_bit);
+ iadst16_stage5_ssse3(x);
+ iadst16_stage6_ssse3(x, cospi, __rounding, cos_bit);
+ iadst16_stage7_ssse3(x);
+ iadst16_stage8_ssse3(x, cospi, __rounding, cos_bit);
+ iadst16_stage9_ssse3(output, x);
+}
+
+void iadst16_w4_new_sse2(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int32_t *cospi = cospi_arr(INV_COS_BIT);
+ const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
+
+ const __m128i cospi_p02_p62 = pair_set_epi16(cospi[2], cospi[62]);
+ const __m128i cospi_p62_m02 = pair_set_epi16(cospi[62], -cospi[2]);
+ const __m128i cospi_p10_p54 = pair_set_epi16(cospi[10], cospi[54]);
+ const __m128i cospi_p54_m10 = pair_set_epi16(cospi[54], -cospi[10]);
+ const __m128i cospi_p18_p46 = pair_set_epi16(cospi[18], cospi[46]);
+ const __m128i cospi_p46_m18 = pair_set_epi16(cospi[46], -cospi[18]);
+ const __m128i cospi_p26_p38 = pair_set_epi16(cospi[26], cospi[38]);
+ const __m128i cospi_p38_m26 = pair_set_epi16(cospi[38], -cospi[26]);
+ const __m128i cospi_p34_p30 = pair_set_epi16(cospi[34], cospi[30]);
+ const __m128i cospi_p30_m34 = pair_set_epi16(cospi[30], -cospi[34]);
+ const __m128i cospi_p42_p22 = pair_set_epi16(cospi[42], cospi[22]);
+ const __m128i cospi_p22_m42 = pair_set_epi16(cospi[22], -cospi[42]);
+ const __m128i cospi_p50_p14 = pair_set_epi16(cospi[50], cospi[14]);
+ const __m128i cospi_p14_m50 = pair_set_epi16(cospi[14], -cospi[50]);
+ const __m128i cospi_p58_p06 = pair_set_epi16(cospi[58], cospi[6]);
+ const __m128i cospi_p06_m58 = pair_set_epi16(cospi[6], -cospi[58]);
+ const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
+ const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
+ const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
+ const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
+ const __m128i cospi_m56_p08 = pair_set_epi16(-cospi[56], cospi[8]);
+ const __m128i cospi_m24_p40 = pair_set_epi16(-cospi[24], cospi[40]);
+ const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
+ const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
+ const __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
+ const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
+ const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
+
+ // stage 1
+ __m128i x[16];
+ x[0] = input[15];
+ x[1] = input[0];
+ x[2] = input[13];
+ x[3] = input[2];
+ x[4] = input[11];
+ x[5] = input[4];
+ x[6] = input[9];
+ x[7] = input[6];
+ x[8] = input[7];
+ x[9] = input[8];
+ x[10] = input[5];
+ x[11] = input[10];
+ x[12] = input[3];
+ x[13] = input[12];
+ x[14] = input[1];
+ x[15] = input[14];
+
+ // stage 2
+ btf_16_4p_sse2(cospi_p02_p62, cospi_p62_m02, x[0], x[1], x[0], x[1]);
+ btf_16_4p_sse2(cospi_p10_p54, cospi_p54_m10, x[2], x[3], x[2], x[3]);
+ btf_16_4p_sse2(cospi_p18_p46, cospi_p46_m18, x[4], x[5], x[4], x[5]);
+ btf_16_4p_sse2(cospi_p26_p38, cospi_p38_m26, x[6], x[7], x[6], x[7]);
+ btf_16_4p_sse2(cospi_p34_p30, cospi_p30_m34, x[8], x[9], x[8], x[9]);
+ btf_16_4p_sse2(cospi_p42_p22, cospi_p22_m42, x[10], x[11], x[10], x[11]);
+ btf_16_4p_sse2(cospi_p50_p14, cospi_p14_m50, x[12], x[13], x[12], x[13]);
+ btf_16_4p_sse2(cospi_p58_p06, cospi_p06_m58, x[14], x[15], x[14], x[15]);
+
+ // stage 3
+ iadst16_stage3_ssse3(x);
+
+ // stage 4
+ btf_16_4p_sse2(cospi_p08_p56, cospi_p56_m08, x[8], x[9], x[8], x[9]);
+ btf_16_4p_sse2(cospi_p40_p24, cospi_p24_m40, x[10], x[11], x[10], x[11]);
+ btf_16_4p_sse2(cospi_m56_p08, cospi_p08_p56, x[12], x[13], x[12], x[13]);
+ btf_16_4p_sse2(cospi_m24_p40, cospi_p40_p24, x[14], x[15], x[14], x[15]);
+
+ // stage 5
+ iadst16_stage5_ssse3(x);
+
+ // stage 6
+ btf_16_4p_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]);
+ btf_16_4p_sse2(cospi_m48_p16, cospi_p16_p48, x[6], x[7], x[6], x[7]);
+ btf_16_4p_sse2(cospi_p16_p48, cospi_p48_m16, x[12], x[13], x[12], x[13]);
+ btf_16_4p_sse2(cospi_m48_p16, cospi_p16_p48, x[14], x[15], x[14], x[15]);
+
+ // stage 7
+ iadst16_stage7_ssse3(x);
+
+ // stage 8
+ btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[2], x[3], x[2], x[3]);
+ btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[6], x[7], x[6], x[7]);
+ btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[10], x[11], x[10], x[11]);
+ btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[14], x[15], x[14], x[15]);
+
+ // stage 9
+ iadst16_stage9_ssse3(output, x);
+}
+
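+// Identity transform on 4 points: AV1 scales it by sqrt(2). The fractional
+// part (NewSqrt2 - 2^NewSqrt2Bits) is applied with a rounded Q15 multiply
+// (_mm_mulhrs_epi16) and the input supplies the integer part, giving
+// x + x * (sqrt(2) - 1) = x * sqrt(2).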
+static void iidentity4_new_ssse3(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int16_t scale_fractional = (NewSqrt2 - (1 << NewSqrt2Bits));
+ const __m128i scale = _mm_set1_epi16(scale_fractional << (15 - NewSqrt2Bits));
+ for (int i = 0; i < 4; ++i) {
+ __m128i x = _mm_mulhrs_epi16(input[i], scale);
+ output[i] = _mm_adds_epi16(x, input[i]);
+ }
+}
+
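+// Identity transform on 8 points: the scale is exactly 2, so a saturating
+// add of the input with itself is enough.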
+static void iidentity8_new_sse2(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ for (int i = 0; i < 8; ++i) {
+ output[i] = _mm_adds_epi16(input[i], input[i]);
+ }
+}
+
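+// Identity transform on 16 points: the scale is 2 * sqrt(2), computed as
+// 2 * x plus x times the fractional part 2 * (sqrt(2) - 1) via
+// _mm_mulhrs_epi16.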
+static void iidentity16_new_ssse3(const __m128i *input, __m128i *output,
+ int8_t cos_bit) {
+ (void)cos_bit;
+ const int16_t scale_fractional = 2 * (NewSqrt2 - (1 << NewSqrt2Bits));
+ const __m128i scale = _mm_set1_epi16(scale_fractional << (15 - NewSqrt2Bits));
+ for (int i = 0; i < 16; ++i) {
+ __m128i x = _mm_mulhrs_epi16(input[i], scale);
+ __m128i srcx2 = _mm_adds_epi16(input[i], input[i]);
+ output[i] = _mm_adds_epi16(x, srcx2);
+ }
+}
+
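+// Add an 8-pixel row of 16-bit residual to the 8-bit prediction and pack the
+// result back to 8 bits with unsigned saturation.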
+static INLINE __m128i lowbd_get_recon_8x8_sse2(const __m128i pred,
+ __m128i res) {
+ const __m128i zero = _mm_setzero_si128();
+ __m128i x0 = _mm_adds_epi16(res, _mm_unpacklo_epi8(pred, zero));
+ return _mm_packus_epi16(x0, x0);
+}
+
+static INLINE void lowbd_write_buffer_4xn_sse2(__m128i *in, uint8_t *output,
+ int stride, int flipud,
+ const int height) {
+ int j = flipud ? (height - 1) : 0;
+ const int step = flipud ? -1 : 1;
+ const __m128i zero = _mm_setzero_si128();
+ for (int i = 0; i < height; ++i, j += step) {
+ const __m128i v = _mm_cvtsi32_si128(*((uint32_t *)(output + i * stride)));
+ __m128i u = _mm_adds_epi16(in[j], _mm_unpacklo_epi8(v, zero));
+ u = _mm_packus_epi16(u, zero);
+ *((uint32_t *)(output + i * stride)) = _mm_cvtsi128_si32(u);
+ }
+}
+
+static INLINE void lowbd_write_buffer_8xn_sse2(__m128i *in, uint8_t *output,
+ int stride, int flipud,
+ const int height) {
+ int j = flipud ? (height - 1) : 0;
+ const int step = flipud ? -1 : 1;
+ for (int i = 0; i < height; ++i, j += step) {
+ const __m128i v = _mm_loadl_epi64((__m128i const *)(output + i * stride));
+ const __m128i u = lowbd_get_recon_8x8_sse2(v, in[j]);
+ _mm_storel_epi64((__m128i *)(output + i * stride), u);
+ }
+}
+
+// 1D functions process 8 pixels at a time.
+static const transform_1d_ssse3
+ lowbd_txfm_all_1d_w8_arr[TX_SIZES][ITX_TYPES_1D] = {
+ { idct4_new_sse2, iadst4_new_sse2, iidentity4_new_ssse3 },
+ { idct8_new_sse2, iadst8_new_sse2, iidentity8_new_sse2 },
+ { idct16_new_sse2, iadst16_new_sse2, iidentity16_new_ssse3 },
+ { idct32_new_sse2, NULL, NULL },
+ { idct64_low32_new_ssse3, NULL, NULL },
+ };
+
+// Functions for blocks whose eob lies at DC or within the
+// top-left 8x8, 16x16 or 32x32 corner.
+static const transform_1d_ssse3
+ lowbd_txfm_all_1d_zeros_w8_arr[TX_SIZES][ITX_TYPES_1D][4] = {
+ {
+ { idct4_new_sse2, idct4_new_sse2, NULL, NULL },
+ { iadst4_new_sse2, iadst4_new_sse2, NULL, NULL },
+ { iidentity4_new_ssse3, iidentity4_new_ssse3, NULL, NULL },
+ },
+ { { idct8_low1_new_ssse3, idct8_new_sse2, NULL, NULL },
+ { iadst8_low1_new_ssse3, iadst8_new_sse2, NULL, NULL },
+ { iidentity8_new_sse2, iidentity8_new_sse2, NULL, NULL } },
+ {
+ { idct16_low1_new_ssse3, idct16_low8_new_ssse3, idct16_new_sse2,
+ NULL },
+ { iadst16_low1_new_ssse3, iadst16_low8_new_ssse3, iadst16_new_sse2,
+ NULL },
+ { NULL, NULL, NULL, NULL },
+ },
+ { { idct32_low1_new_ssse3, idct32_low8_new_ssse3, idct32_low16_new_ssse3,
+ idct32_new_sse2 },
+ { NULL, NULL, NULL, NULL },
+ { NULL, NULL, NULL, NULL } },
+ { { idct64_low1_new_ssse3, idct64_low8_new_ssse3, idct64_low16_new_ssse3,
+ idct64_low32_new_ssse3 },
+ { NULL, NULL, NULL, NULL },
+ { NULL, NULL, NULL, NULL } }
+ };
+
+// 1D functions process 4 pixels at a time.
+// Used for the 4x4, 4x8, 4x16, 8x4 and 16x4 sizes.
+static const transform_1d_ssse3
+ lowbd_txfm_all_1d_w4_arr[TX_SIZES][ITX_TYPES_1D] = {
+ { idct4_w4_new_sse2, iadst4_w4_new_sse2, iidentity4_new_ssse3 },
+ { idct8_w4_new_sse2, iadst8_w4_new_sse2, iidentity8_new_sse2 },
+ { idct16_w4_new_sse2, iadst16_w4_new_sse2, iidentity16_new_ssse3 },
+ { NULL, NULL, NULL },
+ { NULL, NULL, NULL },
+ };
+
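+// Row identity pass: each source row is scaled by NewSqrt2list[txw_idx] and
+// round-shifted by (NewSqrt2Bits - shift). The rounding constant is folded
+// into the multiply by interleaving (src, 1) with (scale, rounding) before
+// _mm_madd_epi16, so a single madd yields src * scale + rounding per lane.
+// Rectangular sizes get an extra 1/sqrt(2) pre-scale.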
+static INLINE void iidentity_row_8xn_ssse3(__m128i *out, const int32_t *input,
+ int stride, int shift, int height,
+ int txw_idx, int rect_type) {
+ const int32_t *input_row = input;
+ const __m128i scale = _mm_set1_epi16(NewSqrt2list[txw_idx]);
+ const __m128i rounding = _mm_set1_epi16((1 << (NewSqrt2Bits - 1)) +
+ (1 << (NewSqrt2Bits - shift - 1)));
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i scale_rounding = _mm_unpacklo_epi16(scale, rounding);
+ if (rect_type != 1 && rect_type != -1) {
+ for (int i = 0; i < height; ++i) {
+ const __m128i src = load_32bit_to_16bit(input_row);
+ input_row += stride;
+ __m128i lo = _mm_unpacklo_epi16(src, one);
+ __m128i hi = _mm_unpackhi_epi16(src, one);
+ lo = _mm_madd_epi16(lo, scale_rounding);
+ hi = _mm_madd_epi16(hi, scale_rounding);
+ lo = _mm_srai_epi32(lo, NewSqrt2Bits - shift);
+ hi = _mm_srai_epi32(hi, NewSqrt2Bits - shift);
+ out[i] = _mm_packs_epi32(lo, hi);
+ }
+ } else {
+ const __m128i rect_scale =
+ _mm_set1_epi16(NewInvSqrt2 << (15 - NewSqrt2Bits));
+ for (int i = 0; i < height; ++i) {
+ __m128i src = load_32bit_to_16bit(input_row);
+ src = _mm_mulhrs_epi16(src, rect_scale);
+ input_row += stride;
+ __m128i lo = _mm_unpacklo_epi16(src, one);
+ __m128i hi = _mm_unpackhi_epi16(src, one);
+ lo = _mm_madd_epi16(lo, scale_rounding);
+ hi = _mm_madd_epi16(hi, scale_rounding);
+ lo = _mm_srai_epi32(lo, NewSqrt2Bits - shift);
+ hi = _mm_srai_epi32(hi, NewSqrt2Bits - shift);
+ out[i] = _mm_packs_epi32(lo, hi);
+ }
+ }
+}
+
+static INLINE void iidentity_col_8xn_ssse3(uint8_t *output, int stride,
+ __m128i *buf, int shift, int height,
+ int txh_idx) {
+ const __m128i scale = _mm_set1_epi16(NewSqrt2list[txh_idx]);
+ const __m128i scale_rounding = _mm_set1_epi16(1 << (NewSqrt2Bits - 1));
+ const __m128i shift_rounding = _mm_set1_epi32(1 << (-shift - 1));
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i scale_coeff = _mm_unpacklo_epi16(scale, scale_rounding);
+ const __m128i zero = _mm_setzero_si128();
+ for (int h = 0; h < height; ++h) {
+ __m128i lo = _mm_unpacklo_epi16(buf[h], one);
+ __m128i hi = _mm_unpackhi_epi16(buf[h], one);
+ lo = _mm_madd_epi16(lo, scale_coeff);
+ hi = _mm_madd_epi16(hi, scale_coeff);
+ lo = _mm_srai_epi32(lo, NewSqrt2Bits);
+ hi = _mm_srai_epi32(hi, NewSqrt2Bits);
+ lo = _mm_add_epi32(lo, shift_rounding);
+ hi = _mm_add_epi32(hi, shift_rounding);
+ lo = _mm_srai_epi32(lo, -shift);
+ hi = _mm_srai_epi32(hi, -shift);
+ __m128i x = _mm_packs_epi32(lo, hi);
+
+ const __m128i pred = _mm_loadl_epi64((__m128i const *)(output));
+ x = _mm_adds_epi16(x, _mm_unpacklo_epi8(pred, zero));
+ const __m128i u = _mm_packus_epi16(x, x);
+ _mm_storel_epi64((__m128i *)(output), u);
+ output += stride;
+ }
+}
+
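+// 2D IDTX path: both passes are pure scalings, so each 8-column strip is
+// scaled row-wise and added straight into the prediction; no transposes are
+// needed.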
+static INLINE void lowbd_inv_txfm2d_add_idtx_ssse3(const int32_t *input,
+ uint8_t *output, int stride,
+ TX_SIZE tx_size) {
+ const int8_t *shift = inv_txfm_shift_ls[tx_size];
+ const int txw_idx = get_txw_idx(tx_size);
+ const int txh_idx = get_txh_idx(tx_size);
+ const int txfm_size_col = tx_size_wide[tx_size];
+ const int txfm_size_row = tx_size_high[tx_size];
+ const int input_stride = AOMMIN(32, txfm_size_col);
+ const int row_max = AOMMIN(32, txfm_size_row);
+ const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
+ __m128i buf[32];
+
+ for (int i = 0; i < (input_stride >> 3); ++i) {
+ iidentity_row_8xn_ssse3(buf, input + 8 * i, input_stride, shift[0], row_max,
+ txw_idx, rect_type);
+ iidentity_col_8xn_ssse3(output + 8 * i, stride, buf, shift[1], row_max,
+ txh_idx);
+ }
+}
+
+void lowbd_inv_txfm2d_add_4x4_ssse3(const int32_t *input, uint8_t *output,
+ int stride, TX_TYPE tx_type,
+ TX_SIZE tx_size_, int eob) {
+ (void)tx_size_;
+ (void)eob;
+ __m128i buf[4];
+ const TX_SIZE tx_size = TX_4X4;
+ const int8_t *shift = inv_txfm_shift_ls[tx_size];
+ const int txw_idx = get_txw_idx(tx_size);
+ const int txh_idx = get_txh_idx(tx_size);
+ const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
+ const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
+ const int txfm_size_col = tx_size_wide[tx_size];
+ const int txfm_size_row = tx_size_high[tx_size];
+
+ const transform_1d_ssse3 row_txfm =
+ lowbd_txfm_all_1d_w4_arr[txw_idx][hitx_1d_tab[tx_type]];
+ const transform_1d_ssse3 col_txfm =
+ lowbd_txfm_all_1d_w4_arr[txh_idx][vitx_1d_tab[tx_type]];
+
+ int ud_flip, lr_flip;
+ get_flip_cfg(tx_type, &ud_flip, &lr_flip);
+ load_buffer_32bit_to_16bit_w4(input, txfm_size_col, buf, txfm_size_row);
+ transpose_16bit_4x4(buf, buf);
+ row_txfm(buf, buf, cos_bit_row);
+ if (lr_flip) {
+ __m128i temp[4];
+ flip_buf_sse2(buf, temp, txfm_size_col);
+ transpose_16bit_4x4(temp, buf);
+ } else {
+ transpose_16bit_4x4(buf, buf);
+ }
+ col_txfm(buf, buf, cos_bit_col);
+ round_shift_16bit_ssse3(buf, txfm_size_row, shift[1]);
+ lowbd_write_buffer_4xn_sse2(buf, output, stride, ud_flip, txfm_size_row);
+}
+
+static INLINE __m128i lowbd_get_recon_16x16_sse2(const __m128i pred,
+ __m128i res0, __m128i res1) {
+ const __m128i zero = _mm_setzero_si128();
+ __m128i x0 = _mm_unpacklo_epi8(pred, zero);
+ __m128i x1 = _mm_unpackhi_epi8(pred, zero);
+ x0 = _mm_adds_epi16(res0, x0);
+ x1 = _mm_adds_epi16(res1, x1);
+ return _mm_packus_epi16(x0, x1);
+}
+
+static INLINE void lowbd_write_buffer_16xn_sse2(__m128i *in, uint8_t *output,
+ int stride, int flipud,
+ int height) {
+ int j = flipud ? (height - 1) : 0;
+ const int step = flipud ? -1 : 1;
+ for (int i = 0; i < height; ++i, j += step) {
+ __m128i v = _mm_loadu_si128((__m128i const *)(output + i * stride));
+ __m128i u = lowbd_get_recon_16x16_sse2(v, in[j], in[j + height]);
+ _mm_storeu_si128((__m128i *)(output + i * stride), u);
+ }
+}
+
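+// Multiply each vector by 1/sqrt(2): NewInvSqrt2 is 1/sqrt(2) in Q12, and
+// the factor of 8 turns the rounded Q15 multiply of _mm_mulhrs_epi16 into a
+// Q12 one. This is the extra normalization used by rectangular transform
+// sizes (the "rect special code" at the call sites).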
+static INLINE void round_shift_ssse3(const __m128i *input, __m128i *output,
+ int size) {
+ const __m128i scale = _mm_set1_epi16(NewInvSqrt2 * 8);
+ for (int i = 0; i < size; ++i) {
+ output[i] = _mm_mulhrs_epi16(input[i], scale);
+ }
+}
+
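+// General 2D path (no identity pass): 8-row strips of dequantized
+// coefficients are loaded and transposed, the row transform is run only over
+// the nonzero region implied by (eobx, eoby), the results are transposed
+// into column order in buf1, and the column transform plus shift[1] are
+// applied before the residual is added to the prediction.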
+static INLINE void lowbd_inv_txfm2d_add_no_identity_ssse3(
+ const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
+ TX_SIZE tx_size, int eob) {
+ __m128i buf1[64 * 8];
+ int eobx, eoby;
+ get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob);
+ const int8_t *shift = inv_txfm_shift_ls[tx_size];
+ const int txw_idx = get_txw_idx(tx_size);
+ const int txh_idx = get_txh_idx(tx_size);
+ const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
+ const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
+ const int txfm_size_col = tx_size_wide[tx_size];
+ const int txfm_size_row = tx_size_high[tx_size];
+ const int buf_size_w_div8 = txfm_size_col >> 3;
+ const int buf_size_nonzero_w_div8 = (eobx + 8) >> 3;
+ const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
+ const int input_stride = AOMMIN(32, txfm_size_col);
+ const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
+
+ const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
+ const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
+ const transform_1d_ssse3 row_txfm =
+ lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
+ const transform_1d_ssse3 col_txfm =
+ lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
+
+ assert(col_txfm != NULL);
+ assert(row_txfm != NULL);
+ int ud_flip, lr_flip;
+ get_flip_cfg(tx_type, &ud_flip, &lr_flip);
+ for (int i = 0; i < buf_size_nonzero_h_div8; i++) {
+ __m128i buf0[64];
+ const int32_t *input_row = input + i * input_stride * 8;
+ for (int j = 0; j < buf_size_nonzero_w_div8; ++j) {
+ __m128i *buf0_cur = buf0 + j * 8;
+ load_buffer_32bit_to_16bit(input_row + j * 8, input_stride, buf0_cur, 8);
+ transpose_16bit_8x8(buf0_cur, buf0_cur);
+ }
+ if (rect_type == 1 || rect_type == -1) {
+ round_shift_ssse3(buf0, buf0, input_stride); // rect special code
+ }
+ row_txfm(buf0, buf0, cos_bit_row);
+ round_shift_16bit_ssse3(buf0, txfm_size_col, shift[0]);
+ __m128i *_buf1 = buf1 + i * 8;
+ if (lr_flip) {
+ for (int j = 0; j < buf_size_w_div8; ++j) {
+ __m128i temp[8];
+ flip_buf_sse2(buf0 + 8 * j, temp, 8);
+ transpose_16bit_8x8(temp,
+ _buf1 + txfm_size_row * (buf_size_w_div8 - 1 - j));
+ }
+ } else {
+ for (int j = 0; j < buf_size_w_div8; ++j) {
+ transpose_16bit_8x8(buf0 + 8 * j, _buf1 + txfm_size_row * j);
+ }
+ }
+ }
+ for (int i = 0; i < buf_size_w_div8; i++) {
+ col_txfm(buf1 + i * txfm_size_row, buf1 + i * txfm_size_row, cos_bit_col);
+ round_shift_16bit_ssse3(buf1 + i * txfm_size_row, txfm_size_row, shift[1]);
+ }
+
+ if (txfm_size_col >= 16) {
+ for (int i = 0; i < (txfm_size_col >> 4); i++) {
+ lowbd_write_buffer_16xn_sse2(buf1 + i * txfm_size_row * 2,
+ output + 16 * i, stride, ud_flip,
+ txfm_size_row);
+ }
+ } else if (txfm_size_col == 8) {
+ lowbd_write_buffer_8xn_sse2(buf1, output, stride, ud_flip, txfm_size_row);
+ }
+}
+
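+// Horizontal-identity path (V_DCT/V_ADST/V_FLIPADST): the row pass is an
+// identity scaling, so no transposes are needed; each 8-column strip is
+// scaled, run through the column transform, round-shifted by shift[1] and
+// added to the prediction.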
+static INLINE void lowbd_inv_txfm2d_add_h_identity_ssse3(
+ const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
+ TX_SIZE tx_size, int eob) {
+ const int8_t *shift = inv_txfm_shift_ls[tx_size];
+ int eobx, eoby;
+ get_eobx_eoby_scan_h_identity(&eobx, &eoby, tx_size, eob);
+ const int txw_idx = get_txw_idx(tx_size);
+ const int txh_idx = get_txh_idx(tx_size);
+ const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
+ const int txfm_size_col = tx_size_wide[tx_size];
+ const int txfm_size_row = tx_size_high[tx_size];
+ const int buf_size_w_div8 = (eobx + 8) >> 3;
+ const int input_stride = AOMMIN(32, txfm_size_col);
+ const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
+
+ const int fun_idx = lowbd_txfm_all_1d_zeros_idx[eoby];
+ assert(fun_idx < 5);
+ const transform_1d_ssse3 col_txfm =
+ lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx];
+
+ assert(col_txfm != NULL);
+
+ int ud_flip, lr_flip;
+ get_flip_cfg(tx_type, &ud_flip, &lr_flip);
+ for (int i = 0; i < buf_size_w_div8; i++) {
+ __m128i buf0[64];
+ iidentity_row_8xn_ssse3(buf0, input + 8 * i, input_stride, shift[0],
+ eoby + 1, txw_idx, rect_type);
+ col_txfm(buf0, buf0, cos_bit_col);
+ __m128i mshift = _mm_set1_epi16(1 << (15 + shift[1]));
+ int k = ud_flip ? (txfm_size_row - 1) : 0;
+ const int step = ud_flip ? -1 : 1;
+ uint8_t *out = output + 8 * i;
+ for (int j = 0; j < txfm_size_row; ++j, k += step) {
+ const __m128i v = _mm_loadl_epi64((__m128i const *)(out));
+ __m128i res = _mm_mulhrs_epi16(buf0[k], mshift);
+ const __m128i u = lowbd_get_recon_8x8_sse2(v, res);
+ _mm_storel_epi64((__m128i *)(out), u);
+ out += stride;
+ }
+ }
+}
+
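+// Vertical-identity path (H_DCT/H_ADST/H_FLIPADST): only the row transform
+// does real work; after it runs on transposed strips, the identity column
+// pass simply scales each strip and adds it into the prediction.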
+static INLINE void lowbd_inv_txfm2d_add_v_identity_ssse3(
+ const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
+ TX_SIZE tx_size, int eob) {
+ __m128i buf1[64];
+ int eobx, eoby;
+ get_eobx_eoby_scan_v_identity(&eobx, &eoby, tx_size, eob);
+ const int8_t *shift = inv_txfm_shift_ls[tx_size];
+ const int txw_idx = get_txw_idx(tx_size);
+ const int txh_idx = get_txh_idx(tx_size);
+ const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
+ const int txfm_size_col = tx_size_wide[tx_size];
+ const int txfm_size_row = tx_size_high[tx_size];
+ const int buf_size_w_div8 = txfm_size_col >> 3;
+ const int buf_size_h_div8 = (eoby + 8) >> 3;
+ const int input_stride = AOMMIN(32, txfm_size_col);
+ const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
+
+ const int fun_idx = lowbd_txfm_all_1d_zeros_idx[eobx];
+ const transform_1d_ssse3 row_txfm =
+ lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx];
+
+ assert(row_txfm != NULL);
+ int ud_flip, lr_flip;
+ get_flip_cfg(tx_type, &ud_flip, &lr_flip);
+ for (int i = 0; i < buf_size_h_div8; i++) {
+ __m128i buf0[64];
+ const int32_t *input_row = input + i * input_stride * 8;
+ for (int j = 0; j < AOMMIN(4, buf_size_w_div8); ++j) {
+ __m128i *buf0_cur = buf0 + j * 8;
+ load_buffer_32bit_to_16bit(input_row + j * 8, input_stride, buf0_cur, 8);
+ transpose_16bit_8x8(buf0_cur, buf0_cur);
+ }
+ if (rect_type == 1 || rect_type == -1) {
+ round_shift_ssse3(buf0, buf0, input_stride); // rect special code
+ }
+ row_txfm(buf0, buf0, cos_bit_row);
+ round_shift_16bit_ssse3(buf0, txfm_size_col, shift[0]);
+ __m128i *_buf1 = buf1;
+ if (lr_flip) {
+ for (int j = 0; j < buf_size_w_div8; ++j) {
+ __m128i temp[8];
+ flip_buf_sse2(buf0 + 8 * j, temp, 8);
+ transpose_16bit_8x8(temp, _buf1 + 8 * (buf_size_w_div8 - 1 - j));
+ }
+ } else {
+ for (int j = 0; j < buf_size_w_div8; ++j) {
+ transpose_16bit_8x8(buf0 + 8 * j, _buf1 + 8 * j);
+ }
+ }
+
+ for (int j = 0; j < buf_size_w_div8; ++j) {
+ iidentity_col_8xn_ssse3(output + i * 8 * stride + j * 8, stride,
+ buf1 + j * 8, shift[1], 8, txh_idx);
+ }
+ }
+}
+
+// For 32x32, 32x64, 64x32, 64x64, 32x8, 8x32, 16x32, 32x16, 64x16 and 16x64.
+static INLINE void lowbd_inv_txfm2d_add_universe_ssse3(
+ const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
+ TX_SIZE tx_size, int eob) {
+ switch (tx_type) {
+ case DCT_DCT:
+ lowbd_inv_txfm2d_add_no_identity_ssse3(input, output, stride, tx_type,
+ tx_size, eob);
+ break;
+ case IDTX:
+ lowbd_inv_txfm2d_add_idtx_ssse3(input, output, stride, tx_size);
+ break;
+ case V_DCT:
+ case V_ADST:
+ case V_FLIPADST:
+ lowbd_inv_txfm2d_add_h_identity_ssse3(input, output, stride, tx_type,
+ tx_size, eob);
+ break;
+ case H_DCT:
+ case H_ADST:
+ case H_FLIPADST:
+ lowbd_inv_txfm2d_add_v_identity_ssse3(input, output, stride, tx_type,
+ tx_size, eob);
+ break;
+ default:
+ lowbd_inv_txfm2d_add_no_identity_ssse3(input, output, stride, tx_type,
+ tx_size, eob);
+ break;
+ }
+}
+
+void lowbd_inv_txfm2d_add_4x8_ssse3(const int32_t *input, uint8_t *output,
+ int stride, TX_TYPE tx_type,
+ TX_SIZE tx_size_, int eob) {
+ (void)tx_size_;
+ (void)eob;
+ __m128i buf[8];
+ const TX_SIZE tx_size = TX_4X8;
+ const int8_t *shift = inv_txfm_shift_ls[tx_size];
+ const int txw_idx = get_txw_idx(tx_size);
+ const int txh_idx = get_txh_idx(tx_size);
+ const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
+ const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
+ const int txfm_size_col = tx_size_wide[tx_size];
+ const int txfm_size_row = tx_size_high[tx_size];
+
+ const transform_1d_ssse3 row_txfm =
+ lowbd_txfm_all_1d_w8_arr[txw_idx][hitx_1d_tab[tx_type]];
+ const transform_1d_ssse3 col_txfm =
+ lowbd_txfm_all_1d_w4_arr[txh_idx][vitx_1d_tab[tx_type]];
+
+ int ud_flip, lr_flip;
+ get_flip_cfg(tx_type, &ud_flip, &lr_flip);
+ load_buffer_32bit_to_16bit_w4(input, txfm_size_col, buf, txfm_size_row);
+ transpose_16bit_4x8(buf, buf);
+ round_shift_ssse3(buf, buf, txfm_size_col); // rect special code
+ row_txfm(buf, buf, cos_bit_row);
+ // round_shift_16bit_ssse3(buf, txfm_size_col, shift[0]);// shift[0] is 0
+ if (lr_flip) {
+ __m128i temp[4];
+ flip_buf_sse2(buf, temp, txfm_size_col);
+ transpose_16bit_8x4(temp, buf);
+ } else {
+ transpose_16bit_8x4(buf, buf);
+ }
+ col_txfm(buf, buf, cos_bit_col);
+ round_shift_16bit_ssse3(buf, txfm_size_row, shift[1]);
+ lowbd_write_buffer_4xn_sse2(buf, output, stride, ud_flip, txfm_size_row);
+}
+
+void lowbd_inv_txfm2d_add_8x4_ssse3(const int32_t *input, uint8_t *output,
+ int stride, TX_TYPE tx_type,
+ TX_SIZE tx_size_, int eob) {
+ (void)tx_size_;
+ (void)eob;
+ __m128i buf[8];
+ const TX_SIZE tx_size = TX_8X4;
+ const int8_t *shift = inv_txfm_shift_ls[tx_size];
+ const int txw_idx = get_txw_idx(tx_size);
+ const int txh_idx = get_txh_idx(tx_size);
+ const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
+ const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
+ const int txfm_size_col = tx_size_wide[tx_size];
+ const int txfm_size_row = tx_size_high[tx_size];
+
+ const transform_1d_ssse3 row_txfm =
+ lowbd_txfm_all_1d_w4_arr[txw_idx][hitx_1d_tab[tx_type]];
+ const transform_1d_ssse3 col_txfm =
+ lowbd_txfm_all_1d_w8_arr[txh_idx][vitx_1d_tab[tx_type]];
+
+ int ud_flip, lr_flip;
+ get_flip_cfg(tx_type, &ud_flip, &lr_flip);
+ load_buffer_32bit_to_16bit(input, txfm_size_col, buf, txfm_size_row);
+ transpose_16bit_8x4(buf, buf);
+ round_shift_ssse3(buf, buf, txfm_size_col); // rect special code
+ row_txfm(buf, buf, cos_bit_row);
+ // round_shift_16bit_ssse3(buf, txfm_size_col, shift[0]); // shift[0] is 0
+ if (lr_flip) {
+ __m128i temp[8];
+ flip_buf_sse2(buf, temp, txfm_size_col);
+ transpose_16bit_4x8(temp, buf);
+ } else {
+ transpose_16bit_4x8(buf, buf);
+ }
+ col_txfm(buf, buf, cos_bit_col);
+ round_shift_16bit_ssse3(buf, txfm_size_row, shift[1]);
+ lowbd_write_buffer_8xn_sse2(buf, output, stride, ud_flip, txfm_size_row);
+}
+
+void lowbd_inv_txfm2d_add_4x16_ssse3(const int32_t *input, uint8_t *output,
+ int stride, TX_TYPE tx_type,
+ TX_SIZE tx_size_, int eob) {
+ (void)tx_size_;
+ (void)eob;
+ __m128i buf[16];
+ const TX_SIZE tx_size = TX_4X16;
+ const int8_t *shift = inv_txfm_shift_ls[tx_size];
+ const int txw_idx = get_txw_idx(tx_size);
+ const int txh_idx = get_txh_idx(tx_size);
+ const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
+ const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
+ const int txfm_size_col = tx_size_wide[tx_size];
+ const int txfm_size_row = tx_size_high[tx_size];
+
+ const transform_1d_ssse3 row_txfm =
+ lowbd_txfm_all_1d_w8_arr[txw_idx][hitx_1d_tab[tx_type]];
+ const transform_1d_ssse3 col_txfm =
+ lowbd_txfm_all_1d_w4_arr[txh_idx][vitx_1d_tab[tx_type]];
+
+ int ud_flip, lr_flip;
+ get_flip_cfg(tx_type, &ud_flip, &lr_flip);
+
+ const int row_one_loop = 8;
+ for (int i = 0; i < 2; ++i) {
+ const int32_t *input_cur = input + i * txfm_size_col * row_one_loop;
+ __m128i *buf_cur = buf + i * row_one_loop;
+ load_buffer_32bit_to_16bit_w4(input_cur, txfm_size_col, buf_cur,
+ row_one_loop);
+ transpose_16bit_4x8(buf_cur, buf_cur);
+ row_txfm(buf_cur, buf_cur, cos_bit_row);
+ round_shift_16bit_ssse3(buf_cur, row_one_loop, shift[0]);
+ if (lr_flip) {
+ __m128i temp[8];
+ flip_buf_sse2(buf_cur, temp, txfm_size_col);
+ transpose_16bit_8x4(temp, buf_cur);
+ } else {
+ transpose_16bit_8x4(buf_cur, buf_cur);
+ }
+ }
+ col_txfm(buf, buf, cos_bit_col);
+ round_shift_16bit_ssse3(buf, txfm_size_row, shift[1]);
+ lowbd_write_buffer_4xn_sse2(buf, output, stride, ud_flip, txfm_size_row);
+}
+
+void lowbd_inv_txfm2d_add_16x4_ssse3(const int32_t *input, uint8_t *output,
+ int stride, TX_TYPE tx_type,
+ TX_SIZE tx_size_, int eob) {
+ (void)tx_size_;
+ (void)eob;
+ __m128i buf[16];
+ const TX_SIZE tx_size = TX_16X4;
+ const int8_t *shift = inv_txfm_shift_ls[tx_size];
+ const int txw_idx = get_txw_idx(tx_size);
+ const int txh_idx = get_txh_idx(tx_size);
+ const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
+ const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
+ const int txfm_size_col = tx_size_wide[tx_size];
+ const int txfm_size_row = tx_size_high[tx_size];
+ const int buf_size_w_div8 = txfm_size_col >> 3;
+
+ const transform_1d_ssse3 row_txfm =
+ lowbd_txfm_all_1d_w4_arr[txw_idx][hitx_1d_tab[tx_type]];
+ const transform_1d_ssse3 col_txfm =
+ lowbd_txfm_all_1d_w8_arr[txh_idx][vitx_1d_tab[tx_type]];
+
+ int ud_flip, lr_flip;
+ get_flip_cfg(tx_type, &ud_flip, &lr_flip);
+ const int row_one_loop = 8;
+ for (int i = 0; i < buf_size_w_div8; ++i) {
+ const int32_t *input_cur = input + i * row_one_loop;
+ __m128i *buf_cur = buf + i * row_one_loop;
+ load_buffer_32bit_to_16bit(input_cur, txfm_size_col, buf_cur,
+ txfm_size_row);
+ transpose_16bit_8x4(buf_cur, buf_cur);
+ }
+ row_txfm(buf, buf, cos_bit_row);
+ round_shift_16bit_ssse3(buf, txfm_size_col, shift[0]);
+ if (lr_flip) {
+ __m128i temp[16];
+ flip_buf_sse2(buf, temp, 16);
+ transpose_16bit_4x8(temp, buf);
+ transpose_16bit_4x8(temp + 8, buf + 8);
+ } else {
+ transpose_16bit_4x8(buf, buf);
+ transpose_16bit_4x8(buf + row_one_loop, buf + row_one_loop);
+ }
+ for (int i = 0; i < buf_size_w_div8; i++) {
+ col_txfm(buf + i * row_one_loop, buf + i * row_one_loop, cos_bit_col);
+ round_shift_16bit_ssse3(buf + i * row_one_loop, txfm_size_row, shift[1]);
+ }
+ lowbd_write_buffer_8xn_sse2(buf, output, stride, ud_flip, 4);
+ lowbd_write_buffer_8xn_sse2(buf + 8, output + 8, stride, ud_flip, 4);
+}
+
+void av1_lowbd_inv_txfm2d_add_ssse3(const int32_t *input, uint8_t *output,
+ int stride, TX_TYPE tx_type,
+ TX_SIZE tx_size, int eob) {
+ switch (tx_size) {
+ case TX_4X4:
+ lowbd_inv_txfm2d_add_4x4_ssse3(input, output, stride, tx_type, tx_size,
+ eob);
+ break;
+ case TX_4X8:
+ lowbd_inv_txfm2d_add_4x8_ssse3(input, output, stride, tx_type, tx_size,
+ eob);
+ break;
+ case TX_8X4:
+ lowbd_inv_txfm2d_add_8x4_ssse3(input, output, stride, tx_type, tx_size,
+ eob);
+ break;
+ case TX_4X16:
+ lowbd_inv_txfm2d_add_4x16_ssse3(input, output, stride, tx_type, tx_size,
+ eob);
+ break;
+ case TX_16X4:
+ lowbd_inv_txfm2d_add_16x4_ssse3(input, output, stride, tx_type, tx_size,
+ eob);
+ break;
+ default:
+ lowbd_inv_txfm2d_add_universe_ssse3(input, output, stride, tx_type,
+ tx_size, eob);
+ break;
+ }
+}
+void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
+ const TxfmParam *txfm_param) {
+ const TX_TYPE tx_type = txfm_param->tx_type;
+ if (!txfm_param->lossless) {
+ av1_lowbd_inv_txfm2d_add_ssse3(dqcoeff, dst, stride, tx_type,
+ txfm_param->tx_size, txfm_param->eob);
+ } else {
+ av1_inv_txfm_add_c(dqcoeff, dst, stride, txfm_param);
+ }
+}
diff --git a/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.h b/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.h
new file mode 100644
index 000000000..dc9be25d2
--- /dev/null
+++ b/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.h
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef AV1_COMMON_X86_AV1_INV_TXFM_SSSE3_H_
+#define AV1_COMMON_X86_AV1_INV_TXFM_SSSE3_H_
+
+#include <emmintrin.h> // SSE2
+#include <tmmintrin.h> // SSSE3
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_integer.h"
+#include "aom_dsp/x86/transpose_sse2.h"
+#include "aom_dsp/x86/txfm_common_sse2.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
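+// Half butterfly for the case where the partner input of a butterfly pair is
+// zero (the low-coefficient paths): out0 = round_shift(in * w0, INV_COS_BIT)
+// and out1 = round_shift(in * w1, INV_COS_BIT). The weights are pre-scaled
+// by 8 so that the rounded Q15 multiply of _mm_mulhrs_epi16 realizes the
+// 12-bit cosine shift.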
+#define btf_16_ssse3(w0, w1, in, out0, out1) \
+ do { \
+ const __m128i _w0 = _mm_set1_epi16(w0 * 8); \
+ const __m128i _w1 = _mm_set1_epi16(w1 * 8); \
+ const __m128i _in = in; \
+ out0 = _mm_mulhrs_epi16(_in, _w0); \
+ out1 = _mm_mulhrs_epi16(_in, _w1); \
+ } while (0)
+
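+// In-place butterfly: in0 becomes in0 + in1 and in1 becomes in0 - in1, using
+// saturating 16-bit arithmetic.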
+#define btf_16_adds_subs_sse2(in0, in1) \
+ do { \
+ const __m128i _in0 = in0; \
+ const __m128i _in1 = in1; \
+ in0 = _mm_adds_epi16(_in0, _in1); \
+ in1 = _mm_subs_epi16(_in0, _in1); \
+ } while (0)
+
+#define btf_16_subs_adds_sse2(in0, in1) \
+ do { \
+ const __m128i _in0 = in0; \
+ const __m128i _in1 = in1; \
+ in1 = _mm_subs_epi16(_in0, _in1); \
+ in0 = _mm_adds_epi16(_in0, _in1); \
+ } while (0)
+
+#define btf_16_adds_subs_out_sse2(out0, out1, in0, in1) \
+ do { \
+ const __m128i _in0 = in0; \
+ const __m128i _in1 = in1; \
+ out0 = _mm_adds_epi16(_in0, _in1); \
+ out1 = _mm_subs_epi16(_in0, _in1); \
+ } while (0)
+
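+// Round-shift a buffer of 16-bit lanes by |bit|: negative bits are applied
+// as a rounded multiply by 2^(15 + bit) via _mm_mulhrs_epi16, positive bits
+// as a plain left shift.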
+static INLINE void round_shift_16bit_ssse3(__m128i *in, int size, int bit) {
+ if (bit < 0) {
+ const __m128i scale = _mm_set1_epi16(1 << (15 + bit));
+ for (int i = 0; i < size; ++i) {
+ in[i] = _mm_mulhrs_epi16(in[i], scale);
+ }
+ } else if (bit > 0) {
+ for (int i = 0; i < size; ++i) {
+ in[i] = _mm_slli_epi16(in[i], bit);
+ }
+ }
+}
+
+// 1D itx types
+typedef enum ATTRIBUTE_PACKED {
+ IDCT_1D,
+ IADST_1D,
+ IFLIPADST_1D = IADST_1D,
+ IIDENTITY_1D,
+ ITX_TYPES_1D,
+} ITX_TYPE_1D;
+
+static const ITX_TYPE_1D vitx_1d_tab[TX_TYPES] = {
+ IDCT_1D, IADST_1D, IDCT_1D, IADST_1D,
+ IFLIPADST_1D, IDCT_1D, IFLIPADST_1D, IADST_1D,
+ IFLIPADST_1D, IIDENTITY_1D, IDCT_1D, IIDENTITY_1D,
+ IADST_1D, IIDENTITY_1D, IFLIPADST_1D, IIDENTITY_1D,
+};
+
+static const ITX_TYPE_1D hitx_1d_tab[TX_TYPES] = {
+ IDCT_1D, IDCT_1D, IADST_1D, IADST_1D,
+ IDCT_1D, IFLIPADST_1D, IFLIPADST_1D, IFLIPADST_1D,
+ IADST_1D, IIDENTITY_1D, IIDENTITY_1D, IDCT_1D,
+ IIDENTITY_1D, IADST_1D, IIDENTITY_1D, IFLIPADST_1D,
+};
+
+// sqrt(2), sqrt(2)^2, sqrt(2)^3, sqrt(2)^4, sqrt(2)^5 in Q12 (NewSqrt2Bits).
+static int32_t NewSqrt2list[TX_SIZES] = { 5793, 2 * 4096, 2 * 5793, 4 * 4096,
+ 4 * 5793 };
+
+DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x8_default[8]) = {
+ 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ av1_eob_to_eobxy_16x16_default[16]) = {
+ 0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
+ 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ av1_eob_to_eobxy_32x32_default[32]) = {
+ 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
+ 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
+ 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
+ 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x16_default[16]) = {
+ 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
+ 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_16x8_default[8]) = {
+ 0x0707, 0x0707, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ av1_eob_to_eobxy_16x32_default[32]) = {
+ 0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
+ 0x0f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
+ 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
+ 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ av1_eob_to_eobxy_32x16_default[16]) = {
+ 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
+ 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x32_default[32]) = {
+ 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
+ 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x1f07, 0x1f07, 0x1f07,
+ 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
+ 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_32x8_default[8]) = {
+ 0x0707, 0x070f, 0x070f, 0x071f, 0x071f, 0x071f, 0x071f, 0x071f,
+};
+
+DECLARE_ALIGNED(16, static const int16_t *,
+ av1_eob_to_eobxy_default[TX_SIZES_ALL]) = {
+ NULL,
+ av1_eob_to_eobxy_8x8_default,
+ av1_eob_to_eobxy_16x16_default,
+ av1_eob_to_eobxy_32x32_default,
+ av1_eob_to_eobxy_32x32_default,
+ NULL,
+ NULL,
+ av1_eob_to_eobxy_8x16_default,
+ av1_eob_to_eobxy_16x8_default,
+ av1_eob_to_eobxy_16x32_default,
+ av1_eob_to_eobxy_32x16_default,
+ av1_eob_to_eobxy_32x32_default,
+ av1_eob_to_eobxy_32x32_default,
+ NULL,
+ NULL,
+ av1_eob_to_eobxy_8x32_default,
+ av1_eob_to_eobxy_32x8_default,
+ av1_eob_to_eobxy_16x32_default,
+ av1_eob_to_eobxy_32x16_default,
+};
+
+static const int lowbd_txfm_all_1d_zeros_idx[32] = {
+ 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+};
+
+// Transform block width in log2 for eob lookup (a width of 64 maps to 32)
+static const int tx_size_wide_log2_eob[TX_SIZES_ALL] = {
+ 2, 3, 4, 5, 5, 2, 3, 3, 4, 4, 5, 5, 5, 2, 4, 3, 5, 4, 5,
+};
+
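+// Split a scan-order eob into per-direction bounds for the default scans:
+// the row of the last nonzero coefficient indexes the per-size table above,
+// which packs eobx in the low byte and eoby in the high byte.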
+static INLINE void get_eobx_eoby_scan_default(int *eobx, int *eoby,
+ TX_SIZE tx_size, int eob) {
+ if (eob == 1) {
+ *eobx = 0;
+ *eoby = 0;
+ return;
+ }
+
+ const int tx_w_log2 = tx_size_wide_log2_eob[tx_size];
+ const int eob_row = (eob - 1) >> tx_w_log2;
+ const int eobxy = av1_eob_to_eobxy_default[tx_size][eob_row];
+ *eobx = eobxy & 0xFF;
+ *eoby = eobxy >> 8;
+}
+
+static int eob_fill[32] = {
+ 0, 7, 7, 7, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 15,
+ 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
+};
+
+static INLINE void get_eobx_eoby_scan_h_identity(int *eobx, int *eoby,
+ TX_SIZE tx_size, int eob) {
+ eob -= 1;
+ const int txfm_size_col = tx_size_wide[tx_size];
+ const int eobx_max = AOMMIN(32, txfm_size_col) - 1;
+ *eobx = (eob >= eobx_max) ? eobx_max : eob_fill[eob];
+ const int temp_eoby = eob / (eobx_max + 1);
+ assert(temp_eoby < 32);
+ *eoby = eob_fill[temp_eoby];
+}
+
+static INLINE void get_eobx_eoby_scan_v_identity(int *eobx, int *eoby,
+ TX_SIZE tx_size, int eob) {
+ eob -= 1;
+ const int txfm_size_row = tx_size_high[tx_size];
+ const int eoby_max = AOMMIN(32, txfm_size_row) - 1;
+ *eobx = eob / (eoby_max + 1);
+ *eoby = (eob >= eoby_max) ? eoby_max : eob_fill[eob];
+}
+
+typedef void (*transform_1d_ssse3)(const __m128i *input, __m128i *output,
+ int8_t cos_bit);
+
+void av1_lowbd_inv_txfm2d_add_ssse3(const int32_t *input, uint8_t *output,
+ int stride, TX_TYPE tx_type,
+ TX_SIZE tx_size, int eob);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // AV1_COMMON_X86_AV1_INV_TXFM_SSSE3_H_
diff --git a/third_party/aom/av1/common/x86/av1_txfm1d_sse4.h b/third_party/aom/av1/common/x86/av1_txfm1d_sse4.h
deleted file mode 100644
index fd0a6ed2c..000000000
--- a/third_party/aom/av1/common/x86/av1_txfm1d_sse4.h
+++ /dev/null
@@ -1,144 +0,0 @@
-#ifndef AV1_TXMF1D_SSE2_H_
-#define AV1_TXMF1D_SSE2_H_
-
-#include <smmintrin.h>
-#include "av1/common/av1_txfm.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_fdct4_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_fdct8_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_fdct16_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_fdct32_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_fdct64_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-
-void av1_fadst4_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_fadst8_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_fadst16_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_fadst32_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-
-void av1_idct4_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_idct8_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_idct16_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_idct32_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_idct64_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-
-void av1_iadst4_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_iadst8_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_iadst16_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-void av1_iadst32_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t *cos_bit, const int8_t *stage_range);
-
-static INLINE void transpose_32_4x4(int stride, const __m128i *input,
- __m128i *output) {
- __m128i temp0 = _mm_unpacklo_epi32(input[0 * stride], input[2 * stride]);
- __m128i temp1 = _mm_unpackhi_epi32(input[0 * stride], input[2 * stride]);
- __m128i temp2 = _mm_unpacklo_epi32(input[1 * stride], input[3 * stride]);
- __m128i temp3 = _mm_unpackhi_epi32(input[1 * stride], input[3 * stride]);
-
- output[0 * stride] = _mm_unpacklo_epi32(temp0, temp2);
- output[1 * stride] = _mm_unpackhi_epi32(temp0, temp2);
- output[2 * stride] = _mm_unpacklo_epi32(temp1, temp3);
- output[3 * stride] = _mm_unpackhi_epi32(temp1, temp3);
-}
-
-// the entire input block can be represent by a grid of 4x4 blocks
-// each 4x4 blocks can be represent by 4 vertical __m128i
-// we first transpose each 4x4 block internally
-// then transpose the grid
-static INLINE void transpose_32(int txfm_size, const __m128i *input,
- __m128i *output) {
- const int num_per_128 = 4;
- const int row_size = txfm_size;
- const int col_size = txfm_size / num_per_128;
- int r, c;
-
- // transpose each 4x4 block internally
- for (r = 0; r < row_size; r += 4) {
- for (c = 0; c < col_size; c++) {
- transpose_32_4x4(col_size, &input[r * col_size + c],
- &output[c * 4 * col_size + r / 4]);
- }
- }
-}
-
-static INLINE __m128i round_shift_32_sse4_1(__m128i vec, int bit) {
- __m128i tmp, round;
- round = _mm_set1_epi32(1 << (bit - 1));
- tmp = _mm_add_epi32(vec, round);
- return _mm_srai_epi32(tmp, bit);
-}
-
-static INLINE void round_shift_array_32_sse4_1(__m128i *input, __m128i *output,
- const int size, const int bit) {
- if (bit > 0) {
- int i;
- for (i = 0; i < size; i++) {
- output[i] = round_shift_32_sse4_1(input[i], bit);
- }
- } else {
- int i;
- for (i = 0; i < size; i++) {
- output[i] = _mm_slli_epi32(input[i], -bit);
- }
- }
-}
-
-// out0 = in0*w0 + in1*w1
-// out1 = -in1*w0 + in0*w1
-#define btf_32_sse4_1_type0(w0, w1, in0, in1, out0, out1, bit) \
- do { \
- __m128i ww0, ww1, in0_w0, in1_w1, in0_w1, in1_w0; \
- ww0 = _mm_set1_epi32(w0); \
- ww1 = _mm_set1_epi32(w1); \
- in0_w0 = _mm_mullo_epi32(in0, ww0); \
- in1_w1 = _mm_mullo_epi32(in1, ww1); \
- out0 = _mm_add_epi32(in0_w0, in1_w1); \
- out0 = round_shift_32_sse4_1(out0, bit); \
- in0_w1 = _mm_mullo_epi32(in0, ww1); \
- in1_w0 = _mm_mullo_epi32(in1, ww0); \
- out1 = _mm_sub_epi32(in0_w1, in1_w0); \
- out1 = round_shift_32_sse4_1(out1, bit); \
- } while (0)
-
-// out0 = in0*w0 + in1*w1
-// out1 = in1*w0 - in0*w1
-#define btf_32_sse4_1_type1(w0, w1, in0, in1, out0, out1, bit) \
- do { \
- __m128i ww0, ww1, in0_w0, in1_w1, in0_w1, in1_w0; \
- ww0 = _mm_set1_epi32(w0); \
- ww1 = _mm_set1_epi32(w1); \
- in0_w0 = _mm_mullo_epi32(in0, ww0); \
- in1_w1 = _mm_mullo_epi32(in1, ww1); \
- out0 = _mm_add_epi32(in0_w0, in1_w1); \
- out0 = round_shift_32_sse4_1(out0, bit); \
- in0_w1 = _mm_mullo_epi32(in0, ww1); \
- in1_w0 = _mm_mullo_epi32(in1, ww0); \
- out1 = _mm_sub_epi32(in1_w0, in0_w1); \
- out1 = round_shift_32_sse4_1(out1, bit); \
- } while (0)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // AV1_TXMF1D_SSE2_H_
diff --git a/third_party/aom/av1/common/x86/av1_txfm_sse2.h b/third_party/aom/av1/common/x86/av1_txfm_sse2.h
new file mode 100644
index 000000000..721cfe059
--- /dev/null
+++ b/third_party/aom/av1/common/x86/av1_txfm_sse2.h
@@ -0,0 +1,317 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef AV1_COMMON_X86_AV1_TXFM_SSE2_H_
+#define AV1_COMMON_X86_AV1_TXFM_SSE2_H_
+
+#include <emmintrin.h> // SSE2
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_integer.h"
+#include "aom_dsp/x86/transpose_sse2.h"
+#include "aom_dsp/x86/txfm_common_sse2.h"
+#include "av1/common/av1_txfm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
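+// Butterfly on the low four 16-bit lanes: each 32-bit product is
+// round_shift(in0 * w[0] + in1 * w[1], cos_bit), where (w[0], w[1]) is the
+// cosine pair packed into w0 (respectively w1). Only the low 64 bits of the
+// outputs are consumed by the 4-wide callers, so the high half of the final
+// packs is a don't-care.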
+static INLINE void btf_16_w4_sse2(
+ const __m128i *const w0, const __m128i *const w1, const __m128i __rounding,
+ const int8_t cos_bit, const __m128i *const in0, const __m128i *const in1,
+ __m128i *const out0, __m128i *const out1) {
+ const __m128i t0 = _mm_unpacklo_epi16(*in0, *in1);
+ const __m128i u0 = _mm_madd_epi16(t0, *w0);
+ const __m128i v0 = _mm_madd_epi16(t0, *w1);
+ const __m128i a0 = _mm_add_epi32(u0, __rounding);
+ const __m128i b0 = _mm_add_epi32(v0, __rounding);
+ const __m128i c0 = _mm_srai_epi32(a0, cos_bit);
+ const __m128i d0 = _mm_srai_epi32(b0, cos_bit);
+
+ *out0 = _mm_packs_epi32(c0, c0);
+ *out1 = _mm_packs_epi32(d0, c0);
+}
+
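+// Macro form of the 4-point butterfly above; __rounding and cos_bit are
+// taken from the enclosing scope.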
+#define btf_16_4p_sse2(w0, w1, in0, in1, out0, out1) \
+ { \
+ __m128i t0 = _mm_unpacklo_epi16(in0, in1); \
+ __m128i u0 = _mm_madd_epi16(t0, w0); \
+ __m128i v0 = _mm_madd_epi16(t0, w1); \
+ \
+ __m128i a0 = _mm_add_epi32(u0, __rounding); \
+ __m128i b0 = _mm_add_epi32(v0, __rounding); \
+ \
+ __m128i c0 = _mm_srai_epi32(a0, cos_bit); \
+ __m128i d0 = _mm_srai_epi32(b0, cos_bit); \
+ \
+ out0 = _mm_packs_epi32(c0, c0); \
+ out1 = _mm_packs_epi32(d0, d0); \
+ }
+
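+// Full-width (8-lane) butterfly: both halves of in0/in1 are interleaved and
+// multiply-accumulated against the packed cosine pairs, so each output lane
+// is round_shift(in0 * w[0] + in1 * w[1], cos_bit) for the pair stored in w0
+// (respectively w1). __rounding and cos_bit come from the enclosing scope.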
+#define btf_16_sse2(w0, w1, in0, in1, out0, out1) \
+ { \
+ __m128i t0 = _mm_unpacklo_epi16(in0, in1); \
+ __m128i t1 = _mm_unpackhi_epi16(in0, in1); \
+ __m128i u0 = _mm_madd_epi16(t0, w0); \
+ __m128i u1 = _mm_madd_epi16(t1, w0); \
+ __m128i v0 = _mm_madd_epi16(t0, w1); \
+ __m128i v1 = _mm_madd_epi16(t1, w1); \
+ \
+ __m128i a0 = _mm_add_epi32(u0, __rounding); \
+ __m128i a1 = _mm_add_epi32(u1, __rounding); \
+ __m128i b0 = _mm_add_epi32(v0, __rounding); \
+ __m128i b1 = _mm_add_epi32(v1, __rounding); \
+ \
+ __m128i c0 = _mm_srai_epi32(a0, cos_bit); \
+ __m128i c1 = _mm_srai_epi32(a1, cos_bit); \
+ __m128i d0 = _mm_srai_epi32(b0, cos_bit); \
+ __m128i d1 = _mm_srai_epi32(b1, cos_bit); \
+ \
+ out0 = _mm_packs_epi32(c0, c1); \
+ out1 = _mm_packs_epi32(d0, d1); \
+ }
+
+static INLINE __m128i load_16bit_to_16bit(const int16_t *a) {
+ return _mm_load_si128((const __m128i *)a);
+}
+
+static INLINE __m128i load_32bit_to_16bit(const int32_t *a) {
+ const __m128i a_low = _mm_load_si128((const __m128i *)a);
+ return _mm_packs_epi32(a_low, *(const __m128i *)(a + 4));
+}
+
+static INLINE __m128i load_32bit_to_16bit_w4(const int32_t *a) {
+ const __m128i a_low = _mm_load_si128((const __m128i *)a);
+ return _mm_packs_epi32(a_low, a_low);
+}
+
+// Store 4 16-bit values, sign-extended to 32 bits.
+static INLINE void store_16bit_to_32bit_w4(const __m128i a, int32_t *const b) {
+ const __m128i a_lo = _mm_unpacklo_epi16(a, a);
+ const __m128i a_1 = _mm_srai_epi32(a_lo, 16);
+ _mm_store_si128((__m128i *)b, a_1);
+}
+
+// Store 8 16-bit values, sign-extended to 32 bits.
+static INLINE void store_16bit_to_32bit(__m128i a, int32_t *b) {
+ const __m128i a_lo = _mm_unpacklo_epi16(a, a);
+ const __m128i a_hi = _mm_unpackhi_epi16(a, a);
+ const __m128i a_1 = _mm_srai_epi32(a_lo, 16);
+ const __m128i a_2 = _mm_srai_epi32(a_hi, 16);
+ _mm_store_si128((__m128i *)b, a_1);
+ _mm_store_si128((__m128i *)(b + 4), a_2);
+}
+
+static INLINE __m128i scale_round_sse2(const __m128i a, const int scale) {
+ const __m128i scale_rounding = pair_set_epi16(scale, 1 << (NewSqrt2Bits - 1));
+ const __m128i b = _mm_madd_epi16(a, scale_rounding);
+ return _mm_srai_epi32(b, NewSqrt2Bits);
+}
+
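+// Rectangular (non-square) transform sizes carry an extra sqrt(2)
+// normalization; the store_rect helpers apply it with scale_round_sse2 while
+// widening 16-bit results to 32 bits, pairing each sample with 1 so the
+// rounding constant rides along in the madd.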
+static INLINE void store_rect_16bit_to_32bit_w4(const __m128i a,
+ int32_t *const b) {
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i a_lo = _mm_unpacklo_epi16(a, one);
+ const __m128i b_lo = scale_round_sse2(a_lo, NewSqrt2);
+ _mm_store_si128((__m128i *)b, b_lo);
+}
+
+static INLINE void store_rect_16bit_to_32bit(const __m128i a,
+ int32_t *const b) {
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i a_lo = _mm_unpacklo_epi16(a, one);
+ const __m128i a_hi = _mm_unpackhi_epi16(a, one);
+ const __m128i b_lo = scale_round_sse2(a_lo, NewSqrt2);
+ const __m128i b_hi = scale_round_sse2(a_hi, NewSqrt2);
+ _mm_store_si128((__m128i *)b, b_lo);
+ _mm_store_si128((__m128i *)(b + 4), b_hi);
+}
+
+static INLINE void load_buffer_16bit_to_16bit_w4(const int16_t *const in,
+ const int stride,
+ __m128i *const out,
+ const int out_size) {
+ for (int i = 0; i < out_size; ++i) {
+ out[i] = _mm_loadl_epi64((const __m128i *)(in + i * stride));
+ }
+}
+
+static INLINE void load_buffer_16bit_to_16bit_w4_flip(const int16_t *const in,
+ const int stride,
+ __m128i *const out,
+ const int out_size) {
+ for (int i = 0; i < out_size; ++i) {
+ out[out_size - i - 1] = _mm_loadl_epi64((const __m128i *)(in + i * stride));
+ }
+}
+
+static INLINE void load_buffer_16bit_to_16bit(const int16_t *in, int stride,
+ __m128i *out, int out_size) {
+ for (int i = 0; i < out_size; ++i) {
+ out[i] = load_16bit_to_16bit(in + i * stride);
+ }
+}
+
+static INLINE void load_buffer_16bit_to_16bit_flip(const int16_t *in,
+ int stride, __m128i *out,
+ int out_size) {
+ for (int i = 0; i < out_size; ++i) {
+ out[out_size - i - 1] = load_16bit_to_16bit(in + i * stride);
+ }
+}
+
+static INLINE void load_buffer_32bit_to_16bit(const int32_t *in, int stride,
+ __m128i *out, int out_size) {
+ for (int i = 0; i < out_size; ++i) {
+ out[i] = load_32bit_to_16bit(in + i * stride);
+ }
+}
+
+static INLINE void load_buffer_32bit_to_16bit_w4(const int32_t *in, int stride,
+ __m128i *out, int out_size) {
+ for (int i = 0; i < out_size; ++i) {
+ out[i] = load_32bit_to_16bit_w4(in + i * stride);
+ }
+}
+
+static INLINE void load_buffer_32bit_to_16bit_flip(const int32_t *in,
+ int stride, __m128i *out,
+ int out_size) {
+ for (int i = 0; i < out_size; ++i) {
+ out[out_size - i - 1] = load_32bit_to_16bit(in + i * stride);
+ }
+}
+
+static INLINE void store_buffer_16bit_to_32bit_w4(const __m128i *const in,
+ int32_t *const out,
+ const int stride,
+ const int out_size) {
+ for (int i = 0; i < out_size; ++i) {
+ store_16bit_to_32bit_w4(in[i], out + i * stride);
+ }
+}
+
+static INLINE void store_buffer_16bit_to_32bit_w8(const __m128i *const in,
+ int32_t *const out,
+ const int stride,
+ const int out_size) {
+ for (int i = 0; i < out_size; ++i) {
+ store_16bit_to_32bit(in[i], out + i * stride);
+ }
+}
+
+static INLINE void store_rect_buffer_16bit_to_32bit_w4(const __m128i *const in,
+ int32_t *const out,
+ const int stride,
+ const int out_size) {
+ for (int i = 0; i < out_size; ++i) {
+ store_rect_16bit_to_32bit_w4(in[i], out + i * stride);
+ }
+}
+
+static INLINE void store_rect_buffer_16bit_to_32bit_w8(const __m128i *const in,
+ int32_t *const out,
+ const int stride,
+ const int out_size) {
+ for (int i = 0; i < out_size; ++i) {
+ store_rect_16bit_to_32bit(in[i], out + i * stride);
+ }
+}
+
+static INLINE void store_buffer_16bit_to_16bit_8x8(const __m128i *in,
+ uint16_t *out,
+ const int stride) {
+ for (int i = 0; i < 8; ++i) {
+ _mm_store_si128((__m128i *)(out + i * stride), in[i]);
+ }
+}
+
+static INLINE void round_shift_16bit(__m128i *in, int size, int bit) {
+ if (bit < 0) {
+ bit = -bit;
+ __m128i rounding = _mm_set1_epi16(1 << (bit - 1));
+ for (int i = 0; i < size; ++i) {
+ in[i] = _mm_adds_epi16(in[i], rounding);
+ in[i] = _mm_srai_epi16(in[i], bit);
+ }
+ } else if (bit > 0) {
+ for (int i = 0; i < size; ++i) {
+ in[i] = _mm_slli_epi16(in[i], bit);
+ }
+ }
+}
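+
+// Illustrative scalar equivalent of round_shift_16bit: for bit < 0 each lane
+// becomes (in[i] + (1 << (-bit - 1))) >> -bit (with a saturating add), and
+// for bit > 0 each lane becomes in[i] << bit.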
+
+static INLINE void flip_buf_sse2(__m128i *in, __m128i *out, int size) {
+ for (int i = 0; i < size; ++i) {
+ out[size - i - 1] = in[i];
+ }
+}
+
+void av1_lowbd_fwd_txfm2d_4x4_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+void av1_lowbd_fwd_txfm2d_4x8_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+void av1_lowbd_fwd_txfm2d_4x16_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+void av1_lowbd_fwd_txfm2d_8x4_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+void av1_lowbd_fwd_txfm2d_8x8_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+void av1_lowbd_fwd_txfm2d_8x16_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+void av1_lowbd_fwd_txfm2d_8x32_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+void av1_lowbd_fwd_txfm2d_16x4_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+void av1_lowbd_fwd_txfm2d_16x8_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+void av1_lowbd_fwd_txfm2d_16x16_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+void av1_lowbd_fwd_txfm2d_16x32_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+void av1_lowbd_fwd_txfm2d_32x8_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+void av1_lowbd_fwd_txfm2d_32x16_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+void av1_lowbd_fwd_txfm2d_32x32_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+void av1_lowbd_fwd_txfm2d_16x64_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+void av1_lowbd_fwd_txfm2d_64x16_sse2(const int16_t *input, int32_t *output,
+ int stride, TX_TYPE tx_type, int bd);
+
+typedef void (*transform_1d_sse2)(const __m128i *input, __m128i *output,
+ int8_t cos_bit);
+
+typedef struct {
+ transform_1d_sse2 col, row; // vertical and horizontal
+} transform_2d_sse2;
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+#endif // AV1_COMMON_X86_AV1_TXFM_SSE2_H_
diff --git a/third_party/aom/av1/common/x86/av1_txfm_sse4.c b/third_party/aom/av1/common/x86/av1_txfm_sse4.c
new file mode 100644
index 000000000..cccc62f03
--- /dev/null
+++ b/third_party/aom/av1/common/x86/av1_txfm_sse4.c
@@ -0,0 +1,10 @@
+#include "config/aom_dsp_rtcd.h"
+
+#include "av1/common/av1_txfm.h"
+#include "av1/common/x86/av1_txfm_sse4.h"
+
+void av1_round_shift_array_sse4_1(int32_t *arr, int size, int bit) {
+ __m128i *const vec = (__m128i *)arr;
+ const int vec_size = size >> 2;
+ av1_round_shift_array_32_sse4_1(vec, vec, vec_size, bit);
+}
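+
+// Note (illustrative): the wrapper above processes four 32-bit coefficients
+// per __m128i, so it assumes `size` is a multiple of 4 (any remainder would
+// be left unshifted) and that `arr` is suitably aligned for vector access.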
diff --git a/third_party/aom/av1/common/x86/av1_txfm_sse4.h b/third_party/aom/av1/common/x86/av1_txfm_sse4.h
new file mode 100644
index 000000000..faf7251fa
--- /dev/null
+++ b/third_party/aom/av1/common/x86/av1_txfm_sse4.h
@@ -0,0 +1,60 @@
+#ifndef AV1_TXFM_SSE4_H_
+#define AV1_TXFM_SSE4_H_
+
+#include <smmintrin.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static INLINE __m128i av1_round_shift_32_sse4_1(__m128i vec, int bit) {
+ __m128i tmp, round;
+ round = _mm_set1_epi32(1 << (bit - 1));
+ tmp = _mm_add_epi32(vec, round);
+ return _mm_srai_epi32(tmp, bit);
+}
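+
+// Scalar equivalent (illustrative): each 32-bit lane becomes
+//   (vec[i] + (1 << (bit - 1))) >> bit,
+// i.e. a rounded right shift, assuming bit > 0.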
+
+static INLINE void av1_round_shift_array_32_sse4_1(__m128i *input,
+ __m128i *output,
+ const int size,
+ const int bit) {
+ if (bit > 0) {
+ int i;
+ for (i = 0; i < size; i++) {
+ output[i] = av1_round_shift_32_sse4_1(input[i], bit);
+ }
+ } else {
+ int i;
+ for (i = 0; i < size; i++) {
+ output[i] = _mm_slli_epi32(input[i], -bit);
+ }
+ }
+}
+
+static INLINE void av1_round_shift_rect_array_32_sse4_1(__m128i *input,
+ __m128i *output,
+ const int size,
+ const int bit) {
+ const __m128i sqrt2 = _mm_set1_epi32(NewSqrt2);
+ if (bit > 0) {
+ int i;
+ for (i = 0; i < size; i++) {
+ const __m128i r0 = av1_round_shift_32_sse4_1(input[i], bit);
+ const __m128i r1 = _mm_mullo_epi32(sqrt2, r0);
+ output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits);
+ }
+ } else {
+ int i;
+ for (i = 0; i < size; i++) {
+ const __m128i r0 = _mm_slli_epi32(input[i], -bit);
+ const __m128i r1 = _mm_mullo_epi32(sqrt2, r0);
+ output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits);
+ }
+ }
+}
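+
+// Illustrative view of the rectangular variant: after the round/shift by
+// `bit`, every coefficient is additionally scaled by
+//   NewSqrt2 / 2^NewSqrt2Bits ~= sqrt(2),
+// the extra scale applied to rectangular transform sizes.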
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // AV1_TXFM_SSE4_H_
diff --git a/third_party/aom/av1/common/x86/cfl_avx2.c b/third_party/aom/av1/common/x86/cfl_avx2.c
new file mode 100644
index 000000000..a8bfdcce6
--- /dev/null
+++ b/third_party/aom/av1/common/x86/cfl_avx2.c
@@ -0,0 +1,491 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include <immintrin.h>
+
+#include "config/av1_rtcd.h"
+
+#include "av1/common/cfl.h"
+
+#include "av1/common/x86/cfl_simd.h"
+
+#define CFL_GET_SUBSAMPLE_FUNCTION_AVX2(sub, bd) \
+ CFL_SUBSAMPLE(avx2, sub, bd, 32, 32) \
+ CFL_SUBSAMPLE(avx2, sub, bd, 32, 16) \
+ CFL_SUBSAMPLE(avx2, sub, bd, 32, 8) \
+ cfl_subsample_##bd##_fn cfl_get_luma_subsampling_##sub##_##bd##_avx2( \
+ TX_SIZE tx_size) { \
+ static const cfl_subsample_##bd##_fn subfn_##sub[TX_SIZES_ALL] = { \
+ subsample_##bd##_##sub##_4x4_ssse3, /* 4x4 */ \
+ subsample_##bd##_##sub##_8x8_ssse3, /* 8x8 */ \
+ subsample_##bd##_##sub##_16x16_ssse3, /* 16x16 */ \
+ subsample_##bd##_##sub##_32x32_avx2, /* 32x32 */ \
+ cfl_subsample_##bd##_null, /* 64x64 (invalid CFL size) */ \
+ subsample_##bd##_##sub##_4x8_ssse3, /* 4x8 */ \
+ subsample_##bd##_##sub##_8x4_ssse3, /* 8x4 */ \
+ subsample_##bd##_##sub##_8x16_ssse3, /* 8x16 */ \
+ subsample_##bd##_##sub##_16x8_ssse3, /* 16x8 */ \
+ subsample_##bd##_##sub##_16x32_ssse3, /* 16x32 */ \
+ subsample_##bd##_##sub##_32x16_avx2, /* 32x16 */ \
+ cfl_subsample_##bd##_null, /* 32x64 (invalid CFL size) */ \
+ cfl_subsample_##bd##_null, /* 64x32 (invalid CFL size) */ \
+ subsample_##bd##_##sub##_4x16_ssse3, /* 4x16 */ \
+ subsample_##bd##_##sub##_16x4_ssse3, /* 16x4 */ \
+ subsample_##bd##_##sub##_8x32_ssse3, /* 8x32 */ \
+ subsample_##bd##_##sub##_32x8_avx2, /* 32x8 */ \
+ cfl_subsample_##bd##_null, /* 16x64 (invalid CFL size) */ \
+ cfl_subsample_##bd##_null, /* 64x16 (invalid CFL size) */ \
+ }; \
+ return subfn_##sub[tx_size]; \
+ }
+
+/**
+ * Adds 4 pixels (in a 2x2 grid) and multiplies them by 2, resulting in a more
+ * precise version of box-filter 4:2:0 pixel subsampling in Q3.
+ *
+ * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
+ * active area is specified using width and height.
+ *
+ * Note: We don't need to worry about going over the active area, as long as we
+ * stay inside the CfL prediction buffer.
+ *
+ * Note: For 4:2:0 luma subsampling, the width will never be greater than 16.
+ */
+static void cfl_luma_subsampling_420_lbd_avx2(const uint8_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3, int width,
+ int height) {
+ (void)width; // Forever 32
+ const __m256i twos = _mm256_set1_epi8(2); // Thirty two twos
+ const int luma_stride = input_stride << 1;
+ __m256i *row = (__m256i *)pred_buf_q3;
+ const __m256i *row_end = row + (height >> 1) * CFL_BUF_LINE_I256;
+ do {
+ __m256i top = _mm256_loadu_si256((__m256i *)input);
+ __m256i bot = _mm256_loadu_si256((__m256i *)(input + input_stride));
+
+ __m256i top_16x16 = _mm256_maddubs_epi16(top, twos);
+ __m256i bot_16x16 = _mm256_maddubs_epi16(bot, twos);
+ __m256i sum_16x16 = _mm256_add_epi16(top_16x16, bot_16x16);
+
+ _mm256_storeu_si256(row, sum_16x16);
+
+ input += luma_stride;
+ } while ((row += CFL_BUF_LINE_I256) < row_end);
+}
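+
+// Worked example (illustrative): for a 2x2 luma patch {10, 12; 11, 13},
+// _mm256_maddubs_epi16 with `twos` gives 2*(10+12) for the top row and
+// 2*(11+13) for the bottom row; their sum is 92, which is the patch average
+// 11.5 expressed in Q3 (11.5 * 8 = 92).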
+
+CFL_GET_SUBSAMPLE_FUNCTION_AVX2(420, lbd)
+
+/**
+ * Adds 2 pixels (in a 2x1 grid) and multiplies them by 4, resulting in a more
+ * precise version of box-filter 4:2:2 pixel subsampling in Q3.
+ *
+ * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
+ * active area is specified using width and height.
+ *
+ * Note: We don't need to worry about going over the active area, as long as we
+ * stay inside the CfL prediction buffer.
+ */
+static void cfl_luma_subsampling_422_lbd_avx2(const uint8_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3, int width,
+ int height) {
+ (void)width; // Forever 32
+ const __m256i fours = _mm256_set1_epi8(4); // Thirty two fours
+ __m256i *row = (__m256i *)pred_buf_q3;
+ const __m256i *row_end = row + height * CFL_BUF_LINE_I256;
+ do {
+ __m256i top = _mm256_loadu_si256((__m256i *)input);
+ __m256i top_16x16 = _mm256_maddubs_epi16(top, fours);
+ _mm256_storeu_si256(row, top_16x16);
+ input += input_stride;
+ } while ((row += CFL_BUF_LINE_I256) < row_end);
+}
+
+CFL_GET_SUBSAMPLE_FUNCTION_AVX2(422, lbd)
+
+/**
+ * Multiplies the pixels by 8 (scaling in Q3). The AVX2 subsampling is only
+ * performed on blocks of width 32.
+ *
+ * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
+ * active area is specified using width and height.
+ *
+ * Note: We don't need to worry about going over the active area, as long as we
+ * stay inside the CfL prediction buffer.
+ */
+static void cfl_luma_subsampling_444_lbd_avx2(const uint8_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3, int width,
+ int height) {
+ (void)width; // Forever 32
+ __m256i *row = (__m256i *)pred_buf_q3;
+ const __m256i *row_end = row + height * CFL_BUF_LINE_I256;
+ const __m256i zeros = _mm256_setzero_si256();
+ do {
+ __m256i top = _mm256_loadu_si256((__m256i *)input);
+ top = _mm256_permute4x64_epi64(top, _MM_SHUFFLE(3, 1, 2, 0));
+
+ __m256i row_lo = _mm256_unpacklo_epi8(top, zeros);
+ row_lo = _mm256_slli_epi16(row_lo, 3);
+ __m256i row_hi = _mm256_unpackhi_epi8(top, zeros);
+ row_hi = _mm256_slli_epi16(row_hi, 3);
+
+ _mm256_storeu_si256(row, row_lo);
+ _mm256_storeu_si256(row + 1, row_hi);
+
+ input += input_stride;
+ } while ((row += CFL_BUF_LINE_I256) < row_end);
+}
+
+CFL_GET_SUBSAMPLE_FUNCTION_AVX2(444, lbd)
+
+/**
+ * Adds 4 pixels (in a 2x2 grid) and multiplies them by 2, resulting in a more
+ * precise version of box-filter 4:2:0 pixel subsampling in Q3.
+ *
+ * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
+ * active area is specified using width and height.
+ *
+ * Note: We don't need to worry about going over the active area, as long as we
+ * stay inside the CfL prediction buffer.
+ *
+ * Note: For 4:2:0 luma subsampling, the width will never be greater than 16.
+ */
+static void cfl_luma_subsampling_420_hbd_avx2(const uint16_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3, int width,
+ int height) {
+ (void)width; // Forever 32
+ const int luma_stride = input_stride << 1;
+ __m256i *row = (__m256i *)pred_buf_q3;
+ const __m256i *row_end = row + (height >> 1) * CFL_BUF_LINE_I256;
+ do {
+ __m256i top = _mm256_loadu_si256((__m256i *)input);
+ __m256i bot = _mm256_loadu_si256((__m256i *)(input + input_stride));
+ __m256i sum = _mm256_add_epi16(top, bot);
+
+ __m256i top_1 = _mm256_loadu_si256((__m256i *)(input + 16));
+ __m256i bot_1 = _mm256_loadu_si256((__m256i *)(input + 16 + input_stride));
+ __m256i sum_1 = _mm256_add_epi16(top_1, bot_1);
+
+ __m256i hsum = _mm256_hadd_epi16(sum, sum_1);
+ hsum = _mm256_permute4x64_epi64(hsum, _MM_SHUFFLE(3, 1, 2, 0));
+ hsum = _mm256_add_epi16(hsum, hsum);
+
+ _mm256_storeu_si256(row, hsum);
+
+ input += luma_stride;
+ } while ((row += CFL_BUF_LINE_I256) < row_end);
+}
+
+CFL_GET_SUBSAMPLE_FUNCTION_AVX2(420, hbd)
+
+/**
+ * Adds 2 pixels (in a 2x1 grid) and multiplies them by 4, resulting in a more
+ * precise version of box-filter 4:2:2 pixel subsampling in Q3.
+ *
+ * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
+ * active area is specified using width and height.
+ *
+ * Note: We don't need to worry about going over the active area, as long as we
+ * stay inside the CfL prediction buffer.
+ *
+ */
+static void cfl_luma_subsampling_422_hbd_avx2(const uint16_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3, int width,
+ int height) {
+ (void)width; // Forever 32
+ __m256i *row = (__m256i *)pred_buf_q3;
+ const __m256i *row_end = row + height * CFL_BUF_LINE_I256;
+ do {
+ __m256i top = _mm256_loadu_si256((__m256i *)input);
+ __m256i top_1 = _mm256_loadu_si256((__m256i *)(input + 16));
+ __m256i hsum = _mm256_hadd_epi16(top, top_1);
+ hsum = _mm256_permute4x64_epi64(hsum, _MM_SHUFFLE(3, 1, 2, 0));
+ hsum = _mm256_slli_epi16(hsum, 2);
+
+ _mm256_storeu_si256(row, hsum);
+
+ input += input_stride;
+ } while ((row += CFL_BUF_LINE_I256) < row_end);
+}
+
+CFL_GET_SUBSAMPLE_FUNCTION_AVX2(422, hbd)
+
+static void cfl_luma_subsampling_444_hbd_avx2(const uint16_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3, int width,
+ int height) {
+ (void)width; // Forever 32
+ __m256i *row = (__m256i *)pred_buf_q3;
+ const __m256i *row_end = row + height * CFL_BUF_LINE_I256;
+ do {
+ __m256i top = _mm256_loadu_si256((__m256i *)input);
+ __m256i top_1 = _mm256_loadu_si256((__m256i *)(input + 16));
+ _mm256_storeu_si256(row, _mm256_slli_epi16(top, 3));
+ _mm256_storeu_si256(row + 1, _mm256_slli_epi16(top_1, 3));
+ input += input_stride;
+ } while ((row += CFL_BUF_LINE_I256) < row_end);
+}
+
+CFL_GET_SUBSAMPLE_FUNCTION_AVX2(444, hbd)
+
+static INLINE __m256i predict_unclipped(const __m256i *input, __m256i alpha_q12,
+ __m256i alpha_sign, __m256i dc_q0) {
+ __m256i ac_q3 = _mm256_loadu_si256(input);
+ __m256i ac_sign = _mm256_sign_epi16(alpha_sign, ac_q3);
+ __m256i scaled_luma_q0 =
+ _mm256_mulhrs_epi16(_mm256_abs_epi16(ac_q3), alpha_q12);
+ scaled_luma_q0 = _mm256_sign_epi16(scaled_luma_q0, ac_sign);
+ return _mm256_add_epi16(scaled_luma_q0, dc_q0);
+}
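+
+// Q-format sketch for predict_unclipped (illustrative): alpha_q12 is
+// |alpha_q3| << 9, and _mm256_mulhrs_epi16(x, y) computes (x*y + 2^14) >> 15,
+// so each lane ends up as round(|ac_q3| * |alpha_q3| / 2^6) -- a Q3*Q3
+// product brought back to pixel units -- with the sign of alpha*ac restored
+// via _mm256_sign_epi16 before the DC prediction is added.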
+
+static INLINE void cfl_predict_lbd_avx2(const int16_t *pred_buf_q3,
+ uint8_t *dst, int dst_stride,
+ int alpha_q3, int width, int height) {
+ (void)width;
+ const __m256i alpha_sign = _mm256_set1_epi16(alpha_q3);
+ const __m256i alpha_q12 = _mm256_slli_epi16(_mm256_abs_epi16(alpha_sign), 9);
+ const __m256i dc_q0 = _mm256_set1_epi16(*dst);
+ __m256i *row = (__m256i *)pred_buf_q3;
+ const __m256i *row_end = row + height * CFL_BUF_LINE_I256;
+
+ do {
+ __m256i res = predict_unclipped(row, alpha_q12, alpha_sign, dc_q0);
+ __m256i next = predict_unclipped(row + 1, alpha_q12, alpha_sign, dc_q0);
+ res = _mm256_packus_epi16(res, next);
+ res = _mm256_permute4x64_epi64(res, _MM_SHUFFLE(3, 1, 2, 0));
+ _mm256_storeu_si256((__m256i *)dst, res);
+ dst += dst_stride;
+ } while ((row += CFL_BUF_LINE_I256) < row_end);
+}
+
+CFL_PREDICT_X(avx2, 32, 8, lbd);
+CFL_PREDICT_X(avx2, 32, 16, lbd);
+CFL_PREDICT_X(avx2, 32, 32, lbd);
+
+cfl_predict_lbd_fn get_predict_lbd_fn_avx2(TX_SIZE tx_size) {
+ static const cfl_predict_lbd_fn pred[TX_SIZES_ALL] = {
+ predict_lbd_4x4_ssse3, /* 4x4 */
+ predict_lbd_8x8_ssse3, /* 8x8 */
+ predict_lbd_16x16_ssse3, /* 16x16 */
+ predict_lbd_32x32_avx2, /* 32x32 */
+ cfl_predict_lbd_null, /* 64x64 (invalid CFL size) */
+ predict_lbd_4x8_ssse3, /* 4x8 */
+ predict_lbd_8x4_ssse3, /* 8x4 */
+ predict_lbd_8x16_ssse3, /* 8x16 */
+ predict_lbd_16x8_ssse3, /* 16x8 */
+ predict_lbd_16x32_ssse3, /* 16x32 */
+ predict_lbd_32x16_avx2, /* 32x16 */
+ cfl_predict_lbd_null, /* 32x64 (invalid CFL size) */
+ cfl_predict_lbd_null, /* 64x32 (invalid CFL size) */
+ predict_lbd_4x16_ssse3, /* 4x16 */
+ predict_lbd_16x4_ssse3, /* 16x4 */
+ predict_lbd_8x32_ssse3, /* 8x32 */
+ predict_lbd_32x8_avx2, /* 32x8 */
+ cfl_predict_lbd_null, /* 16x64 (invalid CFL size) */
+ cfl_predict_lbd_null, /* 64x16 (invalid CFL size) */
+ };
+ // Modulo TX_SIZES_ALL to ensure that an attacker won't be able to index the
+ // function pointer array out of bounds.
+ return pred[tx_size % TX_SIZES_ALL];
+}
+
+static __m256i highbd_max_epi16(int bd) {
+ const __m256i neg_one = _mm256_set1_epi16(-1);
+ // (1 << bd) - 1 => -(-1 << bd) -1 => -1 - (-1 << bd) => -1 ^ (-1 << bd)
+ return _mm256_xor_si256(_mm256_slli_epi16(neg_one, bd), neg_one);
+}
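+
+// Example: for bd == 10, (-1 << 10) is 0xFC00, and 0xFC00 ^ 0xFFFF == 0x03FF
+// == 1023 == (1 << 10) - 1, the maximum 10-bit pixel value.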
+
+static __m256i highbd_clamp_epi16(__m256i u, __m256i zero, __m256i max) {
+ return _mm256_max_epi16(_mm256_min_epi16(u, max), zero);
+}
+
+static INLINE void cfl_predict_hbd_avx2(const int16_t *pred_buf_q3,
+ uint16_t *dst, int dst_stride,
+ int alpha_q3, int bd, int width,
+ int height) {
+ // Use SSSE3 version for smaller widths
+ assert(width == 16 || width == 32);
+ const __m256i alpha_sign = _mm256_set1_epi16(alpha_q3);
+ const __m256i alpha_q12 = _mm256_slli_epi16(_mm256_abs_epi16(alpha_sign), 9);
+ const __m256i dc_q0 = _mm256_loadu_si256((__m256i *)dst);
+ const __m256i max = highbd_max_epi16(bd);
+
+ __m256i *row = (__m256i *)pred_buf_q3;
+ const __m256i *row_end = row + height * CFL_BUF_LINE_I256;
+ do {
+ const __m256i res = predict_unclipped(row, alpha_q12, alpha_sign, dc_q0);
+ _mm256_storeu_si256((__m256i *)dst,
+ highbd_clamp_epi16(res, _mm256_setzero_si256(), max));
+ if (width == 32) {
+ const __m256i res_1 =
+ predict_unclipped(row + 1, alpha_q12, alpha_sign, dc_q0);
+ _mm256_storeu_si256(
+ (__m256i *)(dst + 16),
+ highbd_clamp_epi16(res_1, _mm256_setzero_si256(), max));
+ }
+ dst += dst_stride;
+ } while ((row += CFL_BUF_LINE_I256) < row_end);
+}
+
+CFL_PREDICT_X(avx2, 16, 4, hbd)
+CFL_PREDICT_X(avx2, 16, 8, hbd)
+CFL_PREDICT_X(avx2, 16, 16, hbd)
+CFL_PREDICT_X(avx2, 16, 32, hbd)
+CFL_PREDICT_X(avx2, 32, 8, hbd)
+CFL_PREDICT_X(avx2, 32, 16, hbd)
+CFL_PREDICT_X(avx2, 32, 32, hbd)
+
+cfl_predict_hbd_fn get_predict_hbd_fn_avx2(TX_SIZE tx_size) {
+ static const cfl_predict_hbd_fn pred[TX_SIZES_ALL] = {
+ predict_hbd_4x4_ssse3, /* 4x4 */
+ predict_hbd_8x8_ssse3, /* 8x8 */
+ predict_hbd_16x16_avx2, /* 16x16 */
+ predict_hbd_32x32_avx2, /* 32x32 */
+ cfl_predict_hbd_null, /* 64x64 (invalid CFL size) */
+ predict_hbd_4x8_ssse3, /* 4x8 */
+ predict_hbd_8x4_ssse3, /* 8x4 */
+ predict_hbd_8x16_ssse3, /* 8x16 */
+ predict_hbd_16x8_avx2, /* 16x8 */
+ predict_hbd_16x32_avx2, /* 16x32 */
+ predict_hbd_32x16_avx2, /* 32x16 */
+ cfl_predict_hbd_null, /* 32x64 (invalid CFL size) */
+ cfl_predict_hbd_null, /* 64x32 (invalid CFL size) */
+ predict_hbd_4x16_ssse3, /* 4x16 */
+ predict_hbd_16x4_avx2, /* 16x4 */
+ predict_hbd_8x32_ssse3, /* 8x32 */
+ predict_hbd_32x8_avx2, /* 32x8 */
+ cfl_predict_hbd_null, /* 16x64 (invalid CFL size) */
+ cfl_predict_hbd_null, /* 64x16 (invalid CFL size) */
+ };
+ // Modulo TX_SIZES_ALL to ensure that an attacker won't be able to index the
+ // function pointer array out of bounds.
+ return pred[tx_size % TX_SIZES_ALL];
+}
+
+// Returns a vector where all the (32-bit) elements are the sum of all the
+// lanes in a.
+static INLINE __m256i fill_sum_epi32(__m256i a) {
+ // Given that a == [A, B, C, D, E, F, G, H]
+ a = _mm256_hadd_epi32(a, a);
+ // Given that A' == A + B, C' == C + D, E' == E + F, G' == G + H
+ // a == [A', C', A', C', E', G', E', G']
+ a = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
+ // a == [A', C', E', G', A', C', E', G']
+ a = _mm256_hadd_epi32(a, a);
+ // Given that A'' == A' + C' and E'' == E' + G'
+ // a == [A'', E'', A'', E'', A'', E'', A'', E'']
+ return _mm256_hadd_epi32(a, a);
+ // Given that A''' == A'' + E''
+ // a == [A''', A''', A''', A''', A''', A''', A''', A''']
+}
+
+static INLINE __m256i _mm256_addl_epi16(__m256i a) {
+ return _mm256_add_epi32(_mm256_unpacklo_epi16(a, _mm256_setzero_si256()),
+ _mm256_unpackhi_epi16(a, _mm256_setzero_si256()));
+}
+
+static INLINE void subtract_average_avx2(const uint16_t *src_ptr,
+ int16_t *dst_ptr, int width,
+ int height, int round_offset,
+ int num_pel_log2) {
+ // Use SSE2 version for smaller widths
+ assert(width == 16 || width == 32);
+
+ const __m256i *src = (__m256i *)src_ptr;
+ const __m256i *const end = src + height * CFL_BUF_LINE_I256;
+ // To maximize usage of the AVX2 registers, we sum two rows per loop
+ // iteration
+ const int step = 2 * CFL_BUF_LINE_I256;
+
+ __m256i sum = _mm256_setzero_si256();
+ // For width 32, we use a second sum accumulator to reduce accumulator
+ // dependencies in the loop.
+ __m256i sum2;
+ if (width == 32) sum2 = _mm256_setzero_si256();
+
+ do {
+ // Add top row to the bottom row
+ __m256i l0 = _mm256_add_epi16(_mm256_loadu_si256(src),
+ _mm256_loadu_si256(src + CFL_BUF_LINE_I256));
+ sum = _mm256_add_epi32(sum, _mm256_addl_epi16(l0));
+    if (width == 32) { /* Don't worry, this branch gets optimized out. */
+ // Add the second part of the top row to the second part of the bottom row
+ __m256i l1 =
+ _mm256_add_epi16(_mm256_loadu_si256(src + 1),
+ _mm256_loadu_si256(src + 1 + CFL_BUF_LINE_I256));
+ sum2 = _mm256_add_epi32(sum2, _mm256_addl_epi16(l1));
+ }
+ src += step;
+ } while (src < end);
+ // Combine both sum accumulators
+ if (width == 32) sum = _mm256_add_epi32(sum, sum2);
+
+ __m256i fill = fill_sum_epi32(sum);
+
+ __m256i avg_epi16 = _mm256_srli_epi32(
+ _mm256_add_epi32(fill, _mm256_set1_epi32(round_offset)), num_pel_log2);
+ avg_epi16 = _mm256_packs_epi32(avg_epi16, avg_epi16);
+
+ // Store and subtract loop
+ src = (__m256i *)src_ptr;
+ __m256i *dst = (__m256i *)dst_ptr;
+ do {
+ _mm256_storeu_si256(dst,
+ _mm256_sub_epi16(_mm256_loadu_si256(src), avg_epi16));
+ if (width == 32) {
+ _mm256_storeu_si256(
+ dst + 1, _mm256_sub_epi16(_mm256_loadu_si256(src + 1), avg_epi16));
+ }
+ src += CFL_BUF_LINE_I256;
+ dst += CFL_BUF_LINE_I256;
+ } while (src < end);
+}
+
+// Declare wrappers for AVX2 sizes
+CFL_SUB_AVG_X(avx2, 16, 4, 32, 6)
+CFL_SUB_AVG_X(avx2, 16, 8, 64, 7)
+CFL_SUB_AVG_X(avx2, 16, 16, 128, 8)
+CFL_SUB_AVG_X(avx2, 16, 32, 256, 9)
+CFL_SUB_AVG_X(avx2, 32, 8, 128, 8)
+CFL_SUB_AVG_X(avx2, 32, 16, 256, 9)
+CFL_SUB_AVG_X(avx2, 32, 32, 512, 10)
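+
+// The (round_offset, num_pel_log2) pairs above encode the block's pixel
+// count: e.g. for 32x32, num_pel_log2 == 10 and round_offset == 512, so the
+// subtracted value is avg = (sum + 512) >> 10, the rounded mean of the 1024
+// Q3 samples.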
+
+// Based on the observation that for small blocks AVX2 does not outperform
+// SSE2, we call the SSE2 code for block widths 4 and 8.
+cfl_subtract_average_fn get_subtract_average_fn_avx2(TX_SIZE tx_size) {
+ static const cfl_subtract_average_fn sub_avg[TX_SIZES_ALL] = {
+ subtract_average_4x4_sse2, /* 4x4 */
+ subtract_average_8x8_sse2, /* 8x8 */
+ subtract_average_16x16_avx2, /* 16x16 */
+ subtract_average_32x32_avx2, /* 32x32 */
+ cfl_subtract_average_null, /* 64x64 (invalid CFL size) */
+ subtract_average_4x8_sse2, /* 4x8 */
+ subtract_average_8x4_sse2, /* 8x4 */
+ subtract_average_8x16_sse2, /* 8x16 */
+ subtract_average_16x8_avx2, /* 16x8 */
+ subtract_average_16x32_avx2, /* 16x32 */
+ subtract_average_32x16_avx2, /* 32x16 */
+ cfl_subtract_average_null, /* 32x64 (invalid CFL size) */
+ cfl_subtract_average_null, /* 64x32 (invalid CFL size) */
+ subtract_average_4x16_sse2, /* 4x16 */
+ subtract_average_16x4_avx2, /* 16x4 */
+ subtract_average_8x32_sse2, /* 8x32 */
+ subtract_average_32x8_avx2, /* 32x8 */
+ cfl_subtract_average_null, /* 16x64 (invalid CFL size) */
+ cfl_subtract_average_null, /* 64x16 (invalid CFL size) */
+ };
+ // Modulo TX_SIZES_ALL to ensure that an attacker won't be able to
+ // index the function pointer array out of bounds.
+ return sub_avg[tx_size % TX_SIZES_ALL];
+}
diff --git a/third_party/aom/av1/common/x86/cfl_simd.h b/third_party/aom/av1/common/x86/cfl_simd.h
new file mode 100644
index 000000000..7479ac3e1
--- /dev/null
+++ b/third_party/aom/av1/common/x86/cfl_simd.h
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "av1/common/blockd.h"
+
+// SSSE3 version is optimal for width == 4; we reuse them in AVX2
+void subsample_lbd_420_4x4_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_420_4x8_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_420_4x16_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// SSSE3 version is optimal for width == 8; we reuse it in AVX2
+void subsample_lbd_420_8x4_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_420_8x8_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_420_8x16_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_420_8x32_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// SSSE3 version is optimal for width == 16; we reuse it in AVX2
+void subsample_lbd_420_16x4_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_420_16x8_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_420_16x16_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_420_16x32_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// SSSE3 version is optimal for width == 4; we reuse them in AVX2
+void subsample_lbd_422_4x4_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_422_4x8_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_422_4x16_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// SSSE3 version is optimal for width == 8; we reuse it in AVX2
+void subsample_lbd_422_8x4_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_422_8x8_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_422_8x16_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_422_8x32_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// SSSE3 version is optimal for width == 16; we reuse it in AVX2
+void subsample_lbd_422_16x4_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_422_16x8_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_422_16x16_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_422_16x32_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// SSSE3 version is optimal for width == 4; we reuse them in AVX2
+void subsample_lbd_444_4x4_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_444_4x8_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_444_4x16_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// SSSE3 version is optimal for width == 8; we reuse it in AVX2
+void subsample_lbd_444_8x4_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_444_8x8_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_444_8x16_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_444_8x32_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// SSSE3 version is optimal for width == 16; we reuse it in AVX2
+void subsample_lbd_444_16x4_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_444_16x8_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_444_16x16_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_lbd_444_16x32_ssse3(const uint8_t *input, int input_stride,
+ uint16_t *output_q3);
+
+void subsample_hbd_420_4x4_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_420_4x8_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_420_4x16_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// SSSE3 version is optimal for width == 8; we reuse it in AVX2
+void subsample_hbd_420_8x4_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_420_8x8_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_420_8x16_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_420_8x32_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// SSSE3 version is faster for width == 16; we reuse it in AVX2
+void subsample_hbd_420_16x4_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_420_16x8_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_420_16x16_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_420_16x32_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+
+void subsample_hbd_422_4x4_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_422_4x8_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_422_4x16_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// SSSE3 version is optimal for width == 8; we reuse it in AVX2
+void subsample_hbd_422_8x4_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_422_8x8_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_422_8x16_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_422_8x32_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// SSSE3 version is faster for width == 16; we reuse it in AVX2
+void subsample_hbd_422_16x4_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_422_16x8_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_422_16x16_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_422_16x32_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+
+void subsample_hbd_444_4x4_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_444_4x8_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_444_4x16_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// SSSE3 version is optimal for width == 8; we reuse it in AVX2
+void subsample_hbd_444_8x4_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_444_8x8_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_444_8x16_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_444_8x32_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// SSSE3 version is faster for width == 16; we reuse it in AVX2
+void subsample_hbd_444_16x4_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_444_16x8_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_444_16x16_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+void subsample_hbd_444_16x32_ssse3(const uint16_t *input, int input_stride,
+ uint16_t *output_q3);
+
+// SSE2 version is optimal for width == 4; we reuse them in AVX2
+void subtract_average_4x4_sse2(const uint16_t *src, int16_t *dst);
+void subtract_average_4x8_sse2(const uint16_t *src, int16_t *dst);
+void subtract_average_4x16_sse2(const uint16_t *src, int16_t *dst);
+
+// SSE2 version is optimal for width == 8; we reuse them in AVX2
+void subtract_average_8x4_sse2(const uint16_t *src, int16_t *dst);
+void subtract_average_8x8_sse2(const uint16_t *src, int16_t *dst);
+void subtract_average_8x16_sse2(const uint16_t *src, int16_t *dst);
+void subtract_average_8x32_sse2(const uint16_t *src, int16_t *dst);
+
+void predict_lbd_4x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
+ int dst_stride, int alpha_q3);
+void predict_lbd_4x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
+ int dst_stride, int alpha_q3);
+void predict_lbd_4x16_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
+ int dst_stride, int alpha_q3);
+
+void predict_lbd_8x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
+ int dst_stride, int alpha_q3);
+void predict_lbd_8x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
+ int dst_stride, int alpha_q3);
+void predict_lbd_8x16_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
+ int dst_stride, int alpha_q3);
+void predict_lbd_8x32_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
+ int dst_stride, int alpha_q3);
+
+void predict_lbd_16x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
+ int dst_stride, int alpha_q3);
+void predict_lbd_16x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
+ int dst_stride, int alpha_q3);
+void predict_lbd_16x16_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
+ int dst_stride, int alpha_q3);
+void predict_lbd_16x32_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
+ int dst_stride, int alpha_q3);
+
+void predict_hbd_4x4_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
+ int dst_stride, int alpha_q3, int bd);
+void predict_hbd_4x8_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
+ int dst_stride, int alpha_q3, int bd);
+void predict_hbd_4x16_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
+ int dst_stride, int alpha_q3, int bd);
+
+void predict_hbd_8x4_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
+ int dst_stride, int alpha_q3, int bd);
+void predict_hbd_8x8_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
+ int dst_stride, int alpha_q3, int bd);
+void predict_hbd_8x16_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
+ int dst_stride, int alpha_q3, int bd);
+void predict_hbd_8x32_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
+ int dst_stride, int alpha_q3, int bd);
+
+void predict_hbd_16x4_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
+ int dst_stride, int alpha_q3, int bd);
+void predict_hbd_16x8_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
+ int dst_stride, int alpha_q3, int bd);
+void predict_hbd_16x16_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
+ int dst_stride, int alpha_q3, int bd);
+void predict_hbd_16x32_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
+ int dst_stride, int alpha_q3, int bd);
diff --git a/third_party/aom/av1/common/x86/cfl_sse2.c b/third_party/aom/av1/common/x86/cfl_sse2.c
new file mode 100644
index 000000000..4783fe098
--- /dev/null
+++ b/third_party/aom/av1/common/x86/cfl_sse2.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <emmintrin.h>
+
+#include "av1/common/cfl.h"
+#include "config/av1_rtcd.h"
+
+static INLINE __m128i fill_sum_epi32(__m128i l0) {
+ l0 = _mm_add_epi32(l0, _mm_shuffle_epi32(l0, _MM_SHUFFLE(1, 0, 3, 2)));
+ return _mm_add_epi32(l0, _mm_shuffle_epi32(l0, _MM_SHUFFLE(2, 3, 0, 1)));
+}
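+
+// fill_sum_epi32 sketch: with l0 == [a, b, c, d], the first shuffle/add gives
+// [a+c, b+d, a+c, b+d] and the second gives a+b+c+d in every 32-bit lane.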
+
+static INLINE void subtract_average_sse2(const uint16_t *src_ptr,
+ int16_t *dst_ptr, int width,
+ int height, int round_offset,
+ int num_pel_log2) {
+ const __m128i zeros = _mm_setzero_si128();
+ const __m128i round_offset_epi32 = _mm_set1_epi32(round_offset);
+ const __m128i *src = (__m128i *)src_ptr;
+ const __m128i *const end = src + height * CFL_BUF_LINE_I128;
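+  // The step below advances 4 rows per iteration when width == 4, 2 rows
+  // when width == 8, and 1 row otherwise, matching how many rows the loop
+  // body accumulates at a time.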
+ const int step = CFL_BUF_LINE_I128 * (1 + (width == 8) + 3 * (width == 4));
+
+ __m128i sum = zeros;
+ do {
+ __m128i l0;
+ if (width == 4) {
+ l0 = _mm_add_epi16(_mm_loadl_epi64(src),
+ _mm_loadl_epi64(src + CFL_BUF_LINE_I128));
+ __m128i l1 = _mm_add_epi16(_mm_loadl_epi64(src + 2 * CFL_BUF_LINE_I128),
+ _mm_loadl_epi64(src + 3 * CFL_BUF_LINE_I128));
+ sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros),
+ _mm_unpacklo_epi16(l1, zeros)));
+ } else {
+ if (width == 8) {
+ l0 = _mm_add_epi16(_mm_loadu_si128(src),
+ _mm_loadu_si128(src + CFL_BUF_LINE_I128));
+ } else {
+ l0 = _mm_add_epi16(_mm_loadu_si128(src), _mm_loadu_si128(src + 1));
+ }
+ sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros),
+ _mm_unpackhi_epi16(l0, zeros)));
+ if (width == 32) {
+ l0 = _mm_add_epi16(_mm_loadu_si128(src + 2), _mm_loadu_si128(src + 3));
+ sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros),
+ _mm_unpackhi_epi16(l0, zeros)));
+ }
+ }
+ src += step;
+ } while (src < end);
+
+ sum = fill_sum_epi32(sum);
+
+ __m128i avg_epi16 =
+ _mm_srli_epi32(_mm_add_epi32(sum, round_offset_epi32), num_pel_log2);
+ avg_epi16 = _mm_packs_epi32(avg_epi16, avg_epi16);
+
+ src = (__m128i *)src_ptr;
+ __m128i *dst = (__m128i *)dst_ptr;
+ do {
+ if (width == 4) {
+ _mm_storel_epi64(dst, _mm_sub_epi16(_mm_loadl_epi64(src), avg_epi16));
+ } else {
+ _mm_storeu_si128(dst, _mm_sub_epi16(_mm_loadu_si128(src), avg_epi16));
+ if (width > 8) {
+ _mm_storeu_si128(dst + 1,
+ _mm_sub_epi16(_mm_loadu_si128(src + 1), avg_epi16));
+ if (width == 32) {
+ _mm_storeu_si128(dst + 2,
+ _mm_sub_epi16(_mm_loadu_si128(src + 2), avg_epi16));
+ _mm_storeu_si128(dst + 3,
+ _mm_sub_epi16(_mm_loadu_si128(src + 3), avg_epi16));
+ }
+ }
+ }
+ src += CFL_BUF_LINE_I128;
+ dst += CFL_BUF_LINE_I128;
+ } while (src < end);
+}
+
+CFL_SUB_AVG_FN(sse2)
diff --git a/third_party/aom/av1/common/x86/cfl_ssse3.c b/third_party/aom/av1/common/x86/cfl_ssse3.c
new file mode 100644
index 000000000..bbf007295
--- /dev/null
+++ b/third_party/aom/av1/common/x86/cfl_ssse3.c
@@ -0,0 +1,393 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tmmintrin.h>
+
+#include "config/av1_rtcd.h"
+
+#include "av1/common/cfl.h"
+
+#include "av1/common/x86/cfl_simd.h"
+
+// Load 32-bit integer from memory into the first element of dst.
+static INLINE __m128i _mm_loadh_epi32(__m128i const *mem_addr) {
+ return _mm_cvtsi32_si128(*((int *)mem_addr));
+}
+
+// Store 32-bit integer from the first element of a into memory.
+static INLINE void _mm_storeh_epi32(__m128i const *mem_addr, __m128i a) {
+ *((int *)mem_addr) = _mm_cvtsi128_si32(a);
+}
+
+/**
+ * Adds 4 pixels (in a 2x2 grid) and multiplies them by 2, resulting in a more
+ * precise version of box-filter 4:2:0 pixel subsampling in Q3.
+ *
+ * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
+ * active area is specified using width and height.
+ *
+ * Note: We don't need to worry about going over the active area, as long as we
+ * stay inside the CfL prediction buffer.
+ */
+static INLINE void cfl_luma_subsampling_420_lbd_ssse3(const uint8_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3,
+ int width, int height) {
+ const __m128i twos = _mm_set1_epi8(2);
+ __m128i *pred_buf_m128i = (__m128i *)pred_buf_q3;
+ const __m128i *end = pred_buf_m128i + (height >> 1) * CFL_BUF_LINE_I128;
+ const int luma_stride = input_stride << 1;
+ do {
+ if (width == 4) {
+ __m128i top = _mm_loadh_epi32((__m128i *)input);
+ top = _mm_maddubs_epi16(top, twos);
+ __m128i bot = _mm_loadh_epi32((__m128i *)(input + input_stride));
+ bot = _mm_maddubs_epi16(bot, twos);
+ const __m128i sum = _mm_add_epi16(top, bot);
+ _mm_storeh_epi32(pred_buf_m128i, sum);
+ } else if (width == 8) {
+ __m128i top = _mm_loadl_epi64((__m128i *)input);
+ top = _mm_maddubs_epi16(top, twos);
+ __m128i bot = _mm_loadl_epi64((__m128i *)(input + input_stride));
+ bot = _mm_maddubs_epi16(bot, twos);
+ const __m128i sum = _mm_add_epi16(top, bot);
+ _mm_storel_epi64(pred_buf_m128i, sum);
+ } else {
+ __m128i top = _mm_loadu_si128((__m128i *)input);
+ top = _mm_maddubs_epi16(top, twos);
+ __m128i bot = _mm_loadu_si128((__m128i *)(input + input_stride));
+ bot = _mm_maddubs_epi16(bot, twos);
+ const __m128i sum = _mm_add_epi16(top, bot);
+ _mm_storeu_si128(pred_buf_m128i, sum);
+ if (width == 32) {
+ __m128i top_1 = _mm_loadu_si128(((__m128i *)input) + 1);
+ __m128i bot_1 =
+ _mm_loadu_si128(((__m128i *)(input + input_stride)) + 1);
+ top_1 = _mm_maddubs_epi16(top_1, twos);
+ bot_1 = _mm_maddubs_epi16(bot_1, twos);
+ __m128i sum_1 = _mm_add_epi16(top_1, bot_1);
+ _mm_storeu_si128(pred_buf_m128i + 1, sum_1);
+ }
+ }
+ input += luma_stride;
+ pred_buf_m128i += CFL_BUF_LINE_I128;
+ } while (pred_buf_m128i < end);
+}
+
+/**
+ * Adds 2 pixels (in a 2x1 grid) and multiplies them by 4, resulting in a more
+ * precise version of box-filter 4:2:2 pixel subsampling in Q3.
+ *
+ * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
+ * active area is specified using width and height.
+ *
+ * Note: We don't need to worry about going over the active area, as long as we
+ * stay inside the CfL prediction buffer.
+ */
+static INLINE void cfl_luma_subsampling_422_lbd_ssse3(const uint8_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3,
+ int width, int height) {
+ const __m128i fours = _mm_set1_epi8(4);
+ __m128i *pred_buf_m128i = (__m128i *)pred_buf_q3;
+ const __m128i *end = pred_buf_m128i + height * CFL_BUF_LINE_I128;
+ do {
+ if (width == 4) {
+ __m128i top = _mm_loadh_epi32((__m128i *)input);
+ top = _mm_maddubs_epi16(top, fours);
+ _mm_storeh_epi32(pred_buf_m128i, top);
+ } else if (width == 8) {
+ __m128i top = _mm_loadl_epi64((__m128i *)input);
+ top = _mm_maddubs_epi16(top, fours);
+ _mm_storel_epi64(pred_buf_m128i, top);
+ } else {
+ __m128i top = _mm_loadu_si128((__m128i *)input);
+ top = _mm_maddubs_epi16(top, fours);
+ _mm_storeu_si128(pred_buf_m128i, top);
+ if (width == 32) {
+ __m128i top_1 = _mm_loadu_si128(((__m128i *)input) + 1);
+ top_1 = _mm_maddubs_epi16(top_1, fours);
+ _mm_storeu_si128(pred_buf_m128i + 1, top_1);
+ }
+ }
+ input += input_stride;
+ pred_buf_m128i += CFL_BUF_LINE_I128;
+ } while (pred_buf_m128i < end);
+}
+
+/**
+ * Multiplies the pixels by 8 (scaling in Q3).
+ *
+ * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
+ * active area is specified using width and height.
+ *
+ * Note: We don't need to worry about going over the active area, as long as we
+ * stay inside the CfL prediction buffer.
+ */
+static INLINE void cfl_luma_subsampling_444_lbd_ssse3(const uint8_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3,
+ int width, int height) {
+ const __m128i zeros = _mm_setzero_si128();
+ const int luma_stride = input_stride;
+ __m128i *pred_buf_m128i = (__m128i *)pred_buf_q3;
+ const __m128i *end = pred_buf_m128i + height * CFL_BUF_LINE_I128;
+ do {
+ if (width == 4) {
+ __m128i row = _mm_loadh_epi32((__m128i *)input);
+ row = _mm_unpacklo_epi8(row, zeros);
+ _mm_storel_epi64(pred_buf_m128i, _mm_slli_epi16(row, 3));
+ } else if (width == 8) {
+ __m128i row = _mm_loadl_epi64((__m128i *)input);
+ row = _mm_unpacklo_epi8(row, zeros);
+ _mm_storeu_si128(pred_buf_m128i, _mm_slli_epi16(row, 3));
+ } else {
+ __m128i row = _mm_loadu_si128((__m128i *)input);
+ const __m128i row_lo = _mm_unpacklo_epi8(row, zeros);
+ const __m128i row_hi = _mm_unpackhi_epi8(row, zeros);
+ _mm_storeu_si128(pred_buf_m128i, _mm_slli_epi16(row_lo, 3));
+ _mm_storeu_si128(pred_buf_m128i + 1, _mm_slli_epi16(row_hi, 3));
+ if (width == 32) {
+ __m128i row_1 = _mm_loadu_si128(((__m128i *)input) + 1);
+ const __m128i row_1_lo = _mm_unpacklo_epi8(row_1, zeros);
+ const __m128i row_1_hi = _mm_unpackhi_epi8(row_1, zeros);
+ _mm_storeu_si128(pred_buf_m128i + 2, _mm_slli_epi16(row_1_lo, 3));
+ _mm_storeu_si128(pred_buf_m128i + 3, _mm_slli_epi16(row_1_hi, 3));
+ }
+ }
+ input += luma_stride;
+ pred_buf_m128i += CFL_BUF_LINE_I128;
+ } while (pred_buf_m128i < end);
+}
+
+/**
+ * Adds 4 pixels (in a 2x2 grid) and multiplies them by 2, resulting in a more
+ * precise version of box-filter 4:2:0 pixel subsampling in Q3.
+ *
+ * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
+ * active area is specified using width and height.
+ *
+ * Note: We don't need to worry about going over the active area, as long as we
+ * stay inside the CfL prediction buffer.
+ */
+static INLINE void cfl_luma_subsampling_420_hbd_ssse3(const uint16_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3,
+ int width, int height) {
+ const uint16_t *end = pred_buf_q3 + (height >> 1) * CFL_BUF_LINE;
+ const int luma_stride = input_stride << 1;
+ do {
+ if (width == 4) {
+ const __m128i top = _mm_loadl_epi64((__m128i *)input);
+ const __m128i bot = _mm_loadl_epi64((__m128i *)(input + input_stride));
+ __m128i sum = _mm_add_epi16(top, bot);
+ sum = _mm_hadd_epi16(sum, sum);
+ *((int *)pred_buf_q3) = _mm_cvtsi128_si32(_mm_add_epi16(sum, sum));
+ } else {
+ const __m128i top = _mm_loadu_si128((__m128i *)input);
+ const __m128i bot = _mm_loadu_si128((__m128i *)(input + input_stride));
+ __m128i sum = _mm_add_epi16(top, bot);
+ if (width == 8) {
+ sum = _mm_hadd_epi16(sum, sum);
+ _mm_storel_epi64((__m128i *)pred_buf_q3, _mm_add_epi16(sum, sum));
+ } else {
+ const __m128i top_1 = _mm_loadu_si128(((__m128i *)input) + 1);
+ const __m128i bot_1 =
+ _mm_loadu_si128(((__m128i *)(input + input_stride)) + 1);
+ sum = _mm_hadd_epi16(sum, _mm_add_epi16(top_1, bot_1));
+ _mm_storeu_si128((__m128i *)pred_buf_q3, _mm_add_epi16(sum, sum));
+ if (width == 32) {
+ const __m128i top_2 = _mm_loadu_si128(((__m128i *)input) + 2);
+ const __m128i bot_2 =
+ _mm_loadu_si128(((__m128i *)(input + input_stride)) + 2);
+ const __m128i top_3 = _mm_loadu_si128(((__m128i *)input) + 3);
+ const __m128i bot_3 =
+ _mm_loadu_si128(((__m128i *)(input + input_stride)) + 3);
+ const __m128i sum_2 = _mm_add_epi16(top_2, bot_2);
+ const __m128i sum_3 = _mm_add_epi16(top_3, bot_3);
+ __m128i next_sum = _mm_hadd_epi16(sum_2, sum_3);
+ _mm_storeu_si128(((__m128i *)pred_buf_q3) + 1,
+ _mm_add_epi16(next_sum, next_sum));
+ }
+ }
+ }
+ input += luma_stride;
+ } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
+}
+
+/**
+ * Adds 2 pixels (in a 2x1 grid) and multiplies them by 4, resulting in a more
+ * precise version of box-filter 4:2:2 pixel subsampling in Q3.
+ *
+ * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
+ * active area is specified using width and height.
+ *
+ * Note: We don't need to worry about going over the active area, as long as we
+ * stay inside the CfL prediction buffer.
+ */
+static INLINE void cfl_luma_subsampling_422_hbd_ssse3(const uint16_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3,
+ int width, int height) {
+ __m128i *pred_buf_m128i = (__m128i *)pred_buf_q3;
+ const __m128i *end = pred_buf_m128i + height * CFL_BUF_LINE_I128;
+ do {
+ if (width == 4) {
+ const __m128i top = _mm_loadl_epi64((__m128i *)input);
+ const __m128i sum = _mm_slli_epi16(_mm_hadd_epi16(top, top), 2);
+ _mm_storeh_epi32(pred_buf_m128i, sum);
+ } else {
+ const __m128i top = _mm_loadu_si128((__m128i *)input);
+ if (width == 8) {
+ const __m128i sum = _mm_slli_epi16(_mm_hadd_epi16(top, top), 2);
+ _mm_storel_epi64(pred_buf_m128i, sum);
+ } else {
+ const __m128i top_1 = _mm_loadu_si128(((__m128i *)input) + 1);
+ const __m128i sum = _mm_slli_epi16(_mm_hadd_epi16(top, top_1), 2);
+ _mm_storeu_si128(pred_buf_m128i, sum);
+ if (width == 32) {
+ const __m128i top_2 = _mm_loadu_si128(((__m128i *)input) + 2);
+ const __m128i top_3 = _mm_loadu_si128(((__m128i *)input) + 3);
+ const __m128i sum_1 = _mm_slli_epi16(_mm_hadd_epi16(top_2, top_3), 2);
+ _mm_storeu_si128(pred_buf_m128i + 1, sum_1);
+ }
+ }
+ }
+ pred_buf_m128i += CFL_BUF_LINE_I128;
+ input += input_stride;
+ } while (pred_buf_m128i < end);
+}
+
+static INLINE void cfl_luma_subsampling_444_hbd_ssse3(const uint16_t *input,
+ int input_stride,
+ uint16_t *pred_buf_q3,
+ int width, int height) {
+ const uint16_t *end = pred_buf_q3 + height * CFL_BUF_LINE;
+ do {
+ if (width == 4) {
+ const __m128i row = _mm_slli_epi16(_mm_loadl_epi64((__m128i *)input), 3);
+ _mm_storel_epi64((__m128i *)pred_buf_q3, row);
+ } else {
+ const __m128i row = _mm_slli_epi16(_mm_loadu_si128((__m128i *)input), 3);
+ _mm_storeu_si128((__m128i *)pred_buf_q3, row);
+ if (width >= 16) {
+ __m128i row_1 = _mm_loadu_si128(((__m128i *)input) + 1);
+ row_1 = _mm_slli_epi16(row_1, 3);
+ _mm_storeu_si128(((__m128i *)pred_buf_q3) + 1, row_1);
+ if (width == 32) {
+ __m128i row_2 = _mm_loadu_si128(((__m128i *)input) + 2);
+ row_2 = _mm_slli_epi16(row_2, 3);
+ _mm_storeu_si128(((__m128i *)pred_buf_q3) + 2, row_2);
+ __m128i row_3 = _mm_loadu_si128(((__m128i *)input) + 3);
+ row_3 = _mm_slli_epi16(row_3, 3);
+ _mm_storeu_si128(((__m128i *)pred_buf_q3) + 3, row_3);
+ }
+ }
+ }
+ input += input_stride;
+ pred_buf_q3 += CFL_BUF_LINE;
+ } while (pred_buf_q3 < end);
+}
+
+CFL_GET_SUBSAMPLE_FUNCTION(ssse3)
+
+static INLINE __m128i predict_unclipped(const __m128i *input, __m128i alpha_q12,
+ __m128i alpha_sign, __m128i dc_q0) {
+ __m128i ac_q3 = _mm_loadu_si128(input);
+ __m128i ac_sign = _mm_sign_epi16(alpha_sign, ac_q3);
+ __m128i scaled_luma_q0 = _mm_mulhrs_epi16(_mm_abs_epi16(ac_q3), alpha_q12);
+ scaled_luma_q0 = _mm_sign_epi16(scaled_luma_q0, ac_sign);
+ return _mm_add_epi16(scaled_luma_q0, dc_q0);
+}
+
+static INLINE void cfl_predict_lbd_ssse3(const int16_t *pred_buf_q3,
+ uint8_t *dst, int dst_stride,
+ int alpha_q3, int width, int height) {
+ const __m128i alpha_sign = _mm_set1_epi16(alpha_q3);
+ const __m128i alpha_q12 = _mm_slli_epi16(_mm_abs_epi16(alpha_sign), 9);
+ const __m128i dc_q0 = _mm_set1_epi16(*dst);
+ __m128i *row = (__m128i *)pred_buf_q3;
+ const __m128i *row_end = row + height * CFL_BUF_LINE_I128;
+ do {
+ __m128i res = predict_unclipped(row, alpha_q12, alpha_sign, dc_q0);
+ if (width < 16) {
+ res = _mm_packus_epi16(res, res);
+ if (width == 4)
+ _mm_storeh_epi32((__m128i *)dst, res);
+ else
+ _mm_storel_epi64((__m128i *)dst, res);
+ } else {
+ __m128i next = predict_unclipped(row + 1, alpha_q12, alpha_sign, dc_q0);
+ res = _mm_packus_epi16(res, next);
+ _mm_storeu_si128((__m128i *)dst, res);
+ if (width == 32) {
+ res = predict_unclipped(row + 2, alpha_q12, alpha_sign, dc_q0);
+ next = predict_unclipped(row + 3, alpha_q12, alpha_sign, dc_q0);
+ res = _mm_packus_epi16(res, next);
+ _mm_storeu_si128((__m128i *)(dst + 16), res);
+ }
+ }
+ dst += dst_stride;
+ } while ((row += CFL_BUF_LINE_I128) < row_end);
+}
+
+CFL_PREDICT_FN(ssse3, lbd)
+
+static INLINE __m128i highbd_max_epi16(int bd) {
+ const __m128i neg_one = _mm_set1_epi16(-1);
+ // (1 << bd) - 1 => -(-1 << bd) -1 => -1 - (-1 << bd) => -1 ^ (-1 << bd)
+ return _mm_xor_si128(_mm_slli_epi16(neg_one, bd), neg_one);
+}
+
+static INLINE __m128i highbd_clamp_epi16(__m128i u, __m128i zero, __m128i max) {
+ return _mm_max_epi16(_mm_min_epi16(u, max), zero);
+}
+
+static INLINE void cfl_predict_hbd_ssse3(const int16_t *pred_buf_q3,
+ uint16_t *dst, int dst_stride,
+ int alpha_q3, int bd, int width,
+ int height) {
+ const __m128i alpha_sign = _mm_set1_epi16(alpha_q3);
+ const __m128i alpha_q12 = _mm_slli_epi16(_mm_abs_epi16(alpha_sign), 9);
+ const __m128i dc_q0 = _mm_set1_epi16(*dst);
+ const __m128i max = highbd_max_epi16(bd);
+ const __m128i zeros = _mm_setzero_si128();
+ __m128i *row = (__m128i *)pred_buf_q3;
+ const __m128i *row_end = row + height * CFL_BUF_LINE_I128;
+ do {
+ __m128i res = predict_unclipped(row, alpha_q12, alpha_sign, dc_q0);
+ res = highbd_clamp_epi16(res, zeros, max);
+ if (width == 4) {
+ _mm_storel_epi64((__m128i *)dst, res);
+ } else {
+ _mm_storeu_si128((__m128i *)dst, res);
+ }
+ if (width >= 16) {
+ const __m128i res_1 =
+ predict_unclipped(row + 1, alpha_q12, alpha_sign, dc_q0);
+ _mm_storeu_si128(((__m128i *)dst) + 1,
+ highbd_clamp_epi16(res_1, zeros, max));
+ }
+ if (width == 32) {
+ const __m128i res_2 =
+ predict_unclipped(row + 2, alpha_q12, alpha_sign, dc_q0);
+ _mm_storeu_si128((__m128i *)(dst + 16),
+ highbd_clamp_epi16(res_2, zeros, max));
+ const __m128i res_3 =
+ predict_unclipped(row + 3, alpha_q12, alpha_sign, dc_q0);
+ _mm_storeu_si128((__m128i *)(dst + 24),
+ highbd_clamp_epi16(res_3, zeros, max));
+ }
+ dst += dst_stride;
+ } while ((row += CFL_BUF_LINE_I128) < row_end);
+}
+
+CFL_PREDICT_FN(ssse3, hbd)
diff --git a/third_party/aom/av1/common/x86/convolve_2d_avx2.c b/third_party/aom/av1/common/x86/convolve_2d_avx2.c
new file mode 100644
index 000000000..fd5e90a2e
--- /dev/null
+++ b/third_party/aom/av1/common/x86/convolve_2d_avx2.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <immintrin.h>
+
+#include "config/aom_dsp_rtcd.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_dsp/aom_convolve.h"
+#include "aom_dsp/x86/convolve_avx2.h"
+#include "aom_dsp/x86/convolve_common_intrin.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/aom_filter.h"
+#include "aom_dsp/x86/synonyms.h"
+#include "av1/common/convolve.h"
+
+void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ const int bd = 8;
+
+ DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]);
+ int im_h = h + filter_params_y->taps - 1;
+ int im_stride = 8;
+ int i, j;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
+
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+
+ __m256i filt[4], coeffs_h[4], coeffs_v[4];
+
+ assert(conv_params->round_0 > 0);
+
+ filt[0] = _mm256_load_si256((__m256i const *)filt1_global_avx2);
+ filt[1] = _mm256_load_si256((__m256i const *)filt2_global_avx2);
+ filt[2] = _mm256_load_si256((__m256i const *)filt3_global_avx2);
+ filt[3] = _mm256_load_si256((__m256i const *)filt4_global_avx2);
+
+ prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs_h);
+ prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_v);
+
+ const __m256i round_const_h = _mm256_set1_epi16(
+ ((1 << (conv_params->round_0 - 1)) >> 1) + (1 << (bd + FILTER_BITS - 2)));
+ const __m128i round_shift_h = _mm_cvtsi32_si128(conv_params->round_0 - 1);
+
+ const __m256i sum_round_v = _mm256_set1_epi32(
+ (1 << offset_bits) + ((1 << conv_params->round_1) >> 1));
+ const __m128i sum_shift_v = _mm_cvtsi32_si128(conv_params->round_1);
+
+ const __m256i round_const_v = _mm256_set1_epi32(
+ ((1 << bits) >> 1) - (1 << (offset_bits - conv_params->round_1)) -
+ ((1 << (offset_bits - conv_params->round_1)) >> 1));
+ const __m128i round_shift_v = _mm_cvtsi32_si128(bits);
+
+ for (j = 0; j < w; j += 8) {
+ for (i = 0; i < im_h; i += 2) {
+ __m256i data = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)&src_ptr[(i * src_stride) + j]));
+
+ // Load the next line
+ if (i + 1 < im_h)
+ data = _mm256_inserti128_si256(
+ data,
+ _mm_loadu_si128(
+ (__m128i *)&src_ptr[(i * src_stride) + j + src_stride]),
+ 1);
+
+ __m256i res = convolve_lowbd_x(data, coeffs_h, filt);
+
+ res =
+ _mm256_sra_epi16(_mm256_add_epi16(res, round_const_h), round_shift_h);
+
+ _mm256_store_si256((__m256i *)&im_block[i * im_stride], res);
+ }
+
+ /* Vertical filter */
+ {
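+ // Prime the 8-tap vertical filter: preload the first six intermediate
+ // rows and keep s[] as a sliding window of interleaved row pairs.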
+ __m256i src_0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride));
+ __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride));
+ __m256i src_2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride));
+ __m256i src_3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride));
+ __m256i src_4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride));
+ __m256i src_5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride));
+
+ __m256i s[8];
+ s[0] = _mm256_unpacklo_epi16(src_0, src_1);
+ s[1] = _mm256_unpacklo_epi16(src_2, src_3);
+ s[2] = _mm256_unpacklo_epi16(src_4, src_5);
+
+ s[4] = _mm256_unpackhi_epi16(src_0, src_1);
+ s[5] = _mm256_unpackhi_epi16(src_2, src_3);
+ s[6] = _mm256_unpackhi_epi16(src_4, src_5);
+
+ for (i = 0; i < h; i += 2) {
+ const int16_t *data = &im_block[i * im_stride];
+
+ const __m256i s6 =
+ _mm256_loadu_si256((__m256i *)(data + 6 * im_stride));
+ const __m256i s7 =
+ _mm256_loadu_si256((__m256i *)(data + 7 * im_stride));
+
+ s[3] = _mm256_unpacklo_epi16(s6, s7);
+ s[7] = _mm256_unpackhi_epi16(s6, s7);
+
+ __m256i res_a = convolve(s, coeffs_v);
+ __m256i res_b = convolve(s + 4, coeffs_v);
+
+ // Combine V round and 2F-H-V round into a single rounding
+ res_a =
+ _mm256_sra_epi32(_mm256_add_epi32(res_a, sum_round_v), sum_shift_v);
+ res_b =
+ _mm256_sra_epi32(_mm256_add_epi32(res_b, sum_round_v), sum_shift_v);
+
+ const __m256i res_a_round = _mm256_sra_epi32(
+ _mm256_add_epi32(res_a, round_const_v), round_shift_v);
+ const __m256i res_b_round = _mm256_sra_epi32(
+ _mm256_add_epi32(res_b, round_const_v), round_shift_v);
+
+ /* rounding code */
+ // 16 bit conversion
+ const __m256i res_16bit = _mm256_packs_epi32(res_a_round, res_b_round);
+ // 8 bit conversion and saturation to uint8
+ const __m256i res_8b = _mm256_packus_epi16(res_16bit, res_16bit);
+
+ const __m128i res_0 = _mm256_castsi256_si128(res_8b);
+ const __m128i res_1 = _mm256_extracti128_si256(res_8b, 1);
+
+ // Store values into the destination buffer
+ __m128i *const p_0 = (__m128i *)&dst[i * dst_stride + j];
+ __m128i *const p_1 = (__m128i *)&dst[i * dst_stride + j + dst_stride];
+ if (w - j > 4) {
+ _mm_storel_epi64(p_0, res_0);
+ _mm_storel_epi64(p_1, res_1);
+ } else if (w == 4) {
+ xx_storel_32(p_0, res_0);
+ xx_storel_32(p_1, res_1);
+ } else {
+ *(uint16_t *)p_0 = _mm_cvtsi128_si32(res_0);
+ *(uint16_t *)p_1 = _mm_cvtsi128_si32(res_1);
+ }
+
+ s[0] = s[1];
+ s[1] = s[2];
+ s[2] = s[3];
+
+ s[4] = s[5];
+ s[5] = s[6];
+ s[6] = s[7];
+ }
+ }
+ }
+}
+
+static INLINE void copy_128(const uint8_t *src, uint8_t *dst) {
+ __m256i s[4];
+ s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 32));
+ s[1] = _mm256_loadu_si256((__m256i *)(src + 1 * 32));
+ s[2] = _mm256_loadu_si256((__m256i *)(src + 2 * 32));
+ s[3] = _mm256_loadu_si256((__m256i *)(src + 3 * 32));
+ _mm256_storeu_si256((__m256i *)(dst + 0 * 32), s[0]);
+ _mm256_storeu_si256((__m256i *)(dst + 1 * 32), s[1]);
+ _mm256_storeu_si256((__m256i *)(dst + 2 * 32), s[2]);
+ _mm256_storeu_si256((__m256i *)(dst + 3 * 32), s[3]);
+}
+
+void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ (void)filter_params_x;
+ (void)filter_params_y;
+ (void)subpel_x_q4;
+ (void)subpel_y_q4;
+ (void)conv_params;
+
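+ // Pure copy path: no filtering is applied, so each block width gets a
+ // specialized copy that handles two rows per iteration.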
+ if (w >= 16) {
+ assert(!((intptr_t)dst % 16));
+ assert(!(dst_stride % 16));
+ }
+
+ if (w == 2) {
+ do {
+ memcpy(dst, src, 2 * sizeof(*src));
+ src += src_stride;
+ dst += dst_stride;
+ memcpy(dst, src, 2 * sizeof(*src));
+ src += src_stride;
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 4) {
+ do {
+ memcpy(dst, src, 4 * sizeof(*src));
+ src += src_stride;
+ dst += dst_stride;
+ memcpy(dst, src, 4 * sizeof(*src));
+ src += src_stride;
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 8) {
+ do {
+ __m128i s[2];
+ s[0] = _mm_loadl_epi64((__m128i *)src);
+ src += src_stride;
+ s[1] = _mm_loadl_epi64((__m128i *)src);
+ src += src_stride;
+ _mm_storel_epi64((__m128i *)dst, s[0]);
+ dst += dst_stride;
+ _mm_storel_epi64((__m128i *)dst, s[1]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 16) {
+ do {
+ __m128i s[2];
+ s[0] = _mm_loadu_si128((__m128i *)src);
+ src += src_stride;
+ s[1] = _mm_loadu_si128((__m128i *)src);
+ src += src_stride;
+ _mm_store_si128((__m128i *)dst, s[0]);
+ dst += dst_stride;
+ _mm_store_si128((__m128i *)dst, s[1]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 32) {
+ do {
+ __m256i s[2];
+ s[0] = _mm256_loadu_si256((__m256i *)src);
+ src += src_stride;
+ s[1] = _mm256_loadu_si256((__m256i *)src);
+ src += src_stride;
+ _mm256_storeu_si256((__m256i *)dst, s[0]);
+ dst += dst_stride;
+ _mm256_storeu_si256((__m256i *)dst, s[1]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 64) {
+ do {
+ __m256i s[4];
+ s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 32));
+ s[1] = _mm256_loadu_si256((__m256i *)(src + 1 * 32));
+ src += src_stride;
+ s[2] = _mm256_loadu_si256((__m256i *)(src + 0 * 32));
+ s[3] = _mm256_loadu_si256((__m256i *)(src + 1 * 32));
+ src += src_stride;
+ _mm256_storeu_si256((__m256i *)(dst + 0 * 32), s[0]);
+ _mm256_storeu_si256((__m256i *)(dst + 1 * 32), s[1]);
+ dst += dst_stride;
+ _mm256_storeu_si256((__m256i *)(dst + 0 * 32), s[2]);
+ _mm256_storeu_si256((__m256i *)(dst + 1 * 32), s[3]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else {
+ do {
+ copy_128(src, dst);
+ src += src_stride;
+ dst += dst_stride;
+ copy_128(src, dst);
+ src += src_stride;
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ }
+}
diff --git a/third_party/aom/av1/common/x86/convolve_2d_sse2.c b/third_party/aom/av1/common/x86/convolve_2d_sse2.c
index e4d352c0e..fc0e65453 100644
--- a/third_party/aom/av1/common/x86/convolve_2d_sse2.c
+++ b/third_party/aom/av1/common/x86/convolve_2d_sse2.c
@@ -11,197 +11,20 @@
#include <emmintrin.h>
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_dsp_rtcd.h"
+
#include "aom_dsp/aom_convolve.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/aom_filter.h"
+#include "aom_dsp/x86/convolve_sse2.h"
#include "av1/common/convolve.h"
-#if CONFIG_COMPOUND_ROUND
-void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
- InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- DECLARE_ALIGNED(16, uint8_t,
- im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = MAX_SB_SIZE;
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int do_average = conv_params->do_average;
- const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
- const __m128i zero = _mm_setzero_si128();
-
- /* Horizontal filter */
- {
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const =
- _mm_set1_epi32((1 << conv_params->round_0) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
- for (i = 0; i < im_h; ++i) {
- for (j = 0; j < w; j += 8) {
- const __m128i data =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
-
- // Filter even-index pixels
- const __m128i src_0 = _mm_unpacklo_epi8(data, zero);
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i src_2 = _mm_unpacklo_epi8(_mm_srli_si128(data, 2), zero);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i src_4 = _mm_unpacklo_epi8(_mm_srli_si128(data, 4), zero);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i src_6 = _mm_unpacklo_epi8(_mm_srli_si128(data, 6), zero);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
- _mm_add_epi32(res_2, res_6));
- res_even =
- _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift);
-
- // Filter odd-index pixels
- const __m128i src_1 = _mm_unpacklo_epi8(_mm_srli_si128(data, 1), zero);
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i src_3 = _mm_unpacklo_epi8(_mm_srli_si128(data, 3), zero);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i src_5 = _mm_unpacklo_epi8(_mm_srli_si128(data, 5), zero);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i src_7 = _mm_unpacklo_epi8(_mm_srli_si128(data, 7), zero);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
- _mm_add_epi32(res_3, res_7));
- res_odd =
- _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift);
-
- // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
- __m128i res = _mm_packs_epi32(res_even, res_odd);
- res = _mm_packus_epi16(res, res);
- _mm_storel_epi64((__m128i *)&im_block[i * im_stride + j], res);
- }
- }
- }
-
- /* Vertical filter */
- {
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const =
- _mm_set1_epi32((1 << conv_params->round_1) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- // Filter even-index pixels
- const uint8_t *data = &im_block[i * im_stride + j];
- const __m128i src_01 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 0 * im_stride)),
- _mm_loadl_epi64((__m128i *)(data + 1 * im_stride)));
- const __m128i src_23 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 2 * im_stride)),
- _mm_loadl_epi64((__m128i *)(data + 3 * im_stride)));
- const __m128i src_45 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 4 * im_stride)),
- _mm_loadl_epi64((__m128i *)(data + 5 * im_stride)));
- const __m128i src_67 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 6 * im_stride)),
- _mm_loadl_epi64((__m128i *)(data + 7 * im_stride)));
-
- const __m128i src_0 = _mm_unpacklo_epi8(src_01, zero);
- const __m128i src_2 = _mm_unpacklo_epi8(src_23, zero);
- const __m128i src_4 = _mm_unpacklo_epi8(src_45, zero);
- const __m128i src_6 = _mm_unpacklo_epi8(src_67, zero);
-
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
- _mm_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m128i src_1 = _mm_unpackhi_epi8(src_01, zero);
- const __m128i src_3 = _mm_unpackhi_epi8(src_23, zero);
- const __m128i src_5 = _mm_unpackhi_epi8(src_45, zero);
- const __m128i src_7 = _mm_unpackhi_epi8(src_67, zero);
-
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
- _mm_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
- const __m128i res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
- const __m128i res_hi_round =
- _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-
- // Accumulate values into the destination buffer
- __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- if (do_average) {
- _mm_storeu_si128(p + 0,
- _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round));
- _mm_storeu_si128(p + 1,
- _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
- } else {
- _mm_storeu_si128(p + 0, res_lo_round);
- _mm_storeu_si128(p + 1, res_hi_round);
- }
- }
- }
- }
-}
-#else
-void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
- InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
+void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
const int bd = 8;
DECLARE_ALIGNED(16, int16_t,
@@ -211,10 +34,14 @@ void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
int i, j;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int do_average = conv_params->do_average;
const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
const __m128i zero = _mm_setzero_si128();
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+
+ assert(conv_params->round_0 > 0);
/* Horizontal filter */
{
@@ -237,7 +64,7 @@ void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
const __m128i round_const = _mm_set1_epi32(
- ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
+ (1 << (bd + FILTER_BITS - 1)) + ((1 << conv_params->round_0) >> 1));
const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
for (i = 0; i < im_h; ++i) {
@@ -302,10 +129,14 @@ void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
// coeffs 6 7 6 7 6 7 6 7
const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
+ const __m128i sum_round =
+ _mm_set1_epi32((1 << offset_bits) + ((1 << conv_params->round_1) >> 1));
+ const __m128i sum_shift = _mm_cvtsi32_si128(conv_params->round_1);
+
const __m128i round_const = _mm_set1_epi32(
- ((1 << conv_params->round_1) >> 1) -
- (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
+ ((1 << bits) >> 1) - (1 << (offset_bits - conv_params->round_1)) -
+ ((1 << (offset_bits - conv_params->round_1)) >> 1));
+ const __m128i round_shift = _mm_cvtsi32_si128(bits);
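+ // Two-stage rounding: shift by round_1 first (sum_round carries the
+ // intermediate offset), then shift by the remaining bits and strip the
+ // offsets before packing down to 8 bits.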
for (i = 0; i < h; ++i) {
for (j = 0; j < w; j += 8) {
@@ -358,24 +189,285 @@ void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
- const __m128i res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
- const __m128i res_hi_round =
- _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
+ __m128i res_lo_round =
+ _mm_sra_epi32(_mm_add_epi32(res_lo, sum_round), sum_shift);
+ __m128i res_hi_round =
+ _mm_sra_epi32(_mm_add_epi32(res_hi, sum_round), sum_shift);
+
+ res_lo_round = _mm_sra_epi32(_mm_add_epi32(res_lo_round, round_const),
+ round_shift);
+ res_hi_round = _mm_sra_epi32(_mm_add_epi32(res_hi_round, round_const),
+ round_shift);
+
+ const __m128i res16 = _mm_packs_epi32(res_lo_round, res_hi_round);
+ const __m128i res = _mm_packus_epi16(res16, res16);
 // Store values into the destination buffer
__m128i *const p = (__m128i *)&dst[i * dst_stride + j];
+
+ if (w == 2) {
+ *(uint16_t *)p = _mm_cvtsi128_si32(res);
+ } else if (w == 4) {
+ *(uint32_t *)p = _mm_cvtsi128_si32(res);
+ } else {
+ _mm_storel_epi64(p, res);
+ }
+ }
+ }
+ }
+}
+
+static INLINE void copy_128(const uint8_t *src, uint8_t *dst) {
+ __m128i s[8];
+ s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 16));
+ s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 16));
+ s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 16));
+ s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 16));
+ s[4] = _mm_loadu_si128((__m128i *)(src + 4 * 16));
+ s[5] = _mm_loadu_si128((__m128i *)(src + 5 * 16));
+ s[6] = _mm_loadu_si128((__m128i *)(src + 6 * 16));
+ s[7] = _mm_loadu_si128((__m128i *)(src + 7 * 16));
+ _mm_store_si128((__m128i *)(dst + 0 * 16), s[0]);
+ _mm_store_si128((__m128i *)(dst + 1 * 16), s[1]);
+ _mm_store_si128((__m128i *)(dst + 2 * 16), s[2]);
+ _mm_store_si128((__m128i *)(dst + 3 * 16), s[3]);
+ _mm_store_si128((__m128i *)(dst + 4 * 16), s[4]);
+ _mm_store_si128((__m128i *)(dst + 5 * 16), s[5]);
+ _mm_store_si128((__m128i *)(dst + 6 * 16), s[6]);
+ _mm_store_si128((__m128i *)(dst + 7 * 16), s[7]);
+}
+
+void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ (void)filter_params_x;
+ (void)filter_params_y;
+ (void)subpel_x_q4;
+ (void)subpel_y_q4;
+ (void)conv_params;
+
+ if (w >= 16) {
+ assert(!((intptr_t)dst % 16));
+ assert(!(dst_stride % 16));
+ }
+
+ if (w == 2) {
+ do {
+ memcpy(dst, src, 2 * sizeof(*src));
+ src += src_stride;
+ dst += dst_stride;
+ memcpy(dst, src, 2 * sizeof(*src));
+ src += src_stride;
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 4) {
+ do {
+ memcpy(dst, src, 4 * sizeof(*src));
+ src += src_stride;
+ dst += dst_stride;
+ memcpy(dst, src, 4 * sizeof(*src));
+ src += src_stride;
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 8) {
+ do {
+ __m128i s[2];
+ s[0] = _mm_loadl_epi64((__m128i *)src);
+ src += src_stride;
+ s[1] = _mm_loadl_epi64((__m128i *)src);
+ src += src_stride;
+ _mm_storel_epi64((__m128i *)dst, s[0]);
+ dst += dst_stride;
+ _mm_storel_epi64((__m128i *)dst, s[1]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 16) {
+ do {
+ __m128i s[2];
+ s[0] = _mm_loadu_si128((__m128i *)src);
+ src += src_stride;
+ s[1] = _mm_loadu_si128((__m128i *)src);
+ src += src_stride;
+ _mm_store_si128((__m128i *)dst, s[0]);
+ dst += dst_stride;
+ _mm_store_si128((__m128i *)dst, s[1]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 32) {
+ do {
+ __m128i s[4];
+ s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 16));
+ s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 16));
+ src += src_stride;
+ s[2] = _mm_loadu_si128((__m128i *)(src + 0 * 16));
+ s[3] = _mm_loadu_si128((__m128i *)(src + 1 * 16));
+ src += src_stride;
+ _mm_store_si128((__m128i *)(dst + 0 * 16), s[0]);
+ _mm_store_si128((__m128i *)(dst + 1 * 16), s[1]);
+ dst += dst_stride;
+ _mm_store_si128((__m128i *)(dst + 0 * 16), s[2]);
+ _mm_store_si128((__m128i *)(dst + 1 * 16), s[3]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 64) {
+ do {
+ __m128i s[8];
+ s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 16));
+ s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 16));
+ s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 16));
+ s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 16));
+ src += src_stride;
+ s[4] = _mm_loadu_si128((__m128i *)(src + 0 * 16));
+ s[5] = _mm_loadu_si128((__m128i *)(src + 1 * 16));
+ s[6] = _mm_loadu_si128((__m128i *)(src + 2 * 16));
+ s[7] = _mm_loadu_si128((__m128i *)(src + 3 * 16));
+ src += src_stride;
+ _mm_store_si128((__m128i *)(dst + 0 * 16), s[0]);
+ _mm_store_si128((__m128i *)(dst + 1 * 16), s[1]);
+ _mm_store_si128((__m128i *)(dst + 2 * 16), s[2]);
+ _mm_store_si128((__m128i *)(dst + 3 * 16), s[3]);
+ dst += dst_stride;
+ _mm_store_si128((__m128i *)(dst + 0 * 16), s[4]);
+ _mm_store_si128((__m128i *)(dst + 1 * 16), s[5]);
+ _mm_store_si128((__m128i *)(dst + 2 * 16), s[6]);
+ _mm_store_si128((__m128i *)(dst + 3 * 16), s[7]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else {
+ do {
+ copy_128(src, dst);
+ src += src_stride;
+ dst += dst_stride;
+ copy_128(src, dst);
+ src += src_stride;
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ }
+}
+
+void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride,
+ uint8_t *dst0, int dst_stride0, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ const int bd = 8;
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ (void)filter_params_x;
+ (void)filter_params_y;
+ (void)subpel_x_q4;
+ (void)subpel_y_q4;
+
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i left_shift = _mm_cvtsi32_si128(bits);
+ int i, j;
+
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m128i wt0 = _mm_set1_epi16(w0);
+ const __m128i wt1 = _mm_set1_epi16(w1);
+ const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
+
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m128i offset_const = _mm_set1_epi16(offset);
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m128i rounding_const = _mm_set1_epi16((1 << rounding_shift) >> 1);
+
+ assert((w % 4) == 0);
+
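+ // When do_average is 0, store the up-shifted, offset pixels into the
+ // 16-bit compound buffer; when it is 1, blend with the weighted average,
+ // round, and write the 8-bit result to dst0.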
+ if (!(w % 16)) {
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; j += 16) {
+ const __m128i d8 = _mm_loadu_si128((__m128i *)&src[j]);
+
+ const __m128i d16_lo = _mm_unpacklo_epi8(d8, zero);
+ const __m128i d16_hi = _mm_unpackhi_epi8(d8, zero);
+
+ const __m128i res_lo = _mm_sll_epi16(d16_lo, left_shift);
+ const __m128i res_unsigned_lo = _mm_add_epi16(res_lo, offset_const);
+
+ const __m128i res_hi = _mm_sll_epi16(d16_hi, left_shift);
+ const __m128i res_unsigned_hi = _mm_add_epi16(res_hi, offset_const);
+
if (do_average) {
- _mm_storeu_si128(p + 0,
- _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round));
- _mm_storeu_si128(p + 1,
- _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
+ const __m128i data_ref_0_lo = _mm_loadu_si128((__m128i *)(&dst[j]));
+ const __m128i data_ref_0_hi =
+ _mm_loadu_si128((__m128i *)(&dst[j + 8]));
+
+ const __m128i comp_avg_res_lo =
+ comp_avg(&data_ref_0_lo, &res_unsigned_lo, &wt, use_jnt_comp_avg);
+
+ const __m128i round_result_lo = convolve_rounding(
+ &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift);
+
+ const __m128i comp_avg_res_hi =
+ comp_avg(&data_ref_0_hi, &res_unsigned_hi, &wt, use_jnt_comp_avg);
+
+ const __m128i round_result_hi = convolve_rounding(
+ &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift);
+
+ const __m128i res_8 =
+ _mm_packus_epi16(round_result_lo, round_result_hi);
+
+ _mm_store_si128((__m128i *)(&dst0[j]), res_8);
+ } else {
+ _mm_store_si128((__m128i *)(&dst[j]), res_unsigned_lo);
+ _mm_store_si128((__m128i *)(&dst[j + 8]), res_unsigned_hi);
+ }
+ }
+ src += src_stride;
+ dst += dst_stride;
+ dst0 += dst_stride0;
+ }
+ } else {
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; j += 8) {
+ const __m128i d8 = _mm_loadl_epi64((__m128i *)&src[j]);
+ const __m128i d16_0 = _mm_unpacklo_epi8(d8, zero);
+
+ const __m128i res = _mm_sll_epi16(d16_0, left_shift);
+ const __m128i res_unsigned = _mm_add_epi16(res, offset_const);
+
+ if (do_average) {
+ const __m128i data_ref_0 = _mm_loadu_si128((__m128i *)(&dst[j]));
+
+ const __m128i comp_avg_res =
+ comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m128i round_result = convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
+
+ if (w > 4)
+ _mm_storel_epi64((__m128i *)(&dst0[j]), res_8);
+ else
+ *(uint32_t *)(&dst0[j]) = _mm_cvtsi128_si32(res_8);
} else {
- _mm_storeu_si128(p + 0, res_lo_round);
- _mm_storeu_si128(p + 1, res_hi_round);
+ _mm_store_si128((__m128i *)(&dst[j]), res_unsigned);
}
}
+ src += src_stride;
+ dst += dst_stride;
+ dst0 += dst_stride0;
}
}
}
-#endif
diff --git a/third_party/aom/av1/common/x86/convolve_avx2.c b/third_party/aom/av1/common/x86/convolve_avx2.c
index a0e58716d..6fdfb0954 100644
--- a/third_party/aom/av1/common/x86/convolve_avx2.c
+++ b/third_party/aom/av1/common/x86/convolve_avx2.c
@@ -11,332 +11,267 @@
#include <immintrin.h>
-#include "aom_dsp/aom_dsp_common.h"
-#include "./av1_rtcd.h"
-
-#if CONFIG_CONVOLVE_ROUND
-static const uint32_t sindex[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
-
-// 16 epi16 pixels
-static INLINE void pixel_clamp_avx2(__m256i *u, int bd) {
- const __m256i one = _mm256_set1_epi16(1);
- const __m256i max = _mm256_sub_epi16(_mm256_slli_epi16(one, bd), one);
- __m256i clamped, mask;
-
- mask = _mm256_cmpgt_epi16(*u, max);
- clamped = _mm256_andnot_si256(mask, *u);
- mask = _mm256_and_si256(mask, max);
- clamped = _mm256_or_si256(mask, clamped);
-
- const __m256i zero = _mm256_setzero_si256();
- mask = _mm256_cmpgt_epi16(clamped, zero);
- *u = _mm256_and_si256(clamped, mask);
-}
-
-// 8 epi16 pixels
-static INLINE void pixel_clamp_sse2(__m128i *u, int bd) {
- const __m128i one = _mm_set1_epi16(1);
- const __m128i max = _mm_sub_epi16(_mm_slli_epi16(one, bd), one);
- __m128i clamped, mask;
-
- mask = _mm_cmpgt_epi16(*u, max);
- clamped = _mm_andnot_si128(mask, *u);
- mask = _mm_and_si128(mask, max);
- clamped = _mm_or_si128(mask, clamped);
-
- const __m128i zero = _mm_setzero_si128();
- mask = _mm_cmpgt_epi16(clamped, zero);
- *u = _mm_and_si128(clamped, mask);
-}
-
-// Work on multiple of 32 pixels
-static INLINE void cal_rounding_32xn_avx2(const int32_t *src, uint8_t *dst,
- const __m256i *rnd, int shift,
- int num) {
- do {
- __m256i x0 = _mm256_loadu_si256((const __m256i *)src);
- __m256i x1 = _mm256_loadu_si256((const __m256i *)src + 1);
- __m256i x2 = _mm256_loadu_si256((const __m256i *)src + 2);
- __m256i x3 = _mm256_loadu_si256((const __m256i *)src + 3);
-
- x0 = _mm256_add_epi32(x0, *rnd);
- x1 = _mm256_add_epi32(x1, *rnd);
- x2 = _mm256_add_epi32(x2, *rnd);
- x3 = _mm256_add_epi32(x3, *rnd);
-
- x0 = _mm256_srai_epi32(x0, shift);
- x1 = _mm256_srai_epi32(x1, shift);
- x2 = _mm256_srai_epi32(x2, shift);
- x3 = _mm256_srai_epi32(x3, shift);
-
- x0 = _mm256_packs_epi32(x0, x1);
- x2 = _mm256_packs_epi32(x2, x3);
-
- pixel_clamp_avx2(&x0, 8);
- pixel_clamp_avx2(&x2, 8);
-
- x0 = _mm256_packus_epi16(x0, x2);
- x1 = _mm256_loadu_si256((const __m256i *)sindex);
- x2 = _mm256_permutevar8x32_epi32(x0, x1);
-
- _mm256_storeu_si256((__m256i *)dst, x2);
- src += 32;
- dst += 32;
- num--;
- } while (num > 0);
-}
-
-static INLINE void cal_rounding_16_avx2(const int32_t *src, uint8_t *dst,
- const __m256i *rnd, int shift) {
- __m256i x0 = _mm256_loadu_si256((const __m256i *)src);
- __m256i x1 = _mm256_loadu_si256((const __m256i *)src + 1);
-
- x0 = _mm256_add_epi32(x0, *rnd);
- x1 = _mm256_add_epi32(x1, *rnd);
-
- x0 = _mm256_srai_epi32(x0, shift);
- x1 = _mm256_srai_epi32(x1, shift);
-
- x0 = _mm256_packs_epi32(x0, x1);
- pixel_clamp_avx2(&x0, 8);
-
- const __m256i x2 = _mm256_packus_epi16(x0, x0);
- x1 = _mm256_loadu_si256((const __m256i *)sindex);
- x0 = _mm256_permutevar8x32_epi32(x2, x1);
-
- _mm_storeu_si128((__m128i *)dst, _mm256_castsi256_si128(x0));
-}
-
-static INLINE void cal_rounding_8_avx2(const int32_t *src, uint8_t *dst,
- const __m256i *rnd, int shift) {
- __m256i x0 = _mm256_loadu_si256((const __m256i *)src);
- x0 = _mm256_add_epi32(x0, *rnd);
- x0 = _mm256_srai_epi32(x0, shift);
-
- x0 = _mm256_packs_epi32(x0, x0);
- pixel_clamp_avx2(&x0, 8);
-
- x0 = _mm256_packus_epi16(x0, x0);
- const __m256i x1 = _mm256_loadu_si256((const __m256i *)sindex);
- x0 = _mm256_permutevar8x32_epi32(x0, x1);
+#include "config/av1_rtcd.h"
- _mm_storel_epi64((__m128i *)dst, _mm256_castsi256_si128(x0));
-}
-
-static INLINE void cal_rounding_4_sse2(const int32_t *src, uint8_t *dst,
- const __m128i *rnd, int shift) {
- __m128i x = _mm_loadu_si128((const __m128i *)src);
- x = _mm_add_epi32(x, *rnd);
- x = _mm_srai_epi32(x, shift);
-
- x = _mm_packs_epi32(x, x);
- pixel_clamp_sse2(&x, 8);
-
- x = _mm_packus_epi16(x, x);
- *(uint32_t *)dst = _mm_cvtsi128_si32(x);
-}
-
-void av1_convolve_rounding_avx2(const int32_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
- int bits) {
- const __m256i rnd_num = _mm256_set1_epi32((int32_t)(1 << (bits - 1)));
- const __m128i rnd_num_sse2 = _mm256_castsi256_si128(rnd_num);
-
- if (w > 64) { // width = 128
- do {
- cal_rounding_32xn_avx2(src, dst, &rnd_num, bits, 4);
- src += src_stride;
- dst += dst_stride;
- h--;
- } while (h > 0);
- } else if (w > 32) { // width = 64
- do {
- cal_rounding_32xn_avx2(src, dst, &rnd_num, bits, 2);
- src += src_stride;
- dst += dst_stride;
- h--;
- } while (h > 0);
- } else if (w > 16) { // width = 32
- do {
- cal_rounding_32xn_avx2(src, dst, &rnd_num, bits, 1);
- src += src_stride;
- dst += dst_stride;
- h--;
- } while (h > 0);
- } else if (w > 8) { // width = 16
- do {
- cal_rounding_16_avx2(src, dst, &rnd_num, bits);
- src += src_stride;
- dst += dst_stride;
- h--;
- } while (h > 0);
- } else if (w > 4) { // width = 8
- do {
- cal_rounding_8_avx2(src, dst, &rnd_num, bits);
- src += src_stride;
- dst += dst_stride;
- h--;
- } while (h > 0);
- } else if (w > 2) { // width = 4
- do {
- cal_rounding_4_sse2(src, dst, &rnd_num_sse2, bits);
- src += src_stride;
- dst += dst_stride;
- h--;
- } while (h > 0);
- } else { // width = 2
- do {
- dst[0] = clip_pixel(ROUND_POWER_OF_TWO(src[0], bits));
- dst[1] = clip_pixel(ROUND_POWER_OF_TWO(src[1], bits));
- src += src_stride;
- dst += dst_stride;
- h--;
- } while (h > 0);
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/x86/convolve_avx2.h"
+#include "aom_dsp/x86/synonyms.h"
+
+void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ int i, j;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const uint8_t *const src_ptr = src - fo_vert * src_stride;
+
+ // right shift is F-1 because we are already dividing
+ // filter co-efficients by 2
+ const int right_shift_bits = (FILTER_BITS - 1);
+ const __m128i right_shift = _mm_cvtsi32_si128(right_shift_bits);
+ const __m256i right_shift_const =
+ _mm256_set1_epi16((1 << right_shift_bits) >> 1);
+ __m256i coeffs[4], s[8];
+
+ assert(conv_params->round_0 <= FILTER_BITS);
+ assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) ||
+ ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
+
+ prepare_coeffs_lowbd(filter_params_y, subpel_y_q4, coeffs);
+
+ (void)filter_params_x;
+ (void)subpel_x_q4;
+ (void)conv_params;
+
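+ // Vertical-only filter: process 16-pixel-wide columns, producing two
+ // output rows per iteration and reusing six of the eight interleaved row
+ // pairs held in s[].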
+ for (j = 0; j < w; j += 16) {
+ const uint8_t *data = &src_ptr[j];
+ __m256i src6;
+
+ // Load lines a and b. Line a to lower 128, line b to upper 128
+ const __m256i src_01a = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 0 * src_stride))),
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
+ 0x20);
+
+ const __m256i src_12a = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
+ 0x20);
+
+ const __m256i src_23a = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
+ 0x20);
+
+ const __m256i src_34a = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
+ 0x20);
+
+ const __m256i src_45a = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
+ 0x20);
+
+ src6 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 6 * src_stride)));
+ const __m256i src_56a = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
+ src6, 0x20);
+
+ s[0] = _mm256_unpacklo_epi8(src_01a, src_12a);
+ s[1] = _mm256_unpacklo_epi8(src_23a, src_34a);
+ s[2] = _mm256_unpacklo_epi8(src_45a, src_56a);
+
+ s[4] = _mm256_unpackhi_epi8(src_01a, src_12a);
+ s[5] = _mm256_unpackhi_epi8(src_23a, src_34a);
+ s[6] = _mm256_unpackhi_epi8(src_45a, src_56a);
+
+ for (i = 0; i < h; i += 2) {
+ data = &src_ptr[i * src_stride + j];
+ const __m256i src_67a = _mm256_permute2x128_si256(
+ src6,
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
+ 0x20);
+
+ src6 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 8 * src_stride)));
+ const __m256i src_78a = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
+ src6, 0x20);
+
+ s[3] = _mm256_unpacklo_epi8(src_67a, src_78a);
+ s[7] = _mm256_unpackhi_epi8(src_67a, src_78a);
+
+ const __m256i res_lo = convolve_lowbd(s, coeffs);
+
+ /* rounding code */
+ // shift by F - 1
+ const __m256i res_16b_lo = _mm256_sra_epi16(
+ _mm256_add_epi16(res_lo, right_shift_const), right_shift);
+ // 8 bit conversion and saturation to uint8
+ __m256i res_8b_lo = _mm256_packus_epi16(res_16b_lo, res_16b_lo);
+
+ if (w - j > 8) {
+ const __m256i res_hi = convolve_lowbd(s + 4, coeffs);
+
+ /* rounding code */
+ // shift by F - 1
+ const __m256i res_16b_hi = _mm256_sra_epi16(
+ _mm256_add_epi16(res_hi, right_shift_const), right_shift);
+ // 8 bit conversion and saturation to uint8
+ __m256i res_8b_hi = _mm256_packus_epi16(res_16b_hi, res_16b_hi);
+
+ __m256i res_a = _mm256_unpacklo_epi64(res_8b_lo, res_8b_hi);
+
+ const __m128i res_0 = _mm256_castsi256_si128(res_a);
+ const __m128i res_1 = _mm256_extracti128_si256(res_a, 1);
+
+ _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j], res_0);
+ _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j + dst_stride],
+ res_1);
+ } else {
+ const __m128i res_0 = _mm256_castsi256_si128(res_8b_lo);
+ const __m128i res_1 = _mm256_extracti128_si256(res_8b_lo, 1);
+ if (w - j > 4) {
+ _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res_0);
+ _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride],
+ res_1);
+ } else if (w - j > 2) {
+ xx_storel_32(&dst[i * dst_stride + j], res_0);
+ xx_storel_32(&dst[i * dst_stride + j + dst_stride], res_1);
+ } else {
+ __m128i *const p_0 = (__m128i *)&dst[i * dst_stride + j];
+ __m128i *const p_1 = (__m128i *)&dst[i * dst_stride + j + dst_stride];
+ *(uint16_t *)p_0 = _mm_cvtsi128_si32(res_0);
+ *(uint16_t *)p_1 = _mm_cvtsi128_si32(res_1);
+ }
+ }
+
+ s[0] = s[1];
+ s[1] = s[2];
+ s[2] = s[3];
+
+ s[4] = s[5];
+ s[5] = s[6];
+ s[6] = s[7];
+ }
}
}
-#if CONFIG_HIGHBITDEPTH
-static INLINE void cal_highbd_rounding_32xn_avx2(const int32_t *src,
- uint16_t *dst,
- const __m256i *rnd, int shift,
- int num, int bd) {
- do {
- __m256i x0 = _mm256_loadu_si256((const __m256i *)src);
- __m256i x1 = _mm256_loadu_si256((const __m256i *)src + 1);
- __m256i x2 = _mm256_loadu_si256((const __m256i *)src + 2);
- __m256i x3 = _mm256_loadu_si256((const __m256i *)src + 3);
-
- x0 = _mm256_add_epi32(x0, *rnd);
- x1 = _mm256_add_epi32(x1, *rnd);
- x2 = _mm256_add_epi32(x2, *rnd);
- x3 = _mm256_add_epi32(x3, *rnd);
-
- x0 = _mm256_srai_epi32(x0, shift);
- x1 = _mm256_srai_epi32(x1, shift);
- x2 = _mm256_srai_epi32(x2, shift);
- x3 = _mm256_srai_epi32(x3, shift);
-
- x0 = _mm256_packs_epi32(x0, x1);
- x2 = _mm256_packs_epi32(x2, x3);
-
- pixel_clamp_avx2(&x0, bd);
- pixel_clamp_avx2(&x2, bd);
-
- x0 = _mm256_permute4x64_epi64(x0, 0xD8);
- x2 = _mm256_permute4x64_epi64(x2, 0xD8);
-
- _mm256_storeu_si256((__m256i *)dst, x0);
- _mm256_storeu_si256((__m256i *)(dst + 16), x2);
- src += 32;
- dst += 32;
- num--;
- } while (num > 0);
-}
-
-static INLINE void cal_highbd_rounding_16_avx2(const int32_t *src,
- uint16_t *dst,
- const __m256i *rnd, int shift,
- int bd) {
- __m256i x0 = _mm256_loadu_si256((const __m256i *)src);
- __m256i x1 = _mm256_loadu_si256((const __m256i *)src + 1);
-
- x0 = _mm256_add_epi32(x0, *rnd);
- x1 = _mm256_add_epi32(x1, *rnd);
-
- x0 = _mm256_srai_epi32(x0, shift);
- x1 = _mm256_srai_epi32(x1, shift);
-
- x0 = _mm256_packs_epi32(x0, x1);
- pixel_clamp_avx2(&x0, bd);
-
- x0 = _mm256_permute4x64_epi64(x0, 0xD8);
- _mm256_storeu_si256((__m256i *)dst, x0);
-}
-
-static INLINE void cal_highbd_rounding_8_avx2(const int32_t *src, uint16_t *dst,
- const __m256i *rnd, int shift,
- int bd) {
- __m256i x = _mm256_loadu_si256((const __m256i *)src);
- x = _mm256_add_epi32(x, *rnd);
- x = _mm256_srai_epi32(x, shift);
-
- x = _mm256_packs_epi32(x, x);
- pixel_clamp_avx2(&x, bd);
-
- x = _mm256_permute4x64_epi64(x, 0xD8);
- _mm_storeu_si128((__m128i *)dst, _mm256_castsi256_si128(x));
-}
-
-static INLINE void cal_highbd_rounding_4_sse2(const int32_t *src, uint16_t *dst,
- const __m128i *rnd, int shift,
- int bd) {
- __m128i x = _mm_loadu_si128((const __m128i *)src);
- x = _mm_add_epi32(x, *rnd);
- x = _mm_srai_epi32(x, shift);
-
- x = _mm_packs_epi32(x, x);
- pixel_clamp_sse2(&x, bd);
- _mm_storel_epi64((__m128i *)dst, x);
-}
-
-void av1_highbd_convolve_rounding_avx2(const int32_t *src, int src_stride,
- uint8_t *dst8, int dst_stride, int w,
- int h, int bits, int bd) {
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- const __m256i rnd_num = _mm256_set1_epi32((int32_t)(1 << (bits - 1)));
- const __m128i rnd_num_sse2 = _mm256_castsi256_si128(rnd_num);
-
- if (w > 64) { // width = 128
- do {
- cal_highbd_rounding_32xn_avx2(src, dst, &rnd_num, bits, 4, bd);
- src += src_stride;
- dst += dst_stride;
- h--;
- } while (h > 0);
- } else if (w > 32) { // width = 64
- do {
- cal_highbd_rounding_32xn_avx2(src, dst, &rnd_num, bits, 2, bd);
- src += src_stride;
- dst += dst_stride;
- h--;
- } while (h > 0);
- } else if (w > 16) { // width = 32
- do {
- cal_highbd_rounding_32xn_avx2(src, dst, &rnd_num, bits, 1, bd);
- src += src_stride;
- dst += dst_stride;
- h--;
- } while (h > 0);
- } else if (w > 8) { // width = 16
- do {
- cal_highbd_rounding_16_avx2(src, dst, &rnd_num, bits, bd);
- src += src_stride;
- dst += dst_stride;
- h--;
- } while (h > 0);
- } else if (w > 4) { // width = 8
- do {
- cal_highbd_rounding_8_avx2(src, dst, &rnd_num, bits, bd);
- src += src_stride;
- dst += dst_stride;
- h--;
- } while (h > 0);
- } else if (w > 2) { // width = 4
- do {
- cal_highbd_rounding_4_sse2(src, dst, &rnd_num_sse2, bits, bd);
- src += src_stride;
- dst += dst_stride;
- h--;
- } while (h > 0);
- } else { // width = 2
- do {
- dst[0] = clip_pixel_highbd(ROUND_POWER_OF_TWO(src[0], bits), bd);
- dst[1] = clip_pixel_highbd(ROUND_POWER_OF_TWO(src[1], bits), bd);
- src += src_stride;
- dst += dst_stride;
- h--;
- } while (h > 0);
+void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ int i, j;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const uint8_t *const src_ptr = src - fo_horiz;
+ const int bits = FILTER_BITS - conv_params->round_0;
+
+ __m256i filt[4], coeffs[4];
+
+ filt[0] = _mm256_load_si256((__m256i const *)filt1_global_avx2);
+ filt[1] = _mm256_load_si256((__m256i const *)filt2_global_avx2);
+ filt[2] = _mm256_load_si256((__m256i const *)filt3_global_avx2);
+ filt[3] = _mm256_load_si256((__m256i const *)filt4_global_avx2);
+
+ prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs);
+
+ const __m256i round_0_const =
+ _mm256_set1_epi16((1 << (conv_params->round_0 - 1)) >> 1);
+ const __m128i round_0_shift = _mm_cvtsi32_si128(conv_params->round_0 - 1);
+ const __m256i round_const = _mm256_set1_epi16((1 << bits) >> 1);
+ const __m128i round_shift = _mm_cvtsi32_si128(bits);
+
+ (void)filter_params_y;
+ (void)subpel_y_q4;
+
+ assert(bits >= 0);
+ assert((FILTER_BITS - conv_params->round_1) >= 0 ||
+ ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
+ assert(conv_params->round_0 > 0);
+
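+ // Horizontal-only filter: the two shifts (round_0 - 1, then bits) add up
+ // to FILTER_BITS - 1, matching the halved low-bitdepth coefficients.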
+ if (w <= 8) {
+ for (i = 0; i < h; i += 2) {
+ const __m256i data = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(&src_ptr[i * src_stride]))),
+ _mm256_castsi128_si256(_mm_loadu_si128(
+ (__m128i *)(&src_ptr[i * src_stride + src_stride]))),
+ 0x20);
+
+ __m256i res_16b = convolve_lowbd_x(data, coeffs, filt);
+
+ res_16b = _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_0_const),
+ round_0_shift);
+
+ res_16b =
+ _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_const), round_shift);
+
+ /* rounding code */
+ // 8 bit conversion and saturation to uint8
+ __m256i res_8b = _mm256_packus_epi16(res_16b, res_16b);
+
+ const __m128i res_0 = _mm256_castsi256_si128(res_8b);
+ const __m128i res_1 = _mm256_extracti128_si256(res_8b, 1);
+ if (w > 4) {
+ _mm_storel_epi64((__m128i *)&dst[i * dst_stride], res_0);
+ _mm_storel_epi64((__m128i *)&dst[i * dst_stride + dst_stride], res_1);
+ } else if (w > 2) {
+ xx_storel_32(&dst[i * dst_stride], res_0);
+ xx_storel_32(&dst[i * dst_stride + dst_stride], res_1);
+ } else {
+ __m128i *const p_0 = (__m128i *)&dst[i * dst_stride];
+ __m128i *const p_1 = (__m128i *)&dst[i * dst_stride + dst_stride];
+ *(uint16_t *)p_0 = _mm_cvtsi128_si32(res_0);
+ *(uint16_t *)p_1 = _mm_cvtsi128_si32(res_1);
+ }
+ }
+ } else {
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; j += 16) {
+ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 8 9 10 11 12 13 14 15 16 17 18
+ // 19 20 21 22 23
+ const __m256i data = _mm256_inserti128_si256(
+ _mm256_loadu_si256((__m256i *)&src_ptr[(i * src_stride) + j]),
+ _mm_loadu_si128((__m128i *)&src_ptr[(i * src_stride) + (j + 8)]),
+ 1);
+
+ __m256i res_16b = convolve_lowbd_x(data, coeffs, filt);
+
+ res_16b = _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_0_const),
+ round_0_shift);
+
+ res_16b = _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_const),
+ round_shift);
+
+ /* rounding code */
+ // 8 bit conversion and saturation to uint8
+ __m256i res_8b = _mm256_packus_epi16(res_16b, res_16b);
+
+ // Store values into the destination buffer
+ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ res_8b = _mm256_permute4x64_epi64(res_8b, 216);
+ __m128i res = _mm256_castsi256_si128(res_8b);
+ _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j], res);
+ }
+ }
}
}
-#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_CONVOLVE_ROUND
diff --git a/third_party/aom/av1/common/x86/convolve_sse2.c b/third_party/aom/av1/common/x86/convolve_sse2.c
new file mode 100644
index 000000000..18fe9ae5a
--- /dev/null
+++ b/third_party/aom/av1/common/x86/convolve_sse2.c
@@ -0,0 +1,339 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <emmintrin.h>
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_dsp/aom_convolve.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/aom_filter.h"
+#include "aom_dsp/x86/convolve_common_intrin.h"
+#include "av1/common/convolve.h"
+
+static INLINE void prepare_coeffs(const InterpFilterParams *const filter_params,
+ const int subpel_q4,
+ __m128i *const coeffs /* [4] */) {
+ const int16_t *const y_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params, subpel_q4 & SUBPEL_MASK);
+ const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
+ // coeffs 0 1 0 1 2 3 2 3
+ const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
+ // coeffs 4 5 4 5 6 7 6 7
+ const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
+
+ coeffs[0] = _mm_unpacklo_epi64(tmp_0, tmp_0); // coeffs 0 1 0 1 0 1 0 1
+ coeffs[1] = _mm_unpackhi_epi64(tmp_0, tmp_0); // coeffs 2 3 2 3 2 3 2 3
+ coeffs[2] = _mm_unpacklo_epi64(tmp_1, tmp_1); // coeffs 4 5 4 5 4 5 4 5
+ coeffs[3] = _mm_unpackhi_epi64(tmp_1, tmp_1); // coeffs 6 7 6 7 6 7 6 7
+}
+
+static INLINE __m128i convolve(const __m128i *const s,
+ const __m128i *const coeffs) {
+ const __m128i d0 = _mm_madd_epi16(s[0], coeffs[0]);
+ const __m128i d1 = _mm_madd_epi16(s[1], coeffs[1]);
+ const __m128i d2 = _mm_madd_epi16(s[2], coeffs[2]);
+ const __m128i d3 = _mm_madd_epi16(s[3], coeffs[3]);
+ const __m128i d = _mm_add_epi32(_mm_add_epi32(d0, d1), _mm_add_epi32(d2, d3));
+ return d;
+}
+
+static INLINE __m128i convolve_lo_x(const __m128i *const s,
+ const __m128i *const coeffs) {
+ __m128i ss[4];
+ ss[0] = _mm_unpacklo_epi8(s[0], _mm_setzero_si128());
+ ss[1] = _mm_unpacklo_epi8(s[1], _mm_setzero_si128());
+ ss[2] = _mm_unpacklo_epi8(s[2], _mm_setzero_si128());
+ ss[3] = _mm_unpacklo_epi8(s[3], _mm_setzero_si128());
+ return convolve(ss, coeffs);
+}
+
+static INLINE __m128i convolve_lo_y(const __m128i *const s,
+ const __m128i *const coeffs) {
+ __m128i ss[4];
+ ss[0] = _mm_unpacklo_epi8(s[0], _mm_setzero_si128());
+ ss[1] = _mm_unpacklo_epi8(s[2], _mm_setzero_si128());
+ ss[2] = _mm_unpacklo_epi8(s[4], _mm_setzero_si128());
+ ss[3] = _mm_unpacklo_epi8(s[6], _mm_setzero_si128());
+ return convolve(ss, coeffs);
+}
+
+static INLINE __m128i convolve_hi_y(const __m128i *const s,
+ const __m128i *const coeffs) {
+ __m128i ss[4];
+ ss[0] = _mm_unpackhi_epi8(s[0], _mm_setzero_si128());
+ ss[1] = _mm_unpackhi_epi8(s[2], _mm_setzero_si128());
+ ss[2] = _mm_unpackhi_epi8(s[4], _mm_setzero_si128());
+ ss[3] = _mm_unpackhi_epi8(s[6], _mm_setzero_si128());
+ return convolve(ss, coeffs);
+}
+
+void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *dst, int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const uint8_t *src_ptr = src - fo_vert * src_stride;
+ const __m128i round_const = _mm_set1_epi32((1 << FILTER_BITS) >> 1);
+ const __m128i round_shift = _mm_cvtsi32_si128(FILTER_BITS);
+ __m128i coeffs[4];
+
+ (void)filter_params_x;
+ (void)subpel_x_q4;
+ (void)conv_params;
+
+ assert(conv_params->round_0 <= FILTER_BITS);
+ assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) ||
+ ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
+
+ prepare_coeffs(filter_params_y, subpel_y_q4, coeffs);
+
+ if (w <= 4) {
+ __m128i s[8], src6, res, res_round, res16;
+ uint32_t res_int;
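+ // Build a sliding window of byte-interleaved row pairs from 32-bit
+ // loads; each loop iteration appends two rows and emits two output rows.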
+ src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 6 * src_stride));
+ s[0] = _mm_unpacklo_epi8(
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 0 * src_stride)),
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)));
+ s[1] = _mm_unpacklo_epi8(
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)),
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)));
+ s[2] = _mm_unpacklo_epi8(
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)),
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)));
+ s[3] = _mm_unpacklo_epi8(
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)),
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)));
+ s[4] = _mm_unpacklo_epi8(
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)),
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)));
+ s[5] = _mm_unpacklo_epi8(
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)), src6);
+
+ do {
+ s[6] = _mm_unpacklo_epi8(
+ src6, _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)));
+ src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 8 * src_stride));
+ s[7] = _mm_unpacklo_epi8(
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)), src6);
+
+ res = convolve_lo_y(s + 0, coeffs);
+ res_round = _mm_sra_epi32(_mm_add_epi32(res, round_const), round_shift);
+ res16 = _mm_packs_epi32(res_round, res_round);
+ res_int = _mm_cvtsi128_si32(_mm_packus_epi16(res16, res16));
+
+ if (w == 2)
+ *(uint16_t *)dst = res_int;
+ else
+ *(uint32_t *)dst = res_int;
+
+ src_ptr += src_stride;
+ dst += dst_stride;
+
+ res = convolve_lo_y(s + 1, coeffs);
+ res_round = _mm_sra_epi32(_mm_add_epi32(res, round_const), round_shift);
+ res16 = _mm_packs_epi32(res_round, res_round);
+ res_int = _mm_cvtsi128_si32(_mm_packus_epi16(res16, res16));
+
+ if (w == 2)
+ *(uint16_t *)dst = res_int;
+ else
+ *(uint32_t *)dst = res_int;
+
+ src_ptr += src_stride;
+ dst += dst_stride;
+
+ s[0] = s[2];
+ s[1] = s[3];
+ s[2] = s[4];
+ s[3] = s[5];
+ s[4] = s[6];
+ s[5] = s[7];
+ h -= 2;
+ } while (h);
+ } else {
+ assert(!(w % 8));
+ int j = 0;
+ do {
+ __m128i s[8], src6, res_lo, res_hi;
+ __m128i res_lo_round, res_hi_round, res16, res;
+ const uint8_t *data = &src_ptr[j];
+
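+ // 8-wide columns use the same sliding-window scheme; the low and high
+ // halves of each interleaved pair are filtered separately and re-packed.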
+ src6 = _mm_loadl_epi64((__m128i *)(data + 6 * src_stride));
+ s[0] = _mm_unpacklo_epi8(
+ _mm_loadl_epi64((__m128i *)(data + 0 * src_stride)),
+ _mm_loadl_epi64((__m128i *)(data + 1 * src_stride)));
+ s[1] = _mm_unpacklo_epi8(
+ _mm_loadl_epi64((__m128i *)(data + 1 * src_stride)),
+ _mm_loadl_epi64((__m128i *)(data + 2 * src_stride)));
+ s[2] = _mm_unpacklo_epi8(
+ _mm_loadl_epi64((__m128i *)(data + 2 * src_stride)),
+ _mm_loadl_epi64((__m128i *)(data + 3 * src_stride)));
+ s[3] = _mm_unpacklo_epi8(
+ _mm_loadl_epi64((__m128i *)(data + 3 * src_stride)),
+ _mm_loadl_epi64((__m128i *)(data + 4 * src_stride)));
+ s[4] = _mm_unpacklo_epi8(
+ _mm_loadl_epi64((__m128i *)(data + 4 * src_stride)),
+ _mm_loadl_epi64((__m128i *)(data + 5 * src_stride)));
+ s[5] = _mm_unpacklo_epi8(
+ _mm_loadl_epi64((__m128i *)(data + 5 * src_stride)), src6);
+
+ int i = 0;
+ do {
+ data = &src_ptr[i * src_stride + j];
+ s[6] = _mm_unpacklo_epi8(
+ src6, _mm_loadl_epi64((__m128i *)(data + 7 * src_stride)));
+ src6 = _mm_loadl_epi64((__m128i *)(data + 8 * src_stride));
+ s[7] = _mm_unpacklo_epi8(
+ _mm_loadl_epi64((__m128i *)(data + 7 * src_stride)), src6);
+
+ res_lo = convolve_lo_y(s, coeffs); // Filter low index pixels
+ res_hi = convolve_hi_y(s, coeffs); // Filter high index pixels
+
+ res_lo_round =
+ _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
+ res_hi_round =
+ _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
+
+ res16 = _mm_packs_epi32(res_lo_round, res_hi_round);
+ res = _mm_packus_epi16(res16, res16);
+
+ _mm_storel_epi64((__m128i *)(dst + i * dst_stride + j), res);
+ i++;
+
+ res_lo = convolve_lo_y(s + 1, coeffs); // Filter low index pixels
+ res_hi = convolve_hi_y(s + 1, coeffs); // Filter high index pixels
+
+ res_lo_round =
+ _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
+ res_hi_round =
+ _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
+
+ res16 = _mm_packs_epi32(res_lo_round, res_hi_round);
+ res = _mm_packus_epi16(res16, res16);
+
+ _mm_storel_epi64((__m128i *)(dst + i * dst_stride + j), res);
+ i++;
+
+ s[0] = s[2];
+ s[1] = s[3];
+ s[2] = s[4];
+ s[3] = s[5];
+ s[4] = s[6];
+ s[5] = s[7];
+ } while (i < h);
+ j += 8;
+ } while (j < w);
+ }
+}
+
+void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *dst, int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const uint8_t *src_ptr = src - fo_horiz;
+ const int bits = FILTER_BITS - conv_params->round_0;
+ const __m128i round_0_const =
+ _mm_set1_epi32((1 << conv_params->round_0) >> 1);
+ const __m128i round_const = _mm_set1_epi32((1 << bits) >> 1);
+ const __m128i round_0_shift = _mm_cvtsi32_si128(conv_params->round_0);
+ const __m128i round_shift = _mm_cvtsi32_si128(bits);
+ __m128i coeffs[4];
+
+ (void)filter_params_y;
+ (void)subpel_y_q4;
+
+ assert(bits >= 0);
+ assert((FILTER_BITS - conv_params->round_1) >= 0 ||
+ ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
+
+ prepare_coeffs(filter_params_x, subpel_x_q4, coeffs);
+
+ if (w <= 4) {
+ do {
+ const __m128i data = _mm_loadu_si128((__m128i *)src_ptr);
+ __m128i s[4];
+
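+ // Interleave adjacent source bytes so each madd in convolve_lo_x pairs a
+ // pixel with its neighbor against the matching coefficient pair.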
+ s[0] = _mm_unpacklo_epi8(data, _mm_srli_si128(data, 1));
+ s[1] =
+ _mm_unpacklo_epi8(_mm_srli_si128(data, 2), _mm_srli_si128(data, 3));
+ s[2] =
+ _mm_unpacklo_epi8(_mm_srli_si128(data, 4), _mm_srli_si128(data, 5));
+ s[3] =
+ _mm_unpacklo_epi8(_mm_srli_si128(data, 6), _mm_srli_si128(data, 7));
+ const __m128i res_lo = convolve_lo_x(s, coeffs);
+ __m128i res_lo_round =
+ _mm_sra_epi32(_mm_add_epi32(res_lo, round_0_const), round_0_shift);
+ res_lo_round =
+ _mm_sra_epi32(_mm_add_epi32(res_lo_round, round_const), round_shift);
+
+ const __m128i res16 = _mm_packs_epi32(res_lo_round, res_lo_round);
+ const __m128i res = _mm_packus_epi16(res16, res16);
+
+ uint32_t r = _mm_cvtsi128_si32(res);
+ if (w == 2)
+ *(uint16_t *)dst = r;
+ else
+ *(uint32_t *)dst = r;
+
+ src_ptr += src_stride;
+ dst += dst_stride;
+ } while (--h);
+ } else {
+ assert(!(w % 8));
+ int i = 0;
+ do {
+ int j = 0;
+ do {
+ const __m128i data =
+ _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
+ __m128i s[4];
+
+ // Filter even-index pixels
+ s[0] = data;
+ s[1] = _mm_srli_si128(data, 2);
+ s[2] = _mm_srli_si128(data, 4);
+ s[3] = _mm_srli_si128(data, 6);
+ const __m128i res_even = convolve_lo_x(s, coeffs);
+
+ // Filter odd-index pixels
+ s[0] = _mm_srli_si128(data, 1);
+ s[1] = _mm_srli_si128(data, 3);
+ s[2] = _mm_srli_si128(data, 5);
+ s[3] = _mm_srli_si128(data, 7);
+ const __m128i res_odd = convolve_lo_x(s, coeffs);
+
+ // Rearrange pixels back into the order 0 ... 7
+ const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
+ const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
+ __m128i res_lo_round =
+ _mm_sra_epi32(_mm_add_epi32(res_lo, round_0_const), round_0_shift);
+ res_lo_round = _mm_sra_epi32(_mm_add_epi32(res_lo_round, round_const),
+ round_shift);
+ __m128i res_hi_round =
+ _mm_sra_epi32(_mm_add_epi32(res_hi, round_0_const), round_0_shift);
+ res_hi_round = _mm_sra_epi32(_mm_add_epi32(res_hi_round, round_const),
+ round_shift);
+
+ const __m128i res16 = _mm_packs_epi32(res_lo_round, res_hi_round);
+ const __m128i res = _mm_packus_epi16(res16, res16);
+
+ _mm_storel_epi64((__m128i *)(dst + i * dst_stride + j), res);
+ j += 8;
+ } while (j < w);
+ } while (++i < h);
+ }
+}
diff --git a/third_party/aom/av1/common/x86/filterintra_sse4.c b/third_party/aom/av1/common/x86/filterintra_sse4.c
index 4f77da446..c11edc1d4 100644
--- a/third_party/aom/av1/common/x86/filterintra_sse4.c
+++ b/third_party/aom/av1/common/x86/filterintra_sse4.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
@@ -11,888 +11,65 @@
#include <smmintrin.h>
-#include "./av1_rtcd.h"
-#include "aom_ports/mem.h"
+#include "config/av1_rtcd.h"
+
+#include "aom_dsp/x86/synonyms.h"
#include "av1/common/enums.h"
#include "av1/common/reconintra.h"
-#if USE_3TAP_INTRA_FILTER
-void filterintra_sse4_3tap_dummy_func(void);
-void filterintra_sse4_3tap_dummy_func(void) {}
-#else
-
-static INLINE void AddPixelsSmall(const uint8_t *above, const uint8_t *left,
- __m128i *sum) {
- const __m128i a = _mm_loadu_si128((const __m128i *)above);
- const __m128i l = _mm_loadu_si128((const __m128i *)left);
- const __m128i zero = _mm_setzero_si128();
-
- __m128i u0 = _mm_unpacklo_epi8(a, zero);
- __m128i u1 = _mm_unpacklo_epi8(l, zero);
-
- sum[0] = _mm_add_epi16(u0, u1);
-}
-
-static INLINE int GetMeanValue4x4(const uint8_t *above, const uint8_t *left,
- __m128i *params) {
- const __m128i zero = _mm_setzero_si128();
- __m128i sum_vector, u;
- uint16_t sum_value;
-
- AddPixelsSmall(above, left, &sum_vector);
-
- sum_vector = _mm_hadd_epi16(sum_vector, zero); // still has 2 values
- u = _mm_srli_si128(sum_vector, 2);
- sum_vector = _mm_add_epi16(sum_vector, u);
-
- sum_value = _mm_extract_epi16(sum_vector, 0);
- sum_value += 4;
- sum_value >>= 3;
- *params = _mm_set1_epi32(sum_value);
- return sum_value;
-}
-
-static INLINE int GetMeanValue8x8(const uint8_t *above, const uint8_t *left,
- __m128i *params) {
- const __m128i zero = _mm_setzero_si128();
- __m128i sum_vector, u;
- uint16_t sum_value;
-
- AddPixelsSmall(above, left, &sum_vector);
-
- sum_vector = _mm_hadd_epi16(sum_vector, zero); // still has 4 values
- sum_vector = _mm_hadd_epi16(sum_vector, zero); // still has 2 values
-
- u = _mm_srli_si128(sum_vector, 2);
- sum_vector = _mm_add_epi16(sum_vector, u);
-
- sum_value = _mm_extract_epi16(sum_vector, 0);
- sum_value += 8;
- sum_value >>= 4;
- *params = _mm_set1_epi32(sum_value);
- return sum_value;
-}
-
-static INLINE void AddPixelsLarge(const uint8_t *above, const uint8_t *left,
- __m128i *sum) {
- const __m128i a = _mm_loadu_si128((const __m128i *)above);
- const __m128i l = _mm_loadu_si128((const __m128i *)left);
- const __m128i zero = _mm_setzero_si128();
-
- __m128i u0 = _mm_unpacklo_epi8(a, zero);
- __m128i u1 = _mm_unpacklo_epi8(l, zero);
-
- sum[0] = _mm_add_epi16(u0, u1);
-
- u0 = _mm_unpackhi_epi8(a, zero);
- u1 = _mm_unpackhi_epi8(l, zero);
-
- sum[0] = _mm_add_epi16(sum[0], u0);
- sum[0] = _mm_add_epi16(sum[0], u1);
-}
-
-static INLINE int GetMeanValue16x16(const uint8_t *above, const uint8_t *left,
- __m128i *params) {
- const __m128i zero = _mm_setzero_si128();
- __m128i sum_vector, u;
- uint16_t sum_value;
-
- AddPixelsLarge(above, left, &sum_vector);
-
- sum_vector = _mm_hadd_epi16(sum_vector, zero); // still has 4 values
- sum_vector = _mm_hadd_epi16(sum_vector, zero); // still has 2 values
-
- u = _mm_srli_si128(sum_vector, 2);
- sum_vector = _mm_add_epi16(sum_vector, u);
-
- sum_value = _mm_extract_epi16(sum_vector, 0);
- sum_value += 16;
- sum_value >>= 5;
- *params = _mm_set1_epi32(sum_value);
- return sum_value;
-}
-
-static INLINE int GetMeanValue32x32(const uint8_t *above, const uint8_t *left,
- __m128i *params) {
- const __m128i zero = _mm_setzero_si128();
- __m128i sum_vector[2], u;
- uint16_t sum_value;
-
- AddPixelsLarge(above, left, &sum_vector[0]);
- AddPixelsLarge(above + 16, left + 16, &sum_vector[1]);
-
- sum_vector[0] = _mm_add_epi16(sum_vector[0], sum_vector[1]);
- sum_vector[0] = _mm_hadd_epi16(sum_vector[0], zero); // still has 4 values
- sum_vector[0] = _mm_hadd_epi16(sum_vector[0], zero); // still has 2 values
-
- u = _mm_srli_si128(sum_vector[0], 2);
- sum_vector[0] = _mm_add_epi16(sum_vector[0], u);
-
- sum_value = _mm_extract_epi16(sum_vector[0], 0);
- sum_value += 32;
- sum_value >>= 6;
- *params = _mm_set1_epi32(sum_value);
- return sum_value;
-}
-
-// Note:
-// params[4] : mean value, 4 int32_t repetition
-//
-static INLINE int CalcRefPixelsMeanValue(const uint8_t *above,
- const uint8_t *left, int bs,
- __m128i *params) {
- int meanValue = 0;
- switch (bs) {
- case 4: meanValue = GetMeanValue4x4(above, left, params); break;
- case 8: meanValue = GetMeanValue8x8(above, left, params); break;
- case 16: meanValue = GetMeanValue16x16(above, left, params); break;
- case 32: meanValue = GetMeanValue32x32(above, left, params); break;
- default: assert(0);
- }
- return meanValue;
-}
-
-// Note:
-// params[0-3] : 4-tap filter coefficients (int32_t per coefficient)
-//
-static INLINE void GetIntraFilterParams(int bs, int mode, __m128i *params) {
- const TX_SIZE tx_size =
- (bs == 32) ? TX_32X32
- : ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4)));
- // c0
- params[0] = _mm_set_epi32(av1_filter_intra_taps_4[tx_size][mode][0],
- av1_filter_intra_taps_4[tx_size][mode][0],
- av1_filter_intra_taps_4[tx_size][mode][0],
- av1_filter_intra_taps_4[tx_size][mode][0]);
- // c1
- params[1] = _mm_set_epi32(av1_filter_intra_taps_4[tx_size][mode][1],
- av1_filter_intra_taps_4[tx_size][mode][1],
- av1_filter_intra_taps_4[tx_size][mode][1],
- av1_filter_intra_taps_4[tx_size][mode][1]);
- // c2
- params[2] = _mm_set_epi32(av1_filter_intra_taps_4[tx_size][mode][2],
- av1_filter_intra_taps_4[tx_size][mode][2],
- av1_filter_intra_taps_4[tx_size][mode][2],
- av1_filter_intra_taps_4[tx_size][mode][2]);
- // c3
- params[3] = _mm_set_epi32(av1_filter_intra_taps_4[tx_size][mode][3],
- av1_filter_intra_taps_4[tx_size][mode][3],
- av1_filter_intra_taps_4[tx_size][mode][3],
- av1_filter_intra_taps_4[tx_size][mode][3]);
-}
-
-static const int maxBlkSize = 32;
-
-static INLINE void SavePred4x4(int *pred, const __m128i *mean, uint8_t *dst,
- ptrdiff_t stride) {
- const int predStride = (maxBlkSize << 1) + 1;
- __m128i p0 = _mm_loadu_si128((const __m128i *)pred);
- __m128i p1 = _mm_loadu_si128((const __m128i *)(pred + predStride));
- __m128i p2 = _mm_loadu_si128((const __m128i *)(pred + 2 * predStride));
- __m128i p3 = _mm_loadu_si128((const __m128i *)(pred + 3 * predStride));
-
- p0 = _mm_add_epi32(p0, mean[0]);
- p1 = _mm_add_epi32(p1, mean[0]);
- p2 = _mm_add_epi32(p2, mean[0]);
- p3 = _mm_add_epi32(p3, mean[0]);
-
- p0 = _mm_packus_epi32(p0, p1);
- p1 = _mm_packus_epi32(p2, p3);
- p0 = _mm_packus_epi16(p0, p1);
-
- *((int *)dst) = _mm_cvtsi128_si32(p0);
- p0 = _mm_srli_si128(p0, 4);
- *((int *)(dst + stride)) = _mm_cvtsi128_si32(p0);
- p0 = _mm_srli_si128(p0, 4);
- *((int *)(dst + 2 * stride)) = _mm_cvtsi128_si32(p0);
- p0 = _mm_srli_si128(p0, 4);
- *((int *)(dst + 3 * stride)) = _mm_cvtsi128_si32(p0);
-}
-
-static void SavePred8x8(int *pred, const __m128i *mean, uint8_t *dst,
- ptrdiff_t stride) {
- const int predStride = (maxBlkSize << 1) + 1;
- __m128i p0, p1, p2, p3;
- int r = 0;
-
- while (r < 8) {
- p0 = _mm_loadu_si128((const __m128i *)(pred + r * predStride));
- p1 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 4));
- r += 1;
- p2 = _mm_loadu_si128((const __m128i *)(pred + r * predStride));
- p3 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 4));
-
- p0 = _mm_add_epi32(p0, mean[0]);
- p1 = _mm_add_epi32(p1, mean[0]);
- p2 = _mm_add_epi32(p2, mean[0]);
- p3 = _mm_add_epi32(p3, mean[0]);
-
- p0 = _mm_packus_epi32(p0, p1);
- p1 = _mm_packus_epi32(p2, p3);
- p0 = _mm_packus_epi16(p0, p1);
-
- _mm_storel_epi64((__m128i *)dst, p0);
- dst += stride;
- p0 = _mm_srli_si128(p0, 8);
- _mm_storel_epi64((__m128i *)dst, p0);
- dst += stride;
- r += 1;
- }
-}
-
-static void SavePred16x16(int *pred, const __m128i *mean, uint8_t *dst,
- ptrdiff_t stride) {
- const int predStride = (maxBlkSize << 1) + 1;
- __m128i p0, p1, p2, p3;
- int r = 0;
-
- while (r < 16) {
- p0 = _mm_loadu_si128((const __m128i *)(pred + r * predStride));
- p1 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 4));
- p2 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 8));
- p3 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 12));
-
- p0 = _mm_add_epi32(p0, mean[0]);
- p1 = _mm_add_epi32(p1, mean[0]);
- p2 = _mm_add_epi32(p2, mean[0]);
- p3 = _mm_add_epi32(p3, mean[0]);
-
- p0 = _mm_packus_epi32(p0, p1);
- p1 = _mm_packus_epi32(p2, p3);
- p0 = _mm_packus_epi16(p0, p1);
-
- _mm_storel_epi64((__m128i *)dst, p0);
- p0 = _mm_srli_si128(p0, 8);
- _mm_storel_epi64((__m128i *)(dst + 8), p0);
- dst += stride;
- r += 1;
- }
-}
-
-static void SavePred32x32(int *pred, const __m128i *mean, uint8_t *dst,
- ptrdiff_t stride) {
- const int predStride = (maxBlkSize << 1) + 1;
- __m128i p0, p1, p2, p3, p4, p5, p6, p7;
- int r = 0;
-
- while (r < 32) {
- p0 = _mm_loadu_si128((const __m128i *)(pred + r * predStride));
- p1 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 4));
- p2 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 8));
- p3 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 12));
-
- p4 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 16));
- p5 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 20));
- p6 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 24));
- p7 = _mm_loadu_si128((const __m128i *)(pred + r * predStride + 28));
-
- p0 = _mm_add_epi32(p0, mean[0]);
- p1 = _mm_add_epi32(p1, mean[0]);
- p2 = _mm_add_epi32(p2, mean[0]);
- p3 = _mm_add_epi32(p3, mean[0]);
-
- p4 = _mm_add_epi32(p4, mean[0]);
- p5 = _mm_add_epi32(p5, mean[0]);
- p6 = _mm_add_epi32(p6, mean[0]);
- p7 = _mm_add_epi32(p7, mean[0]);
-
- p0 = _mm_packus_epi32(p0, p1);
- p1 = _mm_packus_epi32(p2, p3);
- p0 = _mm_packus_epi16(p0, p1);
-
- p4 = _mm_packus_epi32(p4, p5);
- p5 = _mm_packus_epi32(p6, p7);
- p4 = _mm_packus_epi16(p4, p5);
-
- _mm_storel_epi64((__m128i *)dst, p0);
- p0 = _mm_srli_si128(p0, 8);
- _mm_storel_epi64((__m128i *)(dst + 8), p0);
-
- _mm_storel_epi64((__m128i *)(dst + 16), p4);
- p4 = _mm_srli_si128(p4, 8);
- _mm_storel_epi64((__m128i *)(dst + 24), p4);
-
- dst += stride;
- r += 1;
- }
-}
-
-static void SavePrediction(int *pred, const __m128i *mean, int bs, uint8_t *dst,
- ptrdiff_t stride) {
- switch (bs) {
- case 4: SavePred4x4(pred, mean, dst, stride); break;
- case 8: SavePred8x8(pred, mean, dst, stride); break;
- case 16: SavePred16x16(pred, mean, dst, stride); break;
- case 32: SavePred32x32(pred, mean, dst, stride); break;
- default: assert(0);
- }
-}
-
-typedef void (*ProducePixelsFunc)(__m128i *p, const __m128i *prm, int *pred,
- const int predStride);
-
-static void ProduceFourPixels(__m128i *p, const __m128i *prm, int *pred,
- const int predStride) {
- __m128i u0, u1, u2;
- int c0 = _mm_extract_epi32(prm[1], 0);
- int x = *(pred + predStride);
- int sum;
-
- u0 = _mm_mullo_epi32(p[0], prm[2]);
- u1 = _mm_mullo_epi32(p[1], prm[0]);
- u2 = _mm_mullo_epi32(p[2], prm[3]);
-
- u0 = _mm_add_epi32(u0, u1);
- u0 = _mm_add_epi32(u0, u2);
-
- sum = _mm_extract_epi32(u0, 0);
- sum += c0 * x;
- x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
- *(pred + predStride + 1) = x;
-
- sum = _mm_extract_epi32(u0, 1);
- sum += c0 * x;
- x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
- *(pred + predStride + 2) = x;
-
- sum = _mm_extract_epi32(u0, 2);
- sum += c0 * x;
- x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
- *(pred + predStride + 3) = x;
-
- sum = _mm_extract_epi32(u0, 3);
- sum += c0 * x;
- x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
- *(pred + predStride + 4) = x;
-}
-
-static void ProduceThreePixels(__m128i *p, const __m128i *prm, int *pred,
- const int predStride) {
- __m128i u0, u1, u2;
- int c0 = _mm_extract_epi32(prm[1], 0);
- int x = *(pred + predStride);
- int sum;
-
- u0 = _mm_mullo_epi32(p[0], prm[2]);
- u1 = _mm_mullo_epi32(p[1], prm[0]);
- u2 = _mm_mullo_epi32(p[2], prm[3]);
-
- u0 = _mm_add_epi32(u0, u1);
- u0 = _mm_add_epi32(u0, u2);
-
- sum = _mm_extract_epi32(u0, 0);
- sum += c0 * x;
- x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
- *(pred + predStride + 1) = x;
-
- sum = _mm_extract_epi32(u0, 1);
- sum += c0 * x;
- x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
- *(pred + predStride + 2) = x;
-
- sum = _mm_extract_epi32(u0, 2);
- sum += c0 * x;
- x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
- *(pred + predStride + 3) = x;
-}
-
-static void ProduceTwoPixels(__m128i *p, const __m128i *prm, int *pred,
- const int predStride) {
- __m128i u0, u1, u2;
- int c0 = _mm_extract_epi32(prm[1], 0);
- int x = *(pred + predStride);
- int sum;
-
- u0 = _mm_mullo_epi32(p[0], prm[2]);
- u1 = _mm_mullo_epi32(p[1], prm[0]);
- u2 = _mm_mullo_epi32(p[2], prm[3]);
-
- u0 = _mm_add_epi32(u0, u1);
- u0 = _mm_add_epi32(u0, u2);
-
- sum = _mm_extract_epi32(u0, 0);
- sum += c0 * x;
- x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
- *(pred + predStride + 1) = x;
-
- sum = _mm_extract_epi32(u0, 1);
- sum += c0 * x;
- x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
- *(pred + predStride + 2) = x;
-}
-
-static void ProduceOnePixels(__m128i *p, const __m128i *prm, int *pred,
- const int predStride) {
- __m128i u0, u1, u2;
- int c0 = _mm_extract_epi32(prm[1], 0);
- int x = *(pred + predStride);
- int sum;
-
- u0 = _mm_mullo_epi32(p[0], prm[2]);
- u1 = _mm_mullo_epi32(p[1], prm[0]);
- u2 = _mm_mullo_epi32(p[2], prm[3]);
-
- u0 = _mm_add_epi32(u0, u1);
- u0 = _mm_add_epi32(u0, u2);
-
- sum = _mm_extract_epi32(u0, 0);
- sum += c0 * x;
- x = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
- *(pred + predStride + 1) = x;
-}
-
-static ProducePixelsFunc prodPixelsFuncTab[4] = {
- ProduceOnePixels, ProduceTwoPixels, ProduceThreePixels, ProduceFourPixels
-};
-
-static void ProducePixels(int *pred, const __m128i *prm, int remain) {
- __m128i p[3];
- const int predStride = (maxBlkSize << 1) + 1;
- int index;
-
- p[0] = _mm_loadu_si128((const __m128i *)pred);
- p[1] = _mm_loadu_si128((const __m128i *)(pred + 1));
- p[2] = _mm_loadu_si128((const __m128i *)(pred + 2));
-
- if (remain <= 2) {
- return;
- }
- if (remain > 5) {
- index = 3;
- } else {
- index = remain - 3;
- }
- prodPixelsFuncTab[index](p, prm, pred, predStride);
-}
-
-// Note:
-// At column index c, the remaining pixels are R = 2 * bs + 1 - r - c
-// the number of pixels to produce is R - 2 = 2 * bs - r - c - 1
-static void GeneratePrediction(const uint8_t *above, const uint8_t *left,
- const int bs, const __m128i *prm, int meanValue,
- uint8_t *dst, ptrdiff_t stride) {
- int pred[33][65];
- int r, c, colBound;
- int remainings;
-
- for (r = 0; r < bs; ++r) {
- pred[r + 1][0] = (int)left[r] - meanValue;
- }
-
- above -= 1;
- for (c = 0; c < 2 * bs + 1; ++c) {
- pred[0][c] = (int)above[c] - meanValue;
- }
-
- r = 0;
- c = 0;
- while (r < bs) {
- colBound = (bs << 1) - r;
- for (c = 0; c < colBound; c += 4) {
- remainings = colBound - c + 1;
- ProducePixels(&pred[r][c], prm, remainings);
- }
- r += 1;
- }
-
- SavePrediction(&pred[1][1], &prm[4], bs, dst, stride);
-}
-
-static void FilterPrediction(const uint8_t *above, const uint8_t *left, int bs,
- __m128i *prm, uint8_t *dst, ptrdiff_t stride) {
- int meanValue = 0;
- meanValue = CalcRefPixelsMeanValue(above, left, bs, &prm[4]);
- GeneratePrediction(above, left, bs, prm, meanValue, dst, stride);
-}
-
-void av1_dc_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
- const uint8_t *above, const uint8_t *left) {
- __m128i prm[5];
- GetIntraFilterParams(bs, DC_PRED, &prm[0]);
- FilterPrediction(above, left, bs, prm, dst, stride);
-}
-
-void av1_v_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
- const uint8_t *above, const uint8_t *left) {
- __m128i prm[5];
- GetIntraFilterParams(bs, V_PRED, &prm[0]);
- FilterPrediction(above, left, bs, prm, dst, stride);
-}
-
-void av1_h_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
- const uint8_t *above, const uint8_t *left) {
- __m128i prm[5];
- GetIntraFilterParams(bs, H_PRED, &prm[0]);
- FilterPrediction(above, left, bs, prm, dst, stride);
-}
-
-void av1_d45_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i prm[5];
- GetIntraFilterParams(bs, D45_PRED, &prm[0]);
- FilterPrediction(above, left, bs, prm, dst, stride);
-}
-
-void av1_d135_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i prm[5];
- GetIntraFilterParams(bs, D135_PRED, &prm[0]);
- FilterPrediction(above, left, bs, prm, dst, stride);
-}
-
-void av1_d117_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i prm[5];
- GetIntraFilterParams(bs, D117_PRED, &prm[0]);
- FilterPrediction(above, left, bs, prm, dst, stride);
-}
-
-void av1_d153_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i prm[5];
- GetIntraFilterParams(bs, D153_PRED, &prm[0]);
- FilterPrediction(above, left, bs, prm, dst, stride);
-}
-
-void av1_d207_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i prm[5];
- GetIntraFilterParams(bs, D207_PRED, &prm[0]);
- FilterPrediction(above, left, bs, prm, dst, stride);
-}
-
-void av1_d63_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i prm[5];
- GetIntraFilterParams(bs, D63_PRED, &prm[0]);
- FilterPrediction(above, left, bs, prm, dst, stride);
-}
-
-void av1_tm_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
- const uint8_t *above, const uint8_t *left) {
- __m128i prm[5];
- GetIntraFilterParams(bs, TM_PRED, &prm[0]);
- FilterPrediction(above, left, bs, prm, dst, stride);
-}
-
-// ============== High Bit Depth ==============
-#if CONFIG_HIGHBITDEPTH
-static INLINE int HighbdGetMeanValue4x4(const uint16_t *above,
- const uint16_t *left, const int bd,
- __m128i *params) {
- const __m128i a = _mm_loadu_si128((const __m128i *)above);
- const __m128i l = _mm_loadu_si128((const __m128i *)left);
- const __m128i zero = _mm_setzero_si128();
- __m128i sum_vector, u;
- uint16_t sum_value;
- (void)bd;
-
- sum_vector = _mm_add_epi16(a, l);
-
- sum_vector = _mm_hadd_epi16(sum_vector, zero); // still has 2 values
- u = _mm_srli_si128(sum_vector, 2);
- sum_vector = _mm_add_epi16(sum_vector, u);
-
- sum_value = _mm_extract_epi16(sum_vector, 0);
- sum_value += 4;
- sum_value >>= 3;
- *params = _mm_set1_epi32(sum_value);
- return sum_value;
-}
-
-static INLINE int HighbdGetMeanValue8x8(const uint16_t *above,
- const uint16_t *left, const int bd,
- __m128i *params) {
- const __m128i a = _mm_loadu_si128((const __m128i *)above);
- const __m128i l = _mm_loadu_si128((const __m128i *)left);
- const __m128i zero = _mm_setzero_si128();
- __m128i sum_vector, u;
- uint16_t sum_value;
- (void)bd;
-
- sum_vector = _mm_add_epi16(a, l);
-
- sum_vector = _mm_hadd_epi16(sum_vector, zero); // still has 4 values
- sum_vector = _mm_hadd_epi16(sum_vector, zero); // still has 2 values
-
- u = _mm_srli_si128(sum_vector, 2);
- sum_vector = _mm_add_epi16(sum_vector, u);
-
- sum_value = _mm_extract_epi16(sum_vector, 0);
- sum_value += 8;
- sum_value >>= 4;
- *params = _mm_set1_epi32(sum_value);
- return sum_value;
-}
-
-// Note:
-// Process 16 pixels above and left, 10-bit depth
-// Add to the last 8 pixels sum
-static INLINE void AddPixels10bit(const uint16_t *above, const uint16_t *left,
- __m128i *sum) {
- __m128i a = _mm_loadu_si128((const __m128i *)above);
- __m128i l = _mm_loadu_si128((const __m128i *)left);
- sum[0] = _mm_add_epi16(a, l);
- a = _mm_loadu_si128((const __m128i *)(above + 8));
- l = _mm_loadu_si128((const __m128i *)(left + 8));
- sum[0] = _mm_add_epi16(sum[0], a);
- sum[0] = _mm_add_epi16(sum[0], l);
-}
-
-// Note:
-// Process 16 pixels above and left, 12-bit depth
-// Add to the last 8 pixels sum
-static INLINE void AddPixels12bit(const uint16_t *above, const uint16_t *left,
- __m128i *sum) {
- __m128i a = _mm_loadu_si128((const __m128i *)above);
- __m128i l = _mm_loadu_si128((const __m128i *)left);
- const __m128i zero = _mm_setzero_si128();
- __m128i v0, v1;
-
- v0 = _mm_unpacklo_epi16(a, zero);
- v1 = _mm_unpacklo_epi16(l, zero);
- sum[0] = _mm_add_epi32(v0, v1);
-
- v0 = _mm_unpackhi_epi16(a, zero);
- v1 = _mm_unpackhi_epi16(l, zero);
- sum[0] = _mm_add_epi32(sum[0], v0);
- sum[0] = _mm_add_epi32(sum[0], v1);
-
- a = _mm_loadu_si128((const __m128i *)(above + 8));
- l = _mm_loadu_si128((const __m128i *)(left + 8));
-
- v0 = _mm_unpacklo_epi16(a, zero);
- v1 = _mm_unpacklo_epi16(l, zero);
- sum[0] = _mm_add_epi32(sum[0], v0);
- sum[0] = _mm_add_epi32(sum[0], v1);
-
- v0 = _mm_unpackhi_epi16(a, zero);
- v1 = _mm_unpackhi_epi16(l, zero);
- sum[0] = _mm_add_epi32(sum[0], v0);
- sum[0] = _mm_add_epi32(sum[0], v1);
-}
-
-static INLINE int HighbdGetMeanValue16x16(const uint16_t *above,
- const uint16_t *left, const int bd,
- __m128i *params) {
- const __m128i zero = _mm_setzero_si128();
- __m128i sum_vector, u;
- uint32_t sum_value = 0;
-
- if (10 == bd) {
- AddPixels10bit(above, left, &sum_vector);
- sum_vector = _mm_hadd_epi16(sum_vector, zero); // still has 4 values
- sum_vector = _mm_hadd_epi16(sum_vector, zero); // still has 2 values
-
- u = _mm_srli_si128(sum_vector, 2);
- sum_vector = _mm_add_epi16(sum_vector, u);
- sum_value = _mm_extract_epi16(sum_vector, 0);
- } else if (12 == bd) {
- AddPixels12bit(above, left, &sum_vector);
-
- sum_vector = _mm_hadd_epi32(sum_vector, zero);
- u = _mm_srli_si128(sum_vector, 4);
- sum_vector = _mm_add_epi32(u, sum_vector);
- sum_value = _mm_extract_epi32(sum_vector, 0);
- }
-
- sum_value += 16;
- sum_value >>= 5;
- *params = _mm_set1_epi32(sum_value);
- return sum_value;
-}
-
-static INLINE int HighbdGetMeanValue32x32(const uint16_t *above,
- const uint16_t *left, const int bd,
- __m128i *params) {
- const __m128i zero = _mm_setzero_si128();
- __m128i sum_vector[2], u;
- uint32_t sum_value = 0;
-
- if (10 == bd) {
- AddPixels10bit(above, left, &sum_vector[0]);
- AddPixels10bit(above + 16, left + 16, &sum_vector[1]);
-
- sum_vector[0] = _mm_add_epi16(sum_vector[0], sum_vector[1]);
- sum_vector[0] = _mm_hadd_epi16(sum_vector[0], zero); // still has 4 values
- sum_vector[0] = _mm_hadd_epi16(sum_vector[0], zero); // still has 2 values
-
- u = _mm_srli_si128(sum_vector[0], 2);
- sum_vector[0] = _mm_add_epi16(sum_vector[0], u);
- sum_value = _mm_extract_epi16(sum_vector[0], 0);
- } else if (12 == bd) {
- AddPixels12bit(above, left, &sum_vector[0]);
- AddPixels12bit(above + 16, left + 16, &sum_vector[1]);
-
- sum_vector[0] = _mm_add_epi32(sum_vector[0], sum_vector[1]);
- sum_vector[0] = _mm_hadd_epi32(sum_vector[0], zero);
- u = _mm_srli_si128(sum_vector[0], 4);
- sum_vector[0] = _mm_add_epi32(u, sum_vector[0]);
- sum_value = _mm_extract_epi32(sum_vector[0], 0);
- }
-
- sum_value += 32;
- sum_value >>= 6;
- *params = _mm_set1_epi32(sum_value);
- return sum_value;
-}
-
-// Note:
-// params[4] : mean value, 4 int32_t repetition
-//
-static INLINE int HighbdCalcRefPixelsMeanValue(const uint16_t *above,
- const uint16_t *left, int bs,
- const int bd, __m128i *params) {
- int meanValue = 0;
- switch (bs) {
- case 4: meanValue = HighbdGetMeanValue4x4(above, left, bd, params); break;
- case 8: meanValue = HighbdGetMeanValue8x8(above, left, bd, params); break;
- case 16:
- meanValue = HighbdGetMeanValue16x16(above, left, bd, params);
- break;
- case 32:
- meanValue = HighbdGetMeanValue32x32(above, left, bd, params);
- break;
- default: assert(0);
- }
- return meanValue;
-}
-
-// Note:
-// At column index c, the remaining pixels are R = 2 * bs + 1 - r - c
-// the number of pixels to produce is R - 2 = 2 * bs - r - c - 1
-static void HighbdGeneratePrediction(const uint16_t *above,
- const uint16_t *left, const int bs,
- const int bd, const __m128i *prm,
- int meanValue, uint16_t *dst,
- ptrdiff_t stride) {
- int pred[33][65];
- int r, c, colBound;
- int remainings;
- int ipred;
-
- for (r = 0; r < bs; ++r) {
- pred[r + 1][0] = (int)left[r] - meanValue;
- }
-
- above -= 1;
- for (c = 0; c < 2 * bs + 1; ++c) {
- pred[0][c] = (int)above[c] - meanValue;
- }
-
- r = 0;
- c = 0;
- while (r < bs) {
- colBound = (bs << 1) - r;
- for (c = 0; c < colBound; c += 4) {
- remainings = colBound - c + 1;
- ProducePixels(&pred[r][c], prm, remainings);
+void av1_filter_intra_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride,
+ TX_SIZE tx_size, const uint8_t *above,
+ const uint8_t *left, int mode) {
+ int r, c;
+ uint8_t buffer[33][33];
+ const int bw = tx_size_wide[tx_size];
+ const int bh = tx_size_high[tx_size];
+
+ assert(bw <= 32 && bh <= 32);
+
+  // This initialization only silences Jenkins static analysis warnings;
+  // every buffer entry used below is written before it is read.
+ for (r = 0; r < bh + 1; ++r)
+ memset(buffer[r], 0, (bw + 1) * sizeof(buffer[0][0]));
+
+ for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
+ memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
+
+ const __m128i f1f0 = xx_load_128(av1_filter_intra_taps[mode][0]);
+ const __m128i f3f2 = xx_load_128(av1_filter_intra_taps[mode][2]);
+ const __m128i f5f4 = xx_load_128(av1_filter_intra_taps[mode][4]);
+ const __m128i f7f6 = xx_load_128(av1_filter_intra_taps[mode][6]);
+ const __m128i filter_intra_scale_bits =
+ _mm_set1_epi16(1 << (15 - FILTER_INTRA_SCALE_BITS));
+
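+  // Predict the block in 4x2 patches: each patch reads 7 reconstructed
+  // neighbours from the buffer, 5 from the row above (p[0..4]) and 2 from
+  // the column to the left (p[5], p[6]); p[7] is zero padding so the
+  // multiply-adds below can consume 8 bytes at a time.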
+ for (r = 1; r < bh + 1; r += 2) {
+ for (c = 1; c < bw + 1; c += 4) {
+ DECLARE_ALIGNED(16, uint8_t, p[8]);
+ memcpy(p, &buffer[r - 1][c - 1], 5 * sizeof(uint8_t));
+ p[5] = buffer[r][c - 1];
+ p[6] = buffer[r + 1][c - 1];
+ p[7] = 0;
+ const __m128i p_b = xx_loadl_64(p);
+ const __m128i in = _mm_unpacklo_epi64(p_b, p_b);
+ const __m128i out_01 = _mm_maddubs_epi16(in, f1f0);
+ const __m128i out_23 = _mm_maddubs_epi16(in, f3f2);
+ const __m128i out_45 = _mm_maddubs_epi16(in, f5f4);
+ const __m128i out_67 = _mm_maddubs_epi16(in, f7f6);
+ const __m128i out_0123 = _mm_hadd_epi16(out_01, out_23);
+ const __m128i out_4567 = _mm_hadd_epi16(out_45, out_67);
+ const __m128i out_01234567 = _mm_hadd_epi16(out_0123, out_4567);
+      // Rounding: multiplying by 1 << (15 - FILTER_INTRA_SCALE_BITS) with
+      // _mm_mulhrs_epi16 performs a rounded right shift by
+      // FILTER_INTRA_SCALE_BITS.
+ const __m128i round_w =
+ _mm_mulhrs_epi16(out_01234567, filter_intra_scale_bits);
+ const __m128i out_r = _mm_packus_epi16(round_w, round_w);
+ const __m128i out_r1 = _mm_srli_si128(out_r, 4);
+ // Storing
+ xx_storel_32(&buffer[r][c], out_r);
+ xx_storel_32(&buffer[r + 1][c], out_r1);
}
- r += 1;
}
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c) {
- ipred = pred[r + 1][c + 1] + meanValue;
- dst[c] = clip_pixel_highbd(ipred, bd);
- }
+ for (r = 0; r < bh; ++r) {
+ memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
dst += stride;
}
}
-
-static void HighbdFilterPrediction(const uint16_t *above, const uint16_t *left,
- int bs, const int bd, __m128i *prm,
- uint16_t *dst, ptrdiff_t stride) {
- int meanValue = 0;
- meanValue = HighbdCalcRefPixelsMeanValue(above, left, bs, bd, &prm[4]);
- HighbdGeneratePrediction(above, left, bs, bd, prm, meanValue, dst, stride);
-}
-
-void av1_highbd_dc_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
- int bs, const uint16_t *above,
- const uint16_t *left, int bd) {
- __m128i prm[5];
- GetIntraFilterParams(bs, DC_PRED, &prm[0]);
- HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
-}
-
-void av1_highbd_v_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
- int bs, const uint16_t *above,
- const uint16_t *left, int bd) {
- __m128i prm[5];
- GetIntraFilterParams(bs, V_PRED, &prm[0]);
- HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
-}
-
-void av1_highbd_h_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
- int bs, const uint16_t *above,
- const uint16_t *left, int bd) {
- __m128i prm[5];
- GetIntraFilterParams(bs, H_PRED, &prm[0]);
- HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
-}
-
-void av1_highbd_d45_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
- int bs, const uint16_t *above,
- const uint16_t *left, int bd) {
- __m128i prm[5];
- GetIntraFilterParams(bs, D45_PRED, &prm[0]);
- HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
-}
-
-void av1_highbd_d135_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
- int bs, const uint16_t *above,
- const uint16_t *left, int bd) {
- __m128i prm[5];
- GetIntraFilterParams(bs, D135_PRED, &prm[0]);
- HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
-}
-
-void av1_highbd_d117_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
- int bs, const uint16_t *above,
- const uint16_t *left, int bd) {
- __m128i prm[5];
- GetIntraFilterParams(bs, D117_PRED, &prm[0]);
- HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
-}
-
-void av1_highbd_d153_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
- int bs, const uint16_t *above,
- const uint16_t *left, int bd) {
- __m128i prm[5];
- GetIntraFilterParams(bs, D153_PRED, &prm[0]);
- HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
-}
-
-void av1_highbd_d207_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
- int bs, const uint16_t *above,
- const uint16_t *left, int bd) {
- __m128i prm[5];
- GetIntraFilterParams(bs, D207_PRED, &prm[0]);
- HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
-}
-
-void av1_highbd_d63_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
- int bs, const uint16_t *above,
- const uint16_t *left, int bd) {
- __m128i prm[5];
- GetIntraFilterParams(bs, D63_PRED, &prm[0]);
- HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
-}
-
-void av1_highbd_tm_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
- int bs, const uint16_t *above,
- const uint16_t *left, int bd) {
- __m128i prm[5];
- GetIntraFilterParams(bs, TM_PRED, &prm[0]);
- HighbdFilterPrediction(above, left, bs, bd, prm, dst, stride);
-}
-#endif // CONFIG_HIGHBITDEPTH
-
-#endif // USE_3TAP_INTRA_FILTER
diff --git a/third_party/aom/av1/common/x86/highbd_convolve_2d_avx2.c b/third_party/aom/av1/common/x86/highbd_convolve_2d_avx2.c
new file mode 100644
index 000000000..a34c618d0
--- /dev/null
+++ b/third_party/aom/av1/common/x86/highbd_convolve_2d_avx2.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <immintrin.h>
+#include <assert.h>
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_dsp/aom_convolve.h"
+#include "aom_dsp/x86/convolve_avx2.h"
+#include "aom_dsp/x86/synonyms.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/aom_filter.h"
+#include "av1/common/convolve.h"
+
+void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride,
+ uint16_t *dst, int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4,
+ const int subpel_y_q4,
+ ConvolveParams *conv_params, int bd) {
+ DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]);
+ int im_h = h + filter_params_y->taps - 1;
+ int im_stride = 8;
+ int i, j;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
+
+ // Check that, even with 12-bit input, the intermediate values will fit
+ // into an unsigned 16-bit intermediate array.
+ assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
+
+ __m256i s[8], coeffs_y[4], coeffs_x[4];
+
+ const __m256i round_const_x = _mm256_set1_epi32(
+ ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
+ const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0);
+
+ const __m256i round_const_y = _mm256_set1_epi32(
+ ((1 << conv_params->round_1) >> 1) -
+ (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
+ const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
+
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
+ const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
+ const __m256i round_const_bits = _mm256_set1_epi32((1 << bits) >> 1);
+ const __m256i clip_pixel =
+ _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
+ const __m256i zero = _mm256_setzero_si256();
+
+ prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x);
+ prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
+
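+  // im_block holds one horizontally filtered 8-column strip at a time
+  // (im_stride == 8); the outer loop walks the output block strip by strip.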
+ for (j = 0; j < w; j += 8) {
+ /* Horizontal filter */
+ {
+ for (i = 0; i < im_h; i += 2) {
+ const __m256i row0 =
+ _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]);
+ __m256i row1 = _mm256_set1_epi16(0);
+ if (i + 1 < im_h)
+ row1 =
+ _mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]);
+
+ const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20);
+ const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31);
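+        // r0 holds the low 128 bits of both rows and r1 the high 128 bits,
+        // so each 128-bit lane of _mm256_alignr_epi8 below slides a window
+        // within its own row.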
+
+ // even pixels
+ s[0] = _mm256_alignr_epi8(r1, r0, 0);
+ s[1] = _mm256_alignr_epi8(r1, r0, 4);
+ s[2] = _mm256_alignr_epi8(r1, r0, 8);
+ s[3] = _mm256_alignr_epi8(r1, r0, 12);
+
+ __m256i res_even = convolve(s, coeffs_x);
+ res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const_x),
+ round_shift_x);
+
+ // odd pixels
+ s[0] = _mm256_alignr_epi8(r1, r0, 2);
+ s[1] = _mm256_alignr_epi8(r1, r0, 6);
+ s[2] = _mm256_alignr_epi8(r1, r0, 10);
+ s[3] = _mm256_alignr_epi8(r1, r0, 14);
+
+ __m256i res_odd = convolve(s, coeffs_x);
+ res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const_x),
+ round_shift_x);
+
+ __m256i res_even1 = _mm256_packs_epi32(res_even, res_even);
+ __m256i res_odd1 = _mm256_packs_epi32(res_odd, res_odd);
+ __m256i res = _mm256_unpacklo_epi16(res_even1, res_odd1);
+
+ _mm256_store_si256((__m256i *)&im_block[i * im_stride], res);
+ }
+ }
+
+ /* Vertical filter */
+ {
+ __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride));
+ __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride));
+ __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride));
+ __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride));
+ __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride));
+ __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride));
+
+ s[0] = _mm256_unpacklo_epi16(s0, s1);
+ s[1] = _mm256_unpacklo_epi16(s2, s3);
+ s[2] = _mm256_unpacklo_epi16(s4, s5);
+
+ s[4] = _mm256_unpackhi_epi16(s0, s1);
+ s[5] = _mm256_unpackhi_epi16(s2, s3);
+ s[6] = _mm256_unpackhi_epi16(s4, s5);
+
+ for (i = 0; i < h; i += 2) {
+ const int16_t *data = &im_block[i * im_stride];
+
+ const __m256i s6 =
+ _mm256_loadu_si256((__m256i *)(data + 6 * im_stride));
+ const __m256i s7 =
+ _mm256_loadu_si256((__m256i *)(data + 7 * im_stride));
+
+ s[3] = _mm256_unpacklo_epi16(s6, s7);
+ s[7] = _mm256_unpackhi_epi16(s6, s7);
+
+ const __m256i res_a = convolve(s, coeffs_y);
+ __m256i res_a_round = _mm256_sra_epi32(
+ _mm256_add_epi32(res_a, round_const_y), round_shift_y);
+
+ res_a_round = _mm256_sra_epi32(
+ _mm256_add_epi32(res_a_round, round_const_bits), round_shift_bits);
+
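+        // s[0..3] cover columns 0-3 of this 8-column strip (res_a); the
+        // upper columns (res_b) are only computed when more than four
+        // output columns remain.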
+ if (w - j > 4) {
+ const __m256i res_b = convolve(s + 4, coeffs_y);
+ __m256i res_b_round = _mm256_sra_epi32(
+ _mm256_add_epi32(res_b, round_const_y), round_shift_y);
+ res_b_round =
+ _mm256_sra_epi32(_mm256_add_epi32(res_b_round, round_const_bits),
+ round_shift_bits);
+
+ __m256i res_16bit = _mm256_packs_epi32(res_a_round, res_b_round);
+ res_16bit = _mm256_min_epi16(res_16bit, clip_pixel);
+ res_16bit = _mm256_max_epi16(res_16bit, zero);
+
+ _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j],
+ _mm256_castsi256_si128(res_16bit));
+ _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j + dst_stride],
+ _mm256_extracti128_si256(res_16bit, 1));
+ } else if (w == 4) {
+ res_a_round = _mm256_packs_epi32(res_a_round, res_a_round);
+ res_a_round = _mm256_min_epi16(res_a_round, clip_pixel);
+ res_a_round = _mm256_max_epi16(res_a_round, zero);
+
+ _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j],
+ _mm256_castsi256_si128(res_a_round));
+ _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride],
+ _mm256_extracti128_si256(res_a_round, 1));
+ } else {
+ res_a_round = _mm256_packs_epi32(res_a_round, res_a_round);
+ res_a_round = _mm256_min_epi16(res_a_round, clip_pixel);
+ res_a_round = _mm256_max_epi16(res_a_round, zero);
+
+ xx_storel_32((__m128i *)&dst[i * dst_stride + j],
+ _mm256_castsi256_si128(res_a_round));
+ xx_storel_32((__m128i *)&dst[i * dst_stride + j + dst_stride],
+ _mm256_extracti128_si256(res_a_round, 1));
+ }
+
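+        // Slide the vertical filter window down by two rows for the next
+        // pair of output rows.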
+ s[0] = s[1];
+ s[1] = s[2];
+ s[2] = s[3];
+
+ s[4] = s[5];
+ s[5] = s[6];
+ s[6] = s[7];
+ }
+ }
+ }
+}
+
+static INLINE void copy_64(const uint16_t *src, uint16_t *dst) {
+ __m256i s[4];
+ s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 16));
+ s[1] = _mm256_loadu_si256((__m256i *)(src + 1 * 16));
+ s[2] = _mm256_loadu_si256((__m256i *)(src + 2 * 16));
+ s[3] = _mm256_loadu_si256((__m256i *)(src + 3 * 16));
+ _mm256_storeu_si256((__m256i *)(dst + 0 * 16), s[0]);
+ _mm256_storeu_si256((__m256i *)(dst + 1 * 16), s[1]);
+ _mm256_storeu_si256((__m256i *)(dst + 2 * 16), s[2]);
+ _mm256_storeu_si256((__m256i *)(dst + 3 * 16), s[3]);
+}
+
+static INLINE void copy_128(const uint16_t *src, uint16_t *dst) {
+ __m256i s[8];
+ s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 16));
+ s[1] = _mm256_loadu_si256((__m256i *)(src + 1 * 16));
+ s[2] = _mm256_loadu_si256((__m256i *)(src + 2 * 16));
+ s[3] = _mm256_loadu_si256((__m256i *)(src + 3 * 16));
+ s[4] = _mm256_loadu_si256((__m256i *)(src + 4 * 16));
+ s[5] = _mm256_loadu_si256((__m256i *)(src + 5 * 16));
+ s[6] = _mm256_loadu_si256((__m256i *)(src + 6 * 16));
+ s[7] = _mm256_loadu_si256((__m256i *)(src + 7 * 16));
+
+ _mm256_storeu_si256((__m256i *)(dst + 0 * 16), s[0]);
+ _mm256_storeu_si256((__m256i *)(dst + 1 * 16), s[1]);
+ _mm256_storeu_si256((__m256i *)(dst + 2 * 16), s[2]);
+ _mm256_storeu_si256((__m256i *)(dst + 3 * 16), s[3]);
+ _mm256_storeu_si256((__m256i *)(dst + 4 * 16), s[4]);
+ _mm256_storeu_si256((__m256i *)(dst + 5 * 16), s[5]);
+ _mm256_storeu_si256((__m256i *)(dst + 6 * 16), s[6]);
+ _mm256_storeu_si256((__m256i *)(dst + 7 * 16), s[7]);
+}
+
+void av1_highbd_convolve_2d_copy_sr_avx2(
+ const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y, const int subpel_x_q4,
+ const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
+ (void)filter_params_x;
+ (void)filter_params_y;
+ (void)subpel_x_q4;
+ (void)subpel_y_q4;
+ (void)conv_params;
+ (void)bd;
+
+ if (w >= 16) {
+ assert(!((intptr_t)dst % 16));
+ assert(!(dst_stride % 16));
+ }
+
+ if (w == 2) {
+ do {
+ memcpy(dst, src, 2 * sizeof(*src));
+ src += src_stride;
+ dst += dst_stride;
+ memcpy(dst, src, 2 * sizeof(*src));
+ src += src_stride;
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 4) {
+ do {
+ __m128i s[2];
+ s[0] = _mm_loadl_epi64((__m128i *)src);
+ src += src_stride;
+ s[1] = _mm_loadl_epi64((__m128i *)src);
+ src += src_stride;
+ _mm_storel_epi64((__m128i *)dst, s[0]);
+ dst += dst_stride;
+ _mm_storel_epi64((__m128i *)dst, s[1]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 8) {
+ do {
+ __m128i s[2];
+ s[0] = _mm_loadu_si128((__m128i *)src);
+ src += src_stride;
+ s[1] = _mm_loadu_si128((__m128i *)src);
+ src += src_stride;
+ _mm_store_si128((__m128i *)dst, s[0]);
+ dst += dst_stride;
+ _mm_store_si128((__m128i *)dst, s[1]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 16) {
+ do {
+ __m256i s[2];
+ s[0] = _mm256_loadu_si256((__m256i *)src);
+ src += src_stride;
+ s[1] = _mm256_loadu_si256((__m256i *)src);
+ src += src_stride;
+ _mm256_storeu_si256((__m256i *)dst, s[0]);
+ dst += dst_stride;
+ _mm256_storeu_si256((__m256i *)dst, s[1]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 32) {
+ do {
+ __m256i s[4];
+ s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 16));
+ s[1] = _mm256_loadu_si256((__m256i *)(src + 1 * 16));
+ src += src_stride;
+ s[2] = _mm256_loadu_si256((__m256i *)(src + 0 * 16));
+ s[3] = _mm256_loadu_si256((__m256i *)(src + 1 * 16));
+ src += src_stride;
+ _mm256_storeu_si256((__m256i *)(dst + 0 * 16), s[0]);
+ _mm256_storeu_si256((__m256i *)(dst + 1 * 16), s[1]);
+ dst += dst_stride;
+ _mm256_storeu_si256((__m256i *)(dst + 0 * 16), s[2]);
+ _mm256_storeu_si256((__m256i *)(dst + 1 * 16), s[3]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 64) {
+ do {
+ copy_64(src, dst);
+ src += src_stride;
+ dst += dst_stride;
+ copy_64(src, dst);
+ src += src_stride;
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else {
+ do {
+ copy_128(src, dst);
+ src += src_stride;
+ dst += dst_stride;
+ copy_128(src, dst);
+ src += src_stride;
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ }
+}
diff --git a/third_party/aom/av1/common/x86/highbd_convolve_2d_sse2.c b/third_party/aom/av1/common/x86/highbd_convolve_2d_sse2.c
new file mode 100644
index 000000000..bdf813fa0
--- /dev/null
+++ b/third_party/aom/av1/common/x86/highbd_convolve_2d_sse2.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include <emmintrin.h>
+#include <assert.h>
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_dsp/aom_filter.h"
+
+static INLINE void copy_64(const uint16_t *src, uint16_t *dst) {
+ __m128i s[8];
+ s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 8));
+ s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 8));
+ s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 8));
+ s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 8));
+ s[4] = _mm_loadu_si128((__m128i *)(src + 4 * 8));
+ s[5] = _mm_loadu_si128((__m128i *)(src + 5 * 8));
+ s[6] = _mm_loadu_si128((__m128i *)(src + 6 * 8));
+ s[7] = _mm_loadu_si128((__m128i *)(src + 7 * 8));
+ _mm_store_si128((__m128i *)(dst + 0 * 8), s[0]);
+ _mm_store_si128((__m128i *)(dst + 1 * 8), s[1]);
+ _mm_store_si128((__m128i *)(dst + 2 * 8), s[2]);
+ _mm_store_si128((__m128i *)(dst + 3 * 8), s[3]);
+ _mm_store_si128((__m128i *)(dst + 4 * 8), s[4]);
+ _mm_store_si128((__m128i *)(dst + 5 * 8), s[5]);
+ _mm_store_si128((__m128i *)(dst + 6 * 8), s[6]);
+ _mm_store_si128((__m128i *)(dst + 7 * 8), s[7]);
+}
+
+static INLINE void copy_128(const uint16_t *src, uint16_t *dst) {
+ __m128i s[16];
+ s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 8));
+ s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 8));
+ s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 8));
+ s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 8));
+ s[4] = _mm_loadu_si128((__m128i *)(src + 4 * 8));
+ s[5] = _mm_loadu_si128((__m128i *)(src + 5 * 8));
+ s[6] = _mm_loadu_si128((__m128i *)(src + 6 * 8));
+ s[7] = _mm_loadu_si128((__m128i *)(src + 7 * 8));
+ s[8] = _mm_loadu_si128((__m128i *)(src + 8 * 8));
+ s[9] = _mm_loadu_si128((__m128i *)(src + 9 * 8));
+ s[10] = _mm_loadu_si128((__m128i *)(src + 10 * 8));
+ s[11] = _mm_loadu_si128((__m128i *)(src + 11 * 8));
+ s[12] = _mm_loadu_si128((__m128i *)(src + 12 * 8));
+ s[13] = _mm_loadu_si128((__m128i *)(src + 13 * 8));
+ s[14] = _mm_loadu_si128((__m128i *)(src + 14 * 8));
+ s[15] = _mm_loadu_si128((__m128i *)(src + 15 * 8));
+ _mm_store_si128((__m128i *)(dst + 0 * 8), s[0]);
+ _mm_store_si128((__m128i *)(dst + 1 * 8), s[1]);
+ _mm_store_si128((__m128i *)(dst + 2 * 8), s[2]);
+ _mm_store_si128((__m128i *)(dst + 3 * 8), s[3]);
+ _mm_store_si128((__m128i *)(dst + 4 * 8), s[4]);
+ _mm_store_si128((__m128i *)(dst + 5 * 8), s[5]);
+ _mm_store_si128((__m128i *)(dst + 6 * 8), s[6]);
+ _mm_store_si128((__m128i *)(dst + 7 * 8), s[7]);
+ _mm_store_si128((__m128i *)(dst + 8 * 8), s[8]);
+ _mm_store_si128((__m128i *)(dst + 9 * 8), s[9]);
+ _mm_store_si128((__m128i *)(dst + 10 * 8), s[10]);
+ _mm_store_si128((__m128i *)(dst + 11 * 8), s[11]);
+ _mm_store_si128((__m128i *)(dst + 12 * 8), s[12]);
+ _mm_store_si128((__m128i *)(dst + 13 * 8), s[13]);
+ _mm_store_si128((__m128i *)(dst + 14 * 8), s[14]);
+ _mm_store_si128((__m128i *)(dst + 15 * 8), s[15]);
+}
+
+void av1_highbd_convolve_2d_copy_sr_sse2(
+ const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y, const int subpel_x_q4,
+ const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
+ (void)filter_params_x;
+ (void)filter_params_y;
+ (void)subpel_x_q4;
+ (void)subpel_y_q4;
+ (void)conv_params;
+ (void)bd;
+ if (w >= 16) {
+ assert(!((intptr_t)dst % 16));
+ assert(!(dst_stride % 16));
+ }
+
+ if (w == 2) {
+ do {
+ __m128i s = _mm_loadl_epi64((__m128i *)src);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(s);
+ src += src_stride;
+ dst += dst_stride;
+ s = _mm_loadl_epi64((__m128i *)src);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(s);
+ src += src_stride;
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 4) {
+ do {
+ __m128i s[2];
+ s[0] = _mm_loadl_epi64((__m128i *)src);
+ src += src_stride;
+ s[1] = _mm_loadl_epi64((__m128i *)src);
+ src += src_stride;
+ _mm_storel_epi64((__m128i *)dst, s[0]);
+ dst += dst_stride;
+ _mm_storel_epi64((__m128i *)dst, s[1]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 8) {
+ do {
+ __m128i s[2];
+ s[0] = _mm_loadu_si128((__m128i *)src);
+ src += src_stride;
+ s[1] = _mm_loadu_si128((__m128i *)src);
+ src += src_stride;
+ _mm_store_si128((__m128i *)dst, s[0]);
+ dst += dst_stride;
+ _mm_store_si128((__m128i *)dst, s[1]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 16) {
+ do {
+ __m128i s[4];
+ s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 8));
+ s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 8));
+ src += src_stride;
+ s[2] = _mm_loadu_si128((__m128i *)(src + 0 * 8));
+ s[3] = _mm_loadu_si128((__m128i *)(src + 1 * 8));
+ src += src_stride;
+ _mm_store_si128((__m128i *)(dst + 0 * 8), s[0]);
+ _mm_store_si128((__m128i *)(dst + 1 * 8), s[1]);
+ dst += dst_stride;
+ _mm_store_si128((__m128i *)(dst + 0 * 8), s[2]);
+ _mm_store_si128((__m128i *)(dst + 1 * 8), s[3]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 32) {
+ do {
+ __m128i s[8];
+ s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 8));
+ s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 8));
+ s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 8));
+ s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 8));
+ src += src_stride;
+ s[4] = _mm_loadu_si128((__m128i *)(src + 0 * 8));
+ s[5] = _mm_loadu_si128((__m128i *)(src + 1 * 8));
+ s[6] = _mm_loadu_si128((__m128i *)(src + 2 * 8));
+ s[7] = _mm_loadu_si128((__m128i *)(src + 3 * 8));
+ src += src_stride;
+ _mm_store_si128((__m128i *)(dst + 0 * 8), s[0]);
+ _mm_store_si128((__m128i *)(dst + 1 * 8), s[1]);
+ _mm_store_si128((__m128i *)(dst + 2 * 8), s[2]);
+ _mm_store_si128((__m128i *)(dst + 3 * 8), s[3]);
+ dst += dst_stride;
+ _mm_store_si128((__m128i *)(dst + 0 * 8), s[4]);
+ _mm_store_si128((__m128i *)(dst + 1 * 8), s[5]);
+ _mm_store_si128((__m128i *)(dst + 2 * 8), s[6]);
+ _mm_store_si128((__m128i *)(dst + 3 * 8), s[7]);
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else if (w == 64) {
+ do {
+ copy_64(src, dst);
+ src += src_stride;
+ dst += dst_stride;
+ copy_64(src, dst);
+ src += src_stride;
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ } else {
+ do {
+ copy_128(src, dst);
+ src += src_stride;
+ dst += dst_stride;
+ copy_128(src, dst);
+ src += src_stride;
+ dst += dst_stride;
+ h -= 2;
+ } while (h);
+ }
+}
diff --git a/third_party/aom/av1/common/x86/highbd_convolve_2d_sse4.c b/third_party/aom/av1/common/x86/highbd_convolve_2d_sse4.c
new file mode 100644
index 000000000..5d2fc465e
--- /dev/null
+++ b/third_party/aom/av1/common/x86/highbd_convolve_2d_sse4.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tmmintrin.h>
+#include <smmintrin.h>
+#include <assert.h>
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_dsp/aom_convolve.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/aom_filter.h"
+#include "aom_dsp/x86/convolve_sse2.h"
+#include "aom_dsp/x86/convolve_sse4_1.h"
+#include "av1/common/convolve.h"
+
+void av1_highbd_jnt_convolve_2d_copy_sse4_1(
+ const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y, const int subpel_x_q4,
+ const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ (void)filter_params_x;
+ (void)filter_params_y;
+ (void)subpel_x_q4;
+ (void)subpel_y_q4;
+
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
+ const __m128i left_shift = _mm_cvtsi32_si128(bits);
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m128i wt0 = _mm_set1_epi32(w0);
+ const __m128i wt1 = _mm_set1_epi32(w1);
+ const __m128i zero = _mm_setzero_si128();
+ int i, j;
+
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m128i offset_const = _mm_set1_epi32(offset);
+ const __m128i offset_const_16b = _mm_set1_epi16(offset);
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m128i rounding_const = _mm_set1_epi32((1 << rounding_shift) >> 1);
+ const __m128i clip_pixel_to_bd =
+ _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
+
+ assert(bits <= 4);
+
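+  // The copy path shifts the source up by 'bits' so it matches the
+  // precision of a filtered intermediate and adds the compound offset;
+  // with do_average set it is averaged against the reference already in
+  // dst, then rounded and clipped back to the bit depth before the store
+  // to dst0.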
+ if (!(w % 8)) {
+ for (i = 0; i < h; i += 1) {
+ for (j = 0; j < w; j += 8) {
+ const __m128i src_16bit =
+ _mm_loadu_si128((__m128i *)(&src[i * src_stride + j]));
+ const __m128i res = _mm_sll_epi16(src_16bit, left_shift);
+ if (do_average) {
+ const __m128i data_0 =
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]));
+
+ const __m128i data_ref_0_lo = _mm_unpacklo_epi16(data_0, zero);
+ const __m128i data_ref_0_hi = _mm_unpackhi_epi16(data_0, zero);
+
+ const __m128i res_32b_lo = _mm_unpacklo_epi16(res, zero);
+ const __m128i res_unsigned_lo =
+ _mm_add_epi32(res_32b_lo, offset_const);
+
+ const __m128i comp_avg_res_lo = highbd_comp_avg_sse4_1(
+ &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m128i res_32b_hi = _mm_unpackhi_epi16(res, zero);
+ const __m128i res_unsigned_hi =
+ _mm_add_epi32(res_32b_hi, offset_const);
+
+ const __m128i comp_avg_res_hi = highbd_comp_avg_sse4_1(
+ &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m128i round_result_lo = highbd_convolve_rounding_sse2(
+ &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift);
+ const __m128i round_result_hi = highbd_convolve_rounding_sse2(
+ &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift);
+
+ const __m128i res_16b =
+ _mm_packus_epi32(round_result_lo, round_result_hi);
+ const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd);
+
+ _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip);
+ } else {
+ const __m128i res_unsigned_16b =
+ _mm_adds_epu16(res, offset_const_16b);
+
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]),
+ res_unsigned_16b);
+ }
+ }
+ }
+ } else if (!(w % 4)) {
+ for (i = 0; i < h; i += 2) {
+ for (j = 0; j < w; j += 4) {
+ const __m128i src_row_0 =
+ _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j]));
+ const __m128i src_row_1 =
+ _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j + src_stride]));
+ const __m128i src_10 = _mm_unpacklo_epi64(src_row_0, src_row_1);
+
+ const __m128i res = _mm_sll_epi16(src_10, left_shift);
+
+ if (do_average) {
+ const __m128i data_0 =
+ _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j]));
+ const __m128i data_1 = _mm_loadl_epi64(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride]));
+
+ const __m128i data_ref_0 = _mm_unpacklo_epi16(data_0, zero);
+ const __m128i data_ref_1 = _mm_unpacklo_epi16(data_1, zero);
+
+ const __m128i res_32b = _mm_unpacklo_epi16(res, zero);
+ const __m128i res_unsigned_lo = _mm_add_epi32(res_32b, offset_const);
+
+ const __m128i res_32b_hi = _mm_unpackhi_epi16(res, zero);
+ const __m128i res_unsigned_hi =
+ _mm_add_epi32(res_32b_hi, offset_const);
+
+ const __m128i comp_avg_res_lo = highbd_comp_avg_sse4_1(
+ &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
+ const __m128i comp_avg_res_hi = highbd_comp_avg_sse4_1(
+ &data_ref_1, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m128i round_result_lo = highbd_convolve_rounding_sse2(
+ &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift);
+ const __m128i round_result_hi = highbd_convolve_rounding_sse2(
+ &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift);
+
+ const __m128i res_16b =
+ _mm_packus_epi32(round_result_lo, round_result_hi);
+ const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd);
+
+ const __m128i res_1 = _mm_srli_si128(res_clip, 8);
+
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip);
+ _mm_storel_epi64(
+ (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
+ } else {
+ const __m128i res_unsigned_16b =
+ _mm_adds_epu16(res, offset_const_16b);
+
+ const __m128i res_1 = _mm_srli_si128(res_unsigned_16b, 8);
+
+ _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]),
+ res_unsigned_16b);
+ _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_1);
+ }
+ }
+ }
+ }
+}
+
+void av1_highbd_jnt_convolve_2d_sse4_1(
+ const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y, const int subpel_x_q4,
+ const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
+ DECLARE_ALIGNED(16, int16_t,
+ im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ int im_h = h + filter_params_y->taps - 1;
+ int im_stride = MAX_SB_SIZE;
+ int i, j;
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
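+  // Start fo_vert rows above and fo_horiz columns to the left of the
+  // output origin so the filter window is centred on each output pixel.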
+
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m128i wt0 = _mm_set1_epi32(w0);
+ const __m128i wt1 = _mm_set1_epi32(w1);
+
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m128i offset_const = _mm_set1_epi32(offset);
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m128i rounding_const = _mm_set1_epi32((1 << rounding_shift) >> 1);
+ const __m128i clip_pixel_to_bd =
+ _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
+
+ // Check that, even with 12-bit input, the intermediate values will fit
+ // into an unsigned 16-bit intermediate array.
+ assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
+
+ /* Horizontal filter */
+ {
+ const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
+ const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter);
+
+ // coeffs 0 1 0 1 2 3 2 3
+ const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
+ // coeffs 4 5 4 5 6 7 6 7
+ const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
+
+ // coeffs 0 1 0 1 0 1 0 1
+ const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
+ // coeffs 2 3 2 3 2 3 2 3
+ const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
+ // coeffs 4 5 4 5 4 5 4 5
+ const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
+ // coeffs 6 7 6 7 6 7 6 7
+ const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
+
+ const __m128i round_const = _mm_set1_epi32(
+ ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
+ const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
+
+ for (i = 0; i < im_h; ++i) {
+ for (j = 0; j < w; j += 8) {
+ const __m128i data =
+ _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
+ const __m128i data2 =
+ _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j + 8]);
+
+ // Filter even-index pixels
+ const __m128i res_0 = _mm_madd_epi16(data, coeff_01);
+ const __m128i res_2 =
+ _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23);
+ const __m128i res_4 =
+ _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45);
+ const __m128i res_6 =
+ _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67);
+
+ __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
+ _mm_add_epi32(res_2, res_6));
+ res_even =
+ _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift);
+
+ // Filter odd-index pixels
+ const __m128i res_1 =
+ _mm_madd_epi16(_mm_alignr_epi8(data2, data, 2), coeff_01);
+ const __m128i res_3 =
+ _mm_madd_epi16(_mm_alignr_epi8(data2, data, 6), coeff_23);
+ const __m128i res_5 =
+ _mm_madd_epi16(_mm_alignr_epi8(data2, data, 10), coeff_45);
+ const __m128i res_7 =
+ _mm_madd_epi16(_mm_alignr_epi8(data2, data, 14), coeff_67);
+
+ __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
+ _mm_add_epi32(res_3, res_7));
+ res_odd =
+ _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift);
+
+ // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
+ __m128i res = _mm_packs_epi32(res_even, res_odd);
+ _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res);
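+ // The store above deliberately keeps the 0,2,4,6,1,3,5,7 column order: the
+ // vertical pass consumes these rows with unpacklo/unpackhi, which separates
+ // the even- and odd-indexed columns again, and its final 32-bit
+ // unpacklo/unpackhi restores the natural 0..7 order.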
+ }
+ }
+ }
+
+ /* Vertical filter */
+ {
+ const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
+ const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
+
+ // coeffs 0 1 0 1 2 3 2 3
+ const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
+ // coeffs 4 5 4 5 6 7 6 7
+ const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
+
+ // coeffs 0 1 0 1 0 1 0 1
+ const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
+ // coeffs 2 3 2 3 2 3 2 3
+ const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
+ // coeffs 4 5 4 5 4 5 4 5
+ const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
+ // coeffs 6 7 6 7 6 7 6 7
+ const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
+
+ const __m128i round_const = _mm_set1_epi32(
+ ((1 << conv_params->round_1) >> 1) -
+ (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
+ const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
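+ // The negative term in round_const cancels the bias the horizontal stage
+ // injected (its 1 << (bd + FILTER_BITS - 1) offset, scaled by the vertical
+ // filter gain of 1 << FILTER_BITS and the round_0 shift), so only the
+ // explicit jnt-compound offset_const is carried in the stored values.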
+
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; j += 8) {
+ // Filter even-index pixels
+ const int16_t *data = &im_block[i * im_stride + j];
+ const __m128i src_0 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride),
+ *(__m128i *)(data + 1 * im_stride));
+ const __m128i src_2 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride),
+ *(__m128i *)(data + 3 * im_stride));
+ const __m128i src_4 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride),
+ *(__m128i *)(data + 5 * im_stride));
+ const __m128i src_6 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride),
+ *(__m128i *)(data + 7 * im_stride));
+
+ const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
+ const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
+ const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
+ const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
+
+ const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
+ _mm_add_epi32(res_4, res_6));
+
+ // Filter odd-index pixels
+ const __m128i src_1 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 0 * im_stride),
+ *(__m128i *)(data + 1 * im_stride));
+ const __m128i src_3 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 2 * im_stride),
+ *(__m128i *)(data + 3 * im_stride));
+ const __m128i src_5 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 4 * im_stride),
+ *(__m128i *)(data + 5 * im_stride));
+ const __m128i src_7 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 6 * im_stride),
+ *(__m128i *)(data + 7 * im_stride));
+
+ const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
+ const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
+ const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
+ const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
+
+ const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
+ _mm_add_epi32(res_5, res_7));
+
+ // Rearrange pixels back into the order 0 ... 7
+ const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
+ const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
+
+ const __m128i res_lo_round =
+ _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
+
+ const __m128i res_unsigned_lo =
+ _mm_add_epi32(res_lo_round, offset_const);
+
+ if (w < 8) {
+ if (do_average) {
+ const __m128i data_0 =
+ _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j]));
+
+ const __m128i data_ref_0 = _mm_cvtepu16_epi32(data_0);
+
+ const __m128i comp_avg_res = highbd_comp_avg_sse4_1(
+ &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m128i round_result = highbd_convolve_rounding_sse2(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m128i res_16b =
+ _mm_packus_epi32(round_result, round_result);
+ const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd);
+
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip);
+ } else {
+ const __m128i res_16b =
+ _mm_packus_epi32(res_unsigned_lo, res_unsigned_lo);
+ _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), res_16b);
+ }
+ } else {
+ const __m128i res_hi_round =
+ _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
+
+ const __m128i res_unsigned_hi =
+ _mm_add_epi32(res_hi_round, offset_const);
+
+ if (do_average) {
+ const __m128i data_lo =
+ _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j]));
+ const __m128i data_hi =
+ _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j + 4]));
+
+ const __m128i data_ref_0_lo = _mm_cvtepu16_epi32(data_lo);
+ const __m128i data_ref_0_hi = _mm_cvtepu16_epi32(data_hi);
+
+ const __m128i comp_avg_res_lo = highbd_comp_avg_sse4_1(
+ &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
+ const __m128i comp_avg_res_hi = highbd_comp_avg_sse4_1(
+ &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m128i round_result_lo =
+ highbd_convolve_rounding_sse2(&comp_avg_res_lo, &offset_const,
+ &rounding_const, rounding_shift);
+ const __m128i round_result_hi =
+ highbd_convolve_rounding_sse2(&comp_avg_res_hi, &offset_const,
+ &rounding_const, rounding_shift);
+
+ const __m128i res_16b =
+ _mm_packus_epi32(round_result_lo, round_result_hi);
+ const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd);
+
+ _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip);
+ } else {
+ const __m128i res_16b =
+ _mm_packus_epi32(res_unsigned_lo, res_unsigned_hi);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_16b);
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c b/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c
index 195f0f570..a9cf6a4d6 100644
--- a/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c
+++ b/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c
@@ -12,375 +12,209 @@
#include <tmmintrin.h>
#include <assert.h>
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_dsp_rtcd.h"
+
#include "aom_dsp/aom_convolve.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/aom_filter.h"
+#include "aom_dsp/x86/convolve_sse2.h"
#include "av1/common/convolve.h"
-#if CONFIG_COMPOUND_ROUND
-void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w,
- int h, InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- DECLARE_ALIGNED(16, int16_t,
- im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
+void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride,
+ uint16_t *dst, int dst_stride, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4,
+ const int subpel_y_q4,
+ ConvolveParams *conv_params, int bd) {
+ DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]);
int im_h = h + filter_params_y->taps - 1;
- int im_stride = MAX_SB_SIZE;
+ int im_stride = 8;
int i, j;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int do_average = conv_params->do_average;
const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
- /* Horizontal filter */
- {
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const =
- _mm_set1_epi32((1 << conv_params->round_0) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
- for (i = 0; i < im_h; ++i) {
- for (j = 0; j < w; j += 8) {
- const __m128i data =
+ // Check that, even with 12-bit input, the intermediate values will fit
+ // into an unsigned 16-bit intermediate array.
+ assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
+ __m128i coeffs_x[4], coeffs_y[4], s[16];
+
+ const __m128i round_const_x = _mm_set1_epi32(
+ ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
+ const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0);
+
+ const __m128i round_const_y =
+ _mm_set1_epi32(((1 << conv_params->round_1) >> 1) -
+ (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
+ const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
+
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
+ const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
+ const __m128i round_const_bits = _mm_set1_epi32((1 << bits) >> 1);
+ const __m128i clip_pixel =
+ _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
+ const __m128i zero = _mm_setzero_si128();
+
+ prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x);
+ prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
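+ // prepare_coeffs() and convolve() are presumably the small helpers from the
+ // aom_dsp/x86/convolve_sse2.h header included above, building the duplicated
+ // coefficient pairs and accumulating the four madd products. The block is
+ // walked in 8-column strips with an 8-wide intermediate buffer
+ // (im_stride = 8); the horizontal pass shifts by round_0 and the vertical
+ // pass by round_1 plus the remaining `bits`, returning to pixel precision
+ // before the [0, clip_pixel] clamp.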
+
+ for (j = 0; j < w; j += 8) {
+ /* Horizontal filter */
+ {
+ for (i = 0; i < im_h; i += 1) {
+ const __m128i row00 =
_mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
- const __m128i data2 =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j + 8]);
-
- // Filter even-index pixels
- const __m128i res_0 = _mm_madd_epi16(data, coeff_01);
- const __m128i res_2 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23);
- const __m128i res_4 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45);
- const __m128i res_6 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67);
-
- __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
- _mm_add_epi32(res_2, res_6));
- res_even =
- _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift);
-
- // Filter odd-index pixels
- const __m128i res_1 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 2), coeff_01);
- const __m128i res_3 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 6), coeff_23);
- const __m128i res_5 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 10), coeff_45);
- const __m128i res_7 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 14), coeff_67);
-
- __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
- _mm_add_epi32(res_3, res_7));
+ const __m128i row01 =
+ _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + (j + 8)]);
+
+ // even-index pixels
+ s[0] = _mm_alignr_epi8(row01, row00, 0);
+ s[1] = _mm_alignr_epi8(row01, row00, 4);
+ s[2] = _mm_alignr_epi8(row01, row00, 8);
+ s[3] = _mm_alignr_epi8(row01, row00, 12);
+
+ __m128i res_even = convolve(s, coeffs_x);
+ res_even = _mm_sra_epi32(_mm_add_epi32(res_even, round_const_x),
+ round_shift_x);
+
+ // odd-index pixels
+ s[0] = _mm_alignr_epi8(row01, row00, 2);
+ s[1] = _mm_alignr_epi8(row01, row00, 6);
+ s[2] = _mm_alignr_epi8(row01, row00, 10);
+ s[3] = _mm_alignr_epi8(row01, row00, 14);
+
+ __m128i res_odd = convolve(s, coeffs_x);
res_odd =
- _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift);
+ _mm_sra_epi32(_mm_add_epi32(res_odd, round_const_x), round_shift_x);
- // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
- const __m128i maxval = _mm_set1_epi16((1 << bd) - 1);
- __m128i res = _mm_packs_epi32(res_even, res_odd);
- res = _mm_max_epi16(_mm_min_epi16(res, maxval), _mm_setzero_si128());
- _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res);
- }
- }
- }
+ __m128i res_even1 = _mm_packs_epi32(res_even, res_even);
+ __m128i res_odd1 = _mm_packs_epi32(res_odd, res_odd);
+ __m128i res = _mm_unpacklo_epi16(res_even1, res_odd1);
- /* Vertical filter */
- {
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const =
- _mm_set1_epi32((1 << conv_params->round_1) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- // Filter even-index pixels
- const int16_t *data = &im_block[i * im_stride + j];
- const __m128i src_0 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride),
- *(__m128i *)(data + 1 * im_stride));
- const __m128i src_2 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride),
- *(__m128i *)(data + 3 * im_stride));
- const __m128i src_4 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride),
- *(__m128i *)(data + 5 * im_stride));
- const __m128i src_6 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride),
- *(__m128i *)(data + 7 * im_stride));
-
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
- _mm_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m128i src_1 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 0 * im_stride),
- *(__m128i *)(data + 1 * im_stride));
- const __m128i src_3 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 2 * im_stride),
- *(__m128i *)(data + 3 * im_stride));
- const __m128i src_5 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 4 * im_stride),
- *(__m128i *)(data + 5 * im_stride));
- const __m128i src_7 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 6 * im_stride),
- *(__m128i *)(data + 7 * im_stride));
-
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
- _mm_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
- const __m128i res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
- const __m128i res_hi_round =
- _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-
- // Accumulate values into the destination buffer
- __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- if (do_average) {
- _mm_storeu_si128(p + 0,
- _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round));
- _mm_storeu_si128(p + 1,
- _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
- } else {
- _mm_storeu_si128(p + 0, res_lo_round);
- _mm_storeu_si128(p + 1, res_hi_round);
- }
+ _mm_store_si128((__m128i *)&im_block[i * im_stride], res);
}
}
- }
-}
-#else
-void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w,
- int h, InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- DECLARE_ALIGNED(16, int16_t,
- im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = MAX_SB_SIZE;
- int i, j;
- const int do_average = conv_params->do_average;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
+ /* Vertical filter */
+ {
+ __m128i s0 = _mm_loadu_si128((__m128i *)(im_block + 0 * im_stride));
+ __m128i s1 = _mm_loadu_si128((__m128i *)(im_block + 1 * im_stride));
+ __m128i s2 = _mm_loadu_si128((__m128i *)(im_block + 2 * im_stride));
+ __m128i s3 = _mm_loadu_si128((__m128i *)(im_block + 3 * im_stride));
+ __m128i s4 = _mm_loadu_si128((__m128i *)(im_block + 4 * im_stride));
+ __m128i s5 = _mm_loadu_si128((__m128i *)(im_block + 5 * im_stride));
+ __m128i s6 = _mm_loadu_si128((__m128i *)(im_block + 6 * im_stride));
+
+ s[0] = _mm_unpacklo_epi16(s0, s1);
+ s[1] = _mm_unpacklo_epi16(s2, s3);
+ s[2] = _mm_unpacklo_epi16(s4, s5);
+
+ s[4] = _mm_unpackhi_epi16(s0, s1);
+ s[5] = _mm_unpackhi_epi16(s2, s3);
+ s[6] = _mm_unpackhi_epi16(s4, s5);
+
+ s[0 + 8] = _mm_unpacklo_epi16(s1, s2);
+ s[1 + 8] = _mm_unpacklo_epi16(s3, s4);
+ s[2 + 8] = _mm_unpacklo_epi16(s5, s6);
+
+ s[4 + 8] = _mm_unpackhi_epi16(s1, s2);
+ s[5 + 8] = _mm_unpackhi_epi16(s3, s4);
+ s[6 + 8] = _mm_unpackhi_epi16(s5, s6);
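+ // s[] holds a sliding window over the intermediate rows: s[0..7] feed output
+ // row i (low columns in s[0..3], high columns in s[4..7]) and s[8..15] feed
+ // row i + 1. Rows 0-6 are unpacked here; each iteration of the loop below
+ // loads rows 7 and 8, produces two output rows, and rotates the window down
+ // by two rows.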
+
+ for (i = 0; i < h; i += 2) {
+ const int16_t *data = &im_block[i * im_stride];
+
+ __m128i s7 = _mm_loadu_si128((__m128i *)(data + 7 * im_stride));
+ __m128i s8 = _mm_loadu_si128((__m128i *)(data + 8 * im_stride));
+
+ s[3] = _mm_unpacklo_epi16(s6, s7);
+ s[7] = _mm_unpackhi_epi16(s6, s7);
+
+ s[3 + 8] = _mm_unpacklo_epi16(s7, s8);
+ s[7 + 8] = _mm_unpackhi_epi16(s7, s8);
+
+ const __m128i res_a0 = convolve(s, coeffs_y);
+ __m128i res_a_round0 =
+ _mm_sra_epi32(_mm_add_epi32(res_a0, round_const_y), round_shift_y);
+ res_a_round0 = _mm_sra_epi32(
+ _mm_add_epi32(res_a_round0, round_const_bits), round_shift_bits);
+
+ const __m128i res_a1 = convolve(s + 8, coeffs_y);
+ __m128i res_a_round1 =
+ _mm_sra_epi32(_mm_add_epi32(res_a1, round_const_y), round_shift_y);
+ res_a_round1 = _mm_sra_epi32(
+ _mm_add_epi32(res_a_round1, round_const_bits), round_shift_bits);
+
+ if (w - j > 4) {
+ const __m128i res_b0 = convolve(s + 4, coeffs_y);
+ __m128i res_b_round0 = _mm_sra_epi32(
+ _mm_add_epi32(res_b0, round_const_y), round_shift_y);
+ res_b_round0 = _mm_sra_epi32(
+ _mm_add_epi32(res_b_round0, round_const_bits), round_shift_bits);
+
+ const __m128i res_b1 = convolve(s + 4 + 8, coeffs_y);
+ __m128i res_b_round1 = _mm_sra_epi32(
+ _mm_add_epi32(res_b1, round_const_y), round_shift_y);
+ res_b_round1 = _mm_sra_epi32(
+ _mm_add_epi32(res_b_round1, round_const_bits), round_shift_bits);
+
+ __m128i res_16bit0 = _mm_packs_epi32(res_a_round0, res_b_round0);
+ res_16bit0 = _mm_min_epi16(res_16bit0, clip_pixel);
+ res_16bit0 = _mm_max_epi16(res_16bit0, zero);
+
+ __m128i res_16bit1 = _mm_packs_epi32(res_a_round1, res_b_round1);
+ res_16bit1 = _mm_min_epi16(res_16bit1, clip_pixel);
+ res_16bit1 = _mm_max_epi16(res_16bit1, zero);
+
+ _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j], res_16bit0);
+ _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j + dst_stride],
+ res_16bit1);
+ } else if (w == 4) {
+ res_a_round0 = _mm_packs_epi32(res_a_round0, res_a_round0);
+ res_a_round0 = _mm_min_epi16(res_a_round0, clip_pixel);
+ res_a_round0 = _mm_max_epi16(res_a_round0, zero);
+
+ res_a_round1 = _mm_packs_epi32(res_a_round1, res_a_round1);
+ res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
+ res_a_round1 = _mm_max_epi16(res_a_round1, zero);
+
+ _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res_a_round0);
+ _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride],
+ res_a_round1);
+ } else {
+ res_a_round0 = _mm_packs_epi32(res_a_round0, res_a_round0);
+ res_a_round0 = _mm_min_epi16(res_a_round0, clip_pixel);
+ res_a_round0 = _mm_max_epi16(res_a_round0, zero);
- // Check that, even with 12-bit input, the intermediate values will fit
- // into an unsigned 15-bit intermediate array.
- assert(conv_params->round_0 >= 5);
-
- /* Horizontal filter */
- {
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const = _mm_set1_epi32(
- ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
- for (i = 0; i < im_h; ++i) {
- for (j = 0; j < w; j += 8) {
- const __m128i data =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
- const __m128i data2 =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j + 8]);
-
- // Filter even-index pixels
- const __m128i res_0 = _mm_madd_epi16(data, coeff_01);
- const __m128i res_2 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23);
- const __m128i res_4 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45);
- const __m128i res_6 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67);
-
- __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
- _mm_add_epi32(res_2, res_6));
- res_even =
- _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift);
-
- // Filter odd-index pixels
- const __m128i res_1 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 2), coeff_01);
- const __m128i res_3 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 6), coeff_23);
- const __m128i res_5 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 10), coeff_45);
- const __m128i res_7 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 14), coeff_67);
-
- __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
- _mm_add_epi32(res_3, res_7));
- res_odd =
- _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift);
+ res_a_round1 = _mm_packs_epi32(res_a_round1, res_a_round1);
+ res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
+ res_a_round1 = _mm_max_epi16(res_a_round1, zero);
- // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
- __m128i res = _mm_packs_epi32(res_even, res_odd);
- _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res);
- }
- }
- }
+ *((uint32_t *)(&dst[i * dst_stride + j])) =
+ _mm_cvtsi128_si32(res_a_round0);
- /* Vertical filter */
- {
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const = _mm_set1_epi32(
- ((1 << conv_params->round_1) >> 1) -
- (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- // Filter even-index pixels
- const int16_t *data = &im_block[i * im_stride + j];
- const __m128i src_0 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride),
- *(__m128i *)(data + 1 * im_stride));
- const __m128i src_2 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride),
- *(__m128i *)(data + 3 * im_stride));
- const __m128i src_4 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride),
- *(__m128i *)(data + 5 * im_stride));
- const __m128i src_6 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride),
- *(__m128i *)(data + 7 * im_stride));
-
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
- _mm_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m128i src_1 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 0 * im_stride),
- *(__m128i *)(data + 1 * im_stride));
- const __m128i src_3 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 2 * im_stride),
- *(__m128i *)(data + 3 * im_stride));
- const __m128i src_5 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 4 * im_stride),
- *(__m128i *)(data + 5 * im_stride));
- const __m128i src_7 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 6 * im_stride),
- *(__m128i *)(data + 7 * im_stride));
-
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
- _mm_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
- const __m128i res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
- const __m128i res_hi_round =
- _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-
- // Accumulate values into the destination buffer
- __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- if (do_average) {
- _mm_storeu_si128(p + 0,
- _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round));
- _mm_storeu_si128(p + 1,
- _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
- } else {
- _mm_storeu_si128(p + 0, res_lo_round);
- _mm_storeu_si128(p + 1, res_hi_round);
+ *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) =
+ _mm_cvtsi128_si32(res_a_round1);
}
+ s[0] = s[1];
+ s[1] = s[2];
+ s[2] = s[3];
+
+ s[4] = s[5];
+ s[5] = s[6];
+ s[6] = s[7];
+
+ s[0 + 8] = s[1 + 8];
+ s[1 + 8] = s[2 + 8];
+ s[2 + 8] = s[3 + 8];
+
+ s[4 + 8] = s[5 + 8];
+ s[5 + 8] = s[6 + 8];
+ s[6 + 8] = s[7 + 8];
+
+ s6 = s8;
}
}
}
}
-#endif
diff --git a/third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c b/third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c
index 0e833e6d9..debb05a6d 100644
--- a/third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c
+++ b/third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c
@@ -11,8 +11,9 @@
#include <assert.h>
#include <immintrin.h>
-#include "./av1_rtcd.h"
-#include "./aom_config.h"
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
#include "av1/common/av1_inv_txfm1d_cfg.h"
// Note:
@@ -85,17 +86,6 @@ static void load_buffer_32x32(const int32_t *coeff, __m256i *in) {
}
}
-static void round_shift_32x32(__m256i *in, int shift) {
- __m256i rnding = _mm256_set1_epi32(1 << (shift - 1));
- int i = 0;
-
- while (i < 128) {
- in[i] = _mm256_add_epi32(in[i], rnding);
- in[i] = _mm256_srai_epi32(in[i], shift);
- i++;
- }
-}
-
static __m256i highbd_clamp_epi32(__m256i x, int bd) {
const __m256i zero = _mm256_setzero_si256();
const __m256i one = _mm256_set1_epi16(1);
@@ -120,7 +110,7 @@ static void write_buffer_32x32(__m256i *in, uint16_t *output, int stride,
(void)fliplr;
(void)flipud;
- round_shift_32x32(in, shift);
+ __m256i round = _mm256_set1_epi32((1 << shift) >> 1);
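+ // The separate round_shift_32x32() pre-pass has been folded into this loop:
+ // each 256-bit lane is rounded and shifted right by `shift` just before the
+ // reconstruction add, saving a full extra pass over the 128 in[] registers.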
while (i < 128) {
u0 = _mm256_loadu_si256((const __m256i *)output);
@@ -136,6 +126,16 @@ static void write_buffer_32x32(__m256i *in, uint16_t *output, int stride,
v2 = _mm256_permute2f128_si256(in[i + 2], in[i + 3], 0x20);
v3 = _mm256_permute2f128_si256(in[i + 2], in[i + 3], 0x31);
+ v0 = _mm256_add_epi32(v0, round);
+ v1 = _mm256_add_epi32(v1, round);
+ v2 = _mm256_add_epi32(v2, round);
+ v3 = _mm256_add_epi32(v3, round);
+
+ v0 = _mm256_sra_epi32(v0, _mm_cvtsi32_si128(shift));
+ v1 = _mm256_sra_epi32(v1, _mm_cvtsi32_si128(shift));
+ v2 = _mm256_sra_epi32(v2, _mm_cvtsi32_si128(shift));
+ v3 = _mm256_sra_epi32(v3, _mm_cvtsi32_si128(shift));
+
v0 = _mm256_add_epi32(v0, x0);
v1 = _mm256_add_epi32(v1, x1);
v2 = _mm256_add_epi32(v2, x2);
@@ -167,7 +167,53 @@ static INLINE __m256i half_btf_avx2(const __m256i *w0, const __m256i *n0,
return x;
}
-static void idct32_avx2(__m256i *in, __m256i *out, int bit) {
+static void addsub_avx2(const __m256i in0, const __m256i in1, __m256i *out0,
+ __m256i *out1, const __m256i *clamp_lo,
+ const __m256i *clamp_hi) {
+ __m256i a0 = _mm256_add_epi32(in0, in1);
+ __m256i a1 = _mm256_sub_epi32(in0, in1);
+
+ a0 = _mm256_max_epi32(a0, *clamp_lo);
+ a0 = _mm256_min_epi32(a0, *clamp_hi);
+ a1 = _mm256_max_epi32(a1, *clamp_lo);
+ a1 = _mm256_min_epi32(a1, *clamp_hi);
+
+ *out0 = a0;
+ *out1 = a1;
+}
+
+static void addsub_no_clamp_avx2(const __m256i in0, const __m256i in1,
+ __m256i *out0, __m256i *out1) {
+ __m256i a0 = _mm256_add_epi32(in0, in1);
+ __m256i a1 = _mm256_sub_epi32(in0, in1);
+
+ *out0 = a0;
+ *out1 = a1;
+}
+
+static void addsub_shift_avx2(const __m256i in0, const __m256i in1,
+ __m256i *out0, __m256i *out1,
+ const __m256i *clamp_lo, const __m256i *clamp_hi,
+ int shift) {
+ __m256i offset = _mm256_set1_epi32((1 << shift) >> 1);
+ __m256i in0_w_offset = _mm256_add_epi32(in0, offset);
+ __m256i a0 = _mm256_add_epi32(in0_w_offset, in1);
+ __m256i a1 = _mm256_sub_epi32(in0_w_offset, in1);
+
+ a0 = _mm256_max_epi32(a0, *clamp_lo);
+ a0 = _mm256_min_epi32(a0, *clamp_hi);
+ a1 = _mm256_max_epi32(a1, *clamp_lo);
+ a1 = _mm256_min_epi32(a1, *clamp_hi);
+
+ a0 = _mm256_sra_epi32(a0, _mm_cvtsi32_si128(shift));
+ a1 = _mm256_sra_epi32(a1, _mm_cvtsi32_si128(shift));
+
+ *out0 = a0;
+ *out1 = a1;
+}
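+
+// These three helpers replace the long runs of explicit _mm256_add_epi32 /
+// _mm256_sub_epi32 butterflies further down: addsub_avx2 clamps each
+// intermediate to [clamp_lo, clamp_hi], addsub_no_clamp_avx2 is used for the
+// final column-pass outputs, and addsub_shift_avx2 additionally applies the
+// row-pass output rounding shift.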
+
+static void idct32_avx2(__m256i *in, __m256i *out, int bit, int do_cols, int bd,
+ int out_shift) {
const int32_t *cospi = cospi_arr(bit);
const __m256i cospi62 = _mm256_set1_epi32(cospi[62]);
const __m256i cospi30 = _mm256_set1_epi32(cospi[30]);
@@ -220,6 +266,9 @@ static void idct32_avx2(__m256i *in, __m256i *out, int bit) {
const __m256i cospi16 = _mm256_set1_epi32(cospi[16]);
const __m256i cospim16 = _mm256_set1_epi32(-cospi[16]);
const __m256i rounding = _mm256_set1_epi32(1 << (bit - 1));
+ const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
+ const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
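+ // log_range tracks the transform's intermediate dynamic range: roughly
+ // bd + 8 bits for the row pass and bd + 6 bits for the column pass, never
+ // less than 16, so the clamps keep every stage within a signed
+ // log_range-bit window.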
__m256i bf1[32], bf0[32];
int col;
@@ -334,22 +383,15 @@ static void idct32_avx2(__m256i *in, __m256i *out, int bit) {
half_btf_avx2(&cospi36, &bf0[9], &cospi28, &bf0[14], &rounding, bit);
bf1[15] =
half_btf_avx2(&cospi4, &bf0[8], &cospi60, &bf0[15], &rounding, bit);
- bf1[16] = _mm256_add_epi32(bf0[16], bf0[17]);
- bf1[17] = _mm256_sub_epi32(bf0[16], bf0[17]);
- bf1[18] = _mm256_sub_epi32(bf0[19], bf0[18]);
- bf1[19] = _mm256_add_epi32(bf0[18], bf0[19]);
- bf1[20] = _mm256_add_epi32(bf0[20], bf0[21]);
- bf1[21] = _mm256_sub_epi32(bf0[20], bf0[21]);
- bf1[22] = _mm256_sub_epi32(bf0[23], bf0[22]);
- bf1[23] = _mm256_add_epi32(bf0[22], bf0[23]);
- bf1[24] = _mm256_add_epi32(bf0[24], bf0[25]);
- bf1[25] = _mm256_sub_epi32(bf0[24], bf0[25]);
- bf1[26] = _mm256_sub_epi32(bf0[27], bf0[26]);
- bf1[27] = _mm256_add_epi32(bf0[26], bf0[27]);
- bf1[28] = _mm256_add_epi32(bf0[28], bf0[29]);
- bf1[29] = _mm256_sub_epi32(bf0[28], bf0[29]);
- bf1[30] = _mm256_sub_epi32(bf0[31], bf0[30]);
- bf1[31] = _mm256_add_epi32(bf0[30], bf0[31]);
+
+ addsub_avx2(bf0[16], bf0[17], bf1 + 16, bf1 + 17, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[19], bf0[18], bf1 + 19, bf1 + 18, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[20], bf0[21], bf1 + 20, bf1 + 21, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[23], bf0[22], bf1 + 23, bf1 + 22, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[24], bf0[25], bf1 + 24, bf1 + 25, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[27], bf0[26], bf1 + 27, bf1 + 26, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[28], bf0[29], bf1 + 28, bf1 + 29, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[31], bf0[30], bf1 + 31, bf1 + 30, &clamp_lo, &clamp_hi);
// stage 4
bf0[0] = bf1[0];
@@ -363,14 +405,12 @@ static void idct32_avx2(__m256i *in, __m256i *out, int bit) {
bf0[6] =
half_btf_avx2(&cospi40, &bf1[5], &cospi24, &bf1[6], &rounding, bit);
bf0[7] = half_btf_avx2(&cospi8, &bf1[4], &cospi56, &bf1[7], &rounding, bit);
- bf0[8] = _mm256_add_epi32(bf1[8], bf1[9]);
- bf0[9] = _mm256_sub_epi32(bf1[8], bf1[9]);
- bf0[10] = _mm256_sub_epi32(bf1[11], bf1[10]);
- bf0[11] = _mm256_add_epi32(bf1[10], bf1[11]);
- bf0[12] = _mm256_add_epi32(bf1[12], bf1[13]);
- bf0[13] = _mm256_sub_epi32(bf1[12], bf1[13]);
- bf0[14] = _mm256_sub_epi32(bf1[15], bf1[14]);
- bf0[15] = _mm256_add_epi32(bf1[14], bf1[15]);
+
+ addsub_avx2(bf1[8], bf1[9], bf0 + 8, bf0 + 9, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf1[11], bf1[10], bf0 + 11, bf0 + 10, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf1[12], bf1[13], bf0 + 12, bf0 + 13, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf1[15], bf1[14], bf0 + 15, bf0 + 14, &clamp_lo, &clamp_hi);
+
bf0[16] = bf1[16];
bf0[17] =
half_btf_avx2(&cospim8, &bf1[17], &cospi56, &bf1[30], &rounding, bit);
@@ -405,10 +445,8 @@ static void idct32_avx2(__m256i *in, __m256i *out, int bit) {
half_btf_avx2(&cospi48, &bf0[2], &cospim16, &bf0[3], &rounding, bit);
bf1[3] =
half_btf_avx2(&cospi16, &bf0[2], &cospi48, &bf0[3], &rounding, bit);
- bf1[4] = _mm256_add_epi32(bf0[4], bf0[5]);
- bf1[5] = _mm256_sub_epi32(bf0[4], bf0[5]);
- bf1[6] = _mm256_sub_epi32(bf0[7], bf0[6]);
- bf1[7] = _mm256_add_epi32(bf0[6], bf0[7]);
+ addsub_avx2(bf0[4], bf0[5], bf1 + 4, bf1 + 5, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[7], bf0[6], bf1 + 7, bf1 + 6, &clamp_lo, &clamp_hi);
bf1[8] = bf0[8];
bf1[9] =
half_btf_avx2(&cospim16, &bf0[9], &cospi48, &bf0[14], &rounding, bit);
@@ -421,42 +459,28 @@ static void idct32_avx2(__m256i *in, __m256i *out, int bit) {
bf1[14] =
half_btf_avx2(&cospi48, &bf0[9], &cospi16, &bf0[14], &rounding, bit);
bf1[15] = bf0[15];
- bf1[16] = _mm256_add_epi32(bf0[16], bf0[19]);
- bf1[17] = _mm256_add_epi32(bf0[17], bf0[18]);
- bf1[18] = _mm256_sub_epi32(bf0[17], bf0[18]);
- bf1[19] = _mm256_sub_epi32(bf0[16], bf0[19]);
- bf1[20] = _mm256_sub_epi32(bf0[23], bf0[20]);
- bf1[21] = _mm256_sub_epi32(bf0[22], bf0[21]);
- bf1[22] = _mm256_add_epi32(bf0[21], bf0[22]);
- bf1[23] = _mm256_add_epi32(bf0[20], bf0[23]);
- bf1[24] = _mm256_add_epi32(bf0[24], bf0[27]);
- bf1[25] = _mm256_add_epi32(bf0[25], bf0[26]);
- bf1[26] = _mm256_sub_epi32(bf0[25], bf0[26]);
- bf1[27] = _mm256_sub_epi32(bf0[24], bf0[27]);
- bf1[28] = _mm256_sub_epi32(bf0[31], bf0[28]);
- bf1[29] = _mm256_sub_epi32(bf0[30], bf0[29]);
- bf1[30] = _mm256_add_epi32(bf0[29], bf0[30]);
- bf1[31] = _mm256_add_epi32(bf0[28], bf0[31]);
+ addsub_avx2(bf0[16], bf0[19], bf1 + 16, bf1 + 19, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[17], bf0[18], bf1 + 17, bf1 + 18, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[23], bf0[20], bf1 + 23, bf1 + 20, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[22], bf0[21], bf1 + 22, bf1 + 21, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[24], bf0[27], bf1 + 24, bf1 + 27, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[25], bf0[26], bf1 + 25, bf1 + 26, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[31], bf0[28], bf1 + 31, bf1 + 28, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[30], bf0[29], bf1 + 30, bf1 + 29, &clamp_lo, &clamp_hi);
// stage 6
- bf0[0] = _mm256_add_epi32(bf1[0], bf1[3]);
- bf0[1] = _mm256_add_epi32(bf1[1], bf1[2]);
- bf0[2] = _mm256_sub_epi32(bf1[1], bf1[2]);
- bf0[3] = _mm256_sub_epi32(bf1[0], bf1[3]);
+ addsub_avx2(bf1[0], bf1[3], bf0 + 0, bf0 + 3, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf1[1], bf1[2], bf0 + 1, bf0 + 2, &clamp_lo, &clamp_hi);
bf0[4] = bf1[4];
bf0[5] =
half_btf_avx2(&cospim32, &bf1[5], &cospi32, &bf1[6], &rounding, bit);
bf0[6] =
half_btf_avx2(&cospi32, &bf1[5], &cospi32, &bf1[6], &rounding, bit);
bf0[7] = bf1[7];
- bf0[8] = _mm256_add_epi32(bf1[8], bf1[11]);
- bf0[9] = _mm256_add_epi32(bf1[9], bf1[10]);
- bf0[10] = _mm256_sub_epi32(bf1[9], bf1[10]);
- bf0[11] = _mm256_sub_epi32(bf1[8], bf1[11]);
- bf0[12] = _mm256_sub_epi32(bf1[15], bf1[12]);
- bf0[13] = _mm256_sub_epi32(bf1[14], bf1[13]);
- bf0[14] = _mm256_add_epi32(bf1[13], bf1[14]);
- bf0[15] = _mm256_add_epi32(bf1[12], bf1[15]);
+ addsub_avx2(bf1[8], bf1[11], bf0 + 8, bf0 + 11, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf1[9], bf1[10], bf0 + 9, bf0 + 10, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf1[15], bf1[12], bf0 + 15, bf0 + 12, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf1[14], bf1[13], bf0 + 14, bf0 + 13, &clamp_lo, &clamp_hi);
bf0[16] = bf1[16];
bf0[17] = bf1[17];
bf0[18] =
@@ -483,14 +507,10 @@ static void idct32_avx2(__m256i *in, __m256i *out, int bit) {
bf0[31] = bf1[31];
// stage 7
- bf1[0] = _mm256_add_epi32(bf0[0], bf0[7]);
- bf1[1] = _mm256_add_epi32(bf0[1], bf0[6]);
- bf1[2] = _mm256_add_epi32(bf0[2], bf0[5]);
- bf1[3] = _mm256_add_epi32(bf0[3], bf0[4]);
- bf1[4] = _mm256_sub_epi32(bf0[3], bf0[4]);
- bf1[5] = _mm256_sub_epi32(bf0[2], bf0[5]);
- bf1[6] = _mm256_sub_epi32(bf0[1], bf0[6]);
- bf1[7] = _mm256_sub_epi32(bf0[0], bf0[7]);
+ addsub_avx2(bf0[0], bf0[7], bf1 + 0, bf1 + 7, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[1], bf0[6], bf1 + 1, bf1 + 6, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[2], bf0[5], bf1 + 2, bf1 + 5, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[3], bf0[4], bf1 + 3, bf1 + 4, &clamp_lo, &clamp_hi);
bf1[8] = bf0[8];
bf1[9] = bf0[9];
bf1[10] =
@@ -503,40 +523,24 @@ static void idct32_avx2(__m256i *in, __m256i *out, int bit) {
half_btf_avx2(&cospi32, &bf0[10], &cospi32, &bf0[13], &rounding, bit);
bf1[14] = bf0[14];
bf1[15] = bf0[15];
- bf1[16] = _mm256_add_epi32(bf0[16], bf0[23]);
- bf1[17] = _mm256_add_epi32(bf0[17], bf0[22]);
- bf1[18] = _mm256_add_epi32(bf0[18], bf0[21]);
- bf1[19] = _mm256_add_epi32(bf0[19], bf0[20]);
- bf1[20] = _mm256_sub_epi32(bf0[19], bf0[20]);
- bf1[21] = _mm256_sub_epi32(bf0[18], bf0[21]);
- bf1[22] = _mm256_sub_epi32(bf0[17], bf0[22]);
- bf1[23] = _mm256_sub_epi32(bf0[16], bf0[23]);
- bf1[24] = _mm256_sub_epi32(bf0[31], bf0[24]);
- bf1[25] = _mm256_sub_epi32(bf0[30], bf0[25]);
- bf1[26] = _mm256_sub_epi32(bf0[29], bf0[26]);
- bf1[27] = _mm256_sub_epi32(bf0[28], bf0[27]);
- bf1[28] = _mm256_add_epi32(bf0[27], bf0[28]);
- bf1[29] = _mm256_add_epi32(bf0[26], bf0[29]);
- bf1[30] = _mm256_add_epi32(bf0[25], bf0[30]);
- bf1[31] = _mm256_add_epi32(bf0[24], bf0[31]);
+ addsub_avx2(bf0[16], bf0[23], bf1 + 16, bf1 + 23, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[17], bf0[22], bf1 + 17, bf1 + 22, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[18], bf0[21], bf1 + 18, bf1 + 21, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[19], bf0[20], bf1 + 19, bf1 + 20, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[31], bf0[24], bf1 + 31, bf1 + 24, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[30], bf0[25], bf1 + 30, bf1 + 25, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[29], bf0[26], bf1 + 29, bf1 + 26, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf0[28], bf0[27], bf1 + 28, bf1 + 27, &clamp_lo, &clamp_hi);
// stage 8
- bf0[0] = _mm256_add_epi32(bf1[0], bf1[15]);
- bf0[1] = _mm256_add_epi32(bf1[1], bf1[14]);
- bf0[2] = _mm256_add_epi32(bf1[2], bf1[13]);
- bf0[3] = _mm256_add_epi32(bf1[3], bf1[12]);
- bf0[4] = _mm256_add_epi32(bf1[4], bf1[11]);
- bf0[5] = _mm256_add_epi32(bf1[5], bf1[10]);
- bf0[6] = _mm256_add_epi32(bf1[6], bf1[9]);
- bf0[7] = _mm256_add_epi32(bf1[7], bf1[8]);
- bf0[8] = _mm256_sub_epi32(bf1[7], bf1[8]);
- bf0[9] = _mm256_sub_epi32(bf1[6], bf1[9]);
- bf0[10] = _mm256_sub_epi32(bf1[5], bf1[10]);
- bf0[11] = _mm256_sub_epi32(bf1[4], bf1[11]);
- bf0[12] = _mm256_sub_epi32(bf1[3], bf1[12]);
- bf0[13] = _mm256_sub_epi32(bf1[2], bf1[13]);
- bf0[14] = _mm256_sub_epi32(bf1[1], bf1[14]);
- bf0[15] = _mm256_sub_epi32(bf1[0], bf1[15]);
+ addsub_avx2(bf1[0], bf1[15], bf0 + 0, bf0 + 15, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf1[1], bf1[14], bf0 + 1, bf0 + 14, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf1[2], bf1[13], bf0 + 2, bf0 + 13, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf1[3], bf1[12], bf0 + 3, bf0 + 12, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf1[4], bf1[11], bf0 + 4, bf0 + 11, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf1[5], bf1[10], bf0 + 5, bf0 + 10, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf1[6], bf1[9], bf0 + 6, bf0 + 9, &clamp_lo, &clamp_hi);
+ addsub_avx2(bf1[7], bf1[8], bf0 + 7, bf0 + 8, &clamp_lo, &clamp_hi);
bf0[16] = bf1[16];
bf0[17] = bf1[17];
bf0[18] = bf1[18];
@@ -563,58 +567,91 @@ static void idct32_avx2(__m256i *in, __m256i *out, int bit) {
bf0[31] = bf1[31];
// stage 9
- out[0 * 4 + col] = _mm256_add_epi32(bf0[0], bf0[31]);
- out[1 * 4 + col] = _mm256_add_epi32(bf0[1], bf0[30]);
- out[2 * 4 + col] = _mm256_add_epi32(bf0[2], bf0[29]);
- out[3 * 4 + col] = _mm256_add_epi32(bf0[3], bf0[28]);
- out[4 * 4 + col] = _mm256_add_epi32(bf0[4], bf0[27]);
- out[5 * 4 + col] = _mm256_add_epi32(bf0[5], bf0[26]);
- out[6 * 4 + col] = _mm256_add_epi32(bf0[6], bf0[25]);
- out[7 * 4 + col] = _mm256_add_epi32(bf0[7], bf0[24]);
- out[8 * 4 + col] = _mm256_add_epi32(bf0[8], bf0[23]);
- out[9 * 4 + col] = _mm256_add_epi32(bf0[9], bf0[22]);
- out[10 * 4 + col] = _mm256_add_epi32(bf0[10], bf0[21]);
- out[11 * 4 + col] = _mm256_add_epi32(bf0[11], bf0[20]);
- out[12 * 4 + col] = _mm256_add_epi32(bf0[12], bf0[19]);
- out[13 * 4 + col] = _mm256_add_epi32(bf0[13], bf0[18]);
- out[14 * 4 + col] = _mm256_add_epi32(bf0[14], bf0[17]);
- out[15 * 4 + col] = _mm256_add_epi32(bf0[15], bf0[16]);
- out[16 * 4 + col] = _mm256_sub_epi32(bf0[15], bf0[16]);
- out[17 * 4 + col] = _mm256_sub_epi32(bf0[14], bf0[17]);
- out[18 * 4 + col] = _mm256_sub_epi32(bf0[13], bf0[18]);
- out[19 * 4 + col] = _mm256_sub_epi32(bf0[12], bf0[19]);
- out[20 * 4 + col] = _mm256_sub_epi32(bf0[11], bf0[20]);
- out[21 * 4 + col] = _mm256_sub_epi32(bf0[10], bf0[21]);
- out[22 * 4 + col] = _mm256_sub_epi32(bf0[9], bf0[22]);
- out[23 * 4 + col] = _mm256_sub_epi32(bf0[8], bf0[23]);
- out[24 * 4 + col] = _mm256_sub_epi32(bf0[7], bf0[24]);
- out[25 * 4 + col] = _mm256_sub_epi32(bf0[6], bf0[25]);
- out[26 * 4 + col] = _mm256_sub_epi32(bf0[5], bf0[26]);
- out[27 * 4 + col] = _mm256_sub_epi32(bf0[4], bf0[27]);
- out[28 * 4 + col] = _mm256_sub_epi32(bf0[3], bf0[28]);
- out[29 * 4 + col] = _mm256_sub_epi32(bf0[2], bf0[29]);
- out[30 * 4 + col] = _mm256_sub_epi32(bf0[1], bf0[30]);
- out[31 * 4 + col] = _mm256_sub_epi32(bf0[0], bf0[31]);
+ if (do_cols) {
+ addsub_no_clamp_avx2(bf0[0], bf0[31], out + 0 * 4 + col,
+ out + 31 * 4 + col);
+ addsub_no_clamp_avx2(bf0[1], bf0[30], out + 1 * 4 + col,
+ out + 30 * 4 + col);
+ addsub_no_clamp_avx2(bf0[2], bf0[29], out + 2 * 4 + col,
+ out + 29 * 4 + col);
+ addsub_no_clamp_avx2(bf0[3], bf0[28], out + 3 * 4 + col,
+ out + 28 * 4 + col);
+ addsub_no_clamp_avx2(bf0[4], bf0[27], out + 4 * 4 + col,
+ out + 27 * 4 + col);
+ addsub_no_clamp_avx2(bf0[5], bf0[26], out + 5 * 4 + col,
+ out + 26 * 4 + col);
+ addsub_no_clamp_avx2(bf0[6], bf0[25], out + 6 * 4 + col,
+ out + 25 * 4 + col);
+ addsub_no_clamp_avx2(bf0[7], bf0[24], out + 7 * 4 + col,
+ out + 24 * 4 + col);
+ addsub_no_clamp_avx2(bf0[8], bf0[23], out + 8 * 4 + col,
+ out + 23 * 4 + col);
+ addsub_no_clamp_avx2(bf0[9], bf0[22], out + 9 * 4 + col,
+ out + 22 * 4 + col);
+ addsub_no_clamp_avx2(bf0[10], bf0[21], out + 10 * 4 + col,
+ out + 21 * 4 + col);
+ addsub_no_clamp_avx2(bf0[11], bf0[20], out + 11 * 4 + col,
+ out + 20 * 4 + col);
+ addsub_no_clamp_avx2(bf0[12], bf0[19], out + 12 * 4 + col,
+ out + 19 * 4 + col);
+ addsub_no_clamp_avx2(bf0[13], bf0[18], out + 13 * 4 + col,
+ out + 18 * 4 + col);
+ addsub_no_clamp_avx2(bf0[14], bf0[17], out + 14 * 4 + col,
+ out + 17 * 4 + col);
+ addsub_no_clamp_avx2(bf0[15], bf0[16], out + 15 * 4 + col,
+ out + 16 * 4 + col);
+ } else {
+ addsub_shift_avx2(bf0[0], bf0[31], out + 0 * 4 + col, out + 31 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_avx2(bf0[1], bf0[30], out + 1 * 4 + col, out + 30 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_avx2(bf0[2], bf0[29], out + 2 * 4 + col, out + 29 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_avx2(bf0[3], bf0[28], out + 3 * 4 + col, out + 28 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_avx2(bf0[4], bf0[27], out + 4 * 4 + col, out + 27 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_avx2(bf0[5], bf0[26], out + 5 * 4 + col, out + 26 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_avx2(bf0[6], bf0[25], out + 6 * 4 + col, out + 25 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_avx2(bf0[7], bf0[24], out + 7 * 4 + col, out + 24 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_avx2(bf0[8], bf0[23], out + 8 * 4 + col, out + 23 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_avx2(bf0[9], bf0[22], out + 9 * 4 + col, out + 22 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_avx2(bf0[10], bf0[21], out + 10 * 4 + col,
+ out + 21 * 4 + col, &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_avx2(bf0[11], bf0[20], out + 11 * 4 + col,
+ out + 20 * 4 + col, &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_avx2(bf0[12], bf0[19], out + 12 * 4 + col,
+ out + 19 * 4 + col, &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_avx2(bf0[13], bf0[18], out + 13 * 4 + col,
+ out + 18 * 4 + col, &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_avx2(bf0[14], bf0[17], out + 14 * 4 + col,
+ out + 17 * 4 + col, &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_avx2(bf0[15], bf0[16], out + 15 * 4 + col,
+ out + 16 * 4 + col, &clamp_lo, &clamp_hi, out_shift);
+ }
}
}
void av1_inv_txfm2d_add_32x32_avx2(const int32_t *coeff, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
__m256i in[128], out[128];
- const TXFM_1D_CFG *row_cfg = NULL;
- const TXFM_1D_CFG *col_cfg = NULL;
+ const int8_t *shift = inv_txfm_shift_ls[TX_32X32];
+ const int txw_idx = get_txw_idx(TX_32X32);
+ const int txh_idx = get_txh_idx(TX_32X32);
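+ // In place of the old TXFM_1D_CFG row/col configs, the shift and cos-bit
+ // lookups are indexed per tx size: the row pass applies -shift[0] inside
+ // idct32_avx2 through its new out_shift argument, and the column pass
+ // leaves -shift[1] to write_buffer_32x32.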
switch (tx_type) {
case DCT_DCT:
- row_cfg = &inv_txfm_1d_row_cfg_dct_32;
- col_cfg = &inv_txfm_1d_col_cfg_dct_32;
load_buffer_32x32(coeff, in);
transpose_32x32(in, out);
- idct32_avx2(out, in, row_cfg->cos_bit[2]);
- round_shift_32x32(in, -row_cfg->shift[0]);
+ idct32_avx2(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd, -shift[0]);
transpose_32x32(in, out);
- idct32_avx2(out, in, col_cfg->cos_bit[2]);
- write_buffer_32x32(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
+ idct32_avx2(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_32x32(in, output, stride, 0, 0, -shift[1], bd);
break;
default: assert(0);
}
diff --git a/third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c b/third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c
index 8613bed86..801a4133b 100644
--- a/third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c
+++ b/third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c
@@ -11,8 +11,9 @@
#include <assert.h>
#include <smmintrin.h> /* SSE4.1 */
-#include "./av1_rtcd.h"
-#include "./aom_config.h"
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
#include "av1/common/av1_inv_txfm1d_cfg.h"
#include "av1/common/x86/highbd_txfm_utility_sse4.h"
@@ -23,13 +24,82 @@ static INLINE void load_buffer_4x4(const int32_t *coeff, __m128i *in) {
in[3] = _mm_load_si128((const __m128i *)(coeff + 12));
}
-static void idct4x4_sse4_1(__m128i *in, int bit) {
+static void addsub_sse4_1(const __m128i in0, const __m128i in1, __m128i *out0,
+ __m128i *out1, const __m128i *clamp_lo,
+ const __m128i *clamp_hi) {
+ __m128i a0 = _mm_add_epi32(in0, in1);
+ __m128i a1 = _mm_sub_epi32(in0, in1);
+
+ a0 = _mm_max_epi32(a0, *clamp_lo);
+ a0 = _mm_min_epi32(a0, *clamp_hi);
+ a1 = _mm_max_epi32(a1, *clamp_lo);
+ a1 = _mm_min_epi32(a1, *clamp_hi);
+
+ *out0 = a0;
+ *out1 = a1;
+}
+
+static void addsub_no_clamp_sse4_1(const __m128i in0, const __m128i in1,
+ __m128i *out0, __m128i *out1) {
+ __m128i a0 = _mm_add_epi32(in0, in1);
+ __m128i a1 = _mm_sub_epi32(in0, in1);
+
+ *out0 = a0;
+ *out1 = a1;
+}
+
+static void addsub_shift_sse4_1(const __m128i in0, const __m128i in1,
+ __m128i *out0, __m128i *out1,
+ const __m128i *clamp_lo,
+ const __m128i *clamp_hi, int shift) {
+ __m128i offset = _mm_set1_epi32((1 << shift) >> 1);
+ __m128i in0_w_offset = _mm_add_epi32(in0, offset);
+ __m128i a0 = _mm_add_epi32(in0_w_offset, in1);
+ __m128i a1 = _mm_sub_epi32(in0_w_offset, in1);
+
+ a0 = _mm_max_epi32(a0, *clamp_lo);
+ a0 = _mm_min_epi32(a0, *clamp_hi);
+ a1 = _mm_max_epi32(a1, *clamp_lo);
+ a1 = _mm_min_epi32(a1, *clamp_hi);
+
+ a0 = _mm_sra_epi32(a0, _mm_cvtsi32_si128(shift));
+ a1 = _mm_sra_epi32(a1, _mm_cvtsi32_si128(shift));
+
+ *out0 = a0;
+ *out1 = a1;
+}
+
+static void neg_shift_sse4_1(const __m128i in0, const __m128i in1,
+ __m128i *out0, __m128i *out1,
+ const __m128i *clamp_lo, const __m128i *clamp_hi,
+ int shift) {
+ __m128i offset = _mm_set1_epi32((1 << shift) >> 1);
+ __m128i a0 = _mm_add_epi32(offset, in0);
+ __m128i a1 = _mm_sub_epi32(offset, in1);
+
+ a0 = _mm_max_epi32(a0, *clamp_lo);
+ a0 = _mm_min_epi32(a0, *clamp_hi);
+ a1 = _mm_max_epi32(a1, *clamp_lo);
+ a1 = _mm_min_epi32(a1, *clamp_hi);
+
+ a0 = _mm_sra_epi32(a0, _mm_cvtsi32_si128(shift));
+ a1 = _mm_sra_epi32(a1, _mm_cvtsi32_si128(shift));
+
+ *out0 = a0;
+ *out1 = a1;
+}
+
+static void idct4x4_sse4_1(__m128i *in, int bit, int do_cols, int bd) {
const int32_t *cospi = cospi_arr(bit);
const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+ const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
+ const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
+
__m128i u0, u1, u2, u3;
__m128i v0, v1, v2, v3, x, y;
@@ -65,84 +135,72 @@ static void idct4x4_sse4_1(__m128i *in, int bit) {
v3 = _mm_add_epi32(v3, rnding);
v3 = _mm_srai_epi32(v3, bit);
- in[0] = _mm_add_epi32(v0, v3);
- in[1] = _mm_add_epi32(v1, v2);
- in[2] = _mm_sub_epi32(v1, v2);
- in[3] = _mm_sub_epi32(v0, v3);
+ addsub_sse4_1(v0, v3, in + 0, in + 3, &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v1, v2, in + 1, in + 2, &clamp_lo, &clamp_hi);
}
static void iadst4x4_sse4_1(__m128i *in, int bit) {
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
- const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const int32_t *sinpi = sinpi_arr(bit);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const __m128i zero = _mm_setzero_si128();
+ const __m128i sinpi1 = _mm_set1_epi32((int)sinpi[1]);
+ const __m128i sinpi2 = _mm_set1_epi32((int)sinpi[2]);
+ const __m128i sinpi3 = _mm_set1_epi32((int)sinpi[3]);
+ const __m128i sinpi4 = _mm_set1_epi32((int)sinpi[4]);
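+ // The rewritten 4-point ADST works directly in the sinpi basis (constants
+ // derived from sin(k*pi/9)) rather than building it out of cospi
+ // half-butterflies, mirroring the sinpi-based scalar inverse ADST.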
+ __m128i t;
+ __m128i s0, s1, s2, s3, s4, s5, s6, s7;
+ __m128i x0, x1, x2, x3;
__m128i u0, u1, u2, u3;
- __m128i v0, v1, v2, v3, x, y;
+ __m128i v0, v1, v2, v3;
v0 = _mm_unpacklo_epi32(in[0], in[1]);
v1 = _mm_unpackhi_epi32(in[0], in[1]);
v2 = _mm_unpacklo_epi32(in[2], in[3]);
v3 = _mm_unpackhi_epi32(in[2], in[3]);
- u0 = _mm_unpacklo_epi64(v0, v2);
- u1 = _mm_unpackhi_epi64(v0, v2);
- u2 = _mm_unpacklo_epi64(v1, v3);
- u3 = _mm_unpackhi_epi64(v1, v3);
-
- // stage 0
- // stage 1
- u1 = _mm_sub_epi32(zero, u1);
- u3 = _mm_sub_epi32(zero, u3);
-
- // stage 2
- v0 = u0;
- v1 = u3;
- x = _mm_mullo_epi32(u1, cospi32);
- y = _mm_mullo_epi32(u2, cospi32);
- v2 = _mm_add_epi32(x, y);
- v2 = _mm_add_epi32(v2, rnding);
- v2 = _mm_srai_epi32(v2, bit);
-
- v3 = _mm_sub_epi32(x, y);
- v3 = _mm_add_epi32(v3, rnding);
- v3 = _mm_srai_epi32(v3, bit);
-
- // stage 3
- u0 = _mm_add_epi32(v0, v2);
- u1 = _mm_add_epi32(v1, v3);
- u2 = _mm_sub_epi32(v0, v2);
- u3 = _mm_sub_epi32(v1, v3);
-
- // stage 4
- x = _mm_mullo_epi32(u0, cospi8);
- y = _mm_mullo_epi32(u1, cospi56);
- in[3] = _mm_add_epi32(x, y);
- in[3] = _mm_add_epi32(in[3], rnding);
- in[3] = _mm_srai_epi32(in[3], bit);
-
- x = _mm_mullo_epi32(u0, cospi56);
- y = _mm_mullo_epi32(u1, cospim8);
- in[0] = _mm_add_epi32(x, y);
- in[0] = _mm_add_epi32(in[0], rnding);
- in[0] = _mm_srai_epi32(in[0], bit);
-
- x = _mm_mullo_epi32(u2, cospi40);
- y = _mm_mullo_epi32(u3, cospi24);
- in[1] = _mm_add_epi32(x, y);
- in[1] = _mm_add_epi32(in[1], rnding);
- in[1] = _mm_srai_epi32(in[1], bit);
-
- x = _mm_mullo_epi32(u2, cospi24);
- y = _mm_mullo_epi32(u3, cospim40);
- in[2] = _mm_add_epi32(x, y);
- in[2] = _mm_add_epi32(in[2], rnding);
- in[2] = _mm_srai_epi32(in[2], bit);
+ x0 = _mm_unpacklo_epi64(v0, v2);
+ x1 = _mm_unpackhi_epi64(v0, v2);
+ x2 = _mm_unpacklo_epi64(v1, v3);
+ x3 = _mm_unpackhi_epi64(v1, v3);
+
+ s0 = _mm_mullo_epi32(x0, sinpi1);
+ s1 = _mm_mullo_epi32(x0, sinpi2);
+ s2 = _mm_mullo_epi32(x1, sinpi3);
+ s3 = _mm_mullo_epi32(x2, sinpi4);
+ s4 = _mm_mullo_epi32(x2, sinpi1);
+ s5 = _mm_mullo_epi32(x3, sinpi2);
+ s6 = _mm_mullo_epi32(x3, sinpi4);
+ t = _mm_sub_epi32(x0, x2);
+ s7 = _mm_add_epi32(t, x3);
+
+ t = _mm_add_epi32(s0, s3);
+ s0 = _mm_add_epi32(t, s5);
+ t = _mm_sub_epi32(s1, s4);
+ s1 = _mm_sub_epi32(t, s6);
+ s3 = s2;
+ s2 = _mm_mullo_epi32(s7, sinpi3);
+
+ u0 = _mm_add_epi32(s0, s3);
+ u1 = _mm_add_epi32(s1, s3);
+ u2 = s2;
+ t = _mm_add_epi32(s0, s1);
+ u3 = _mm_sub_epi32(t, s3);
+
+ u0 = _mm_add_epi32(u0, rnding);
+ u0 = _mm_srai_epi32(u0, bit);
+
+ u1 = _mm_add_epi32(u1, rnding);
+ u1 = _mm_srai_epi32(u1, bit);
+
+ u2 = _mm_add_epi32(u2, rnding);
+ u2 = _mm_srai_epi32(u2, bit);
+
+ u3 = _mm_add_epi32(u3, rnding);
+ u3 = _mm_srai_epi32(u3, bit);
+
+ in[0] = u0;
+ in[1] = u1;
+ in[2] = u2;
+ in[3] = u3;
}
static INLINE void round_shift_4x4(__m128i *in, int shift) {
@@ -232,84 +290,65 @@ static void write_buffer_4x4(__m128i *in, uint16_t *output, int stride,
void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *coeff, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
__m128i in[4];
- const TXFM_1D_CFG *row_cfg = NULL;
- const TXFM_1D_CFG *col_cfg = NULL;
+ const int8_t *shift = inv_txfm_shift_ls[TX_4X4];
+ const int txw_idx = get_txw_idx(TX_4X4);
+ const int txh_idx = get_txh_idx(TX_4X4);
switch (tx_type) {
case DCT_DCT:
- row_cfg = &inv_txfm_1d_row_cfg_dct_4;
- col_cfg = &inv_txfm_1d_col_cfg_dct_4;
load_buffer_4x4(coeff, in);
- idct4x4_sse4_1(in, row_cfg->cos_bit[2]);
- idct4x4_sse4_1(in, col_cfg->cos_bit[2]);
- write_buffer_4x4(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
+ idct4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd);
+ idct4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd);
+ write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd);
break;
case ADST_DCT:
- row_cfg = &inv_txfm_1d_row_cfg_dct_4;
- col_cfg = &inv_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(coeff, in);
- idct4x4_sse4_1(in, row_cfg->cos_bit[2]);
- iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
- write_buffer_4x4(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
+ idct4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd);
+ iadst4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx]);
+ write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd);
break;
case DCT_ADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_4;
- col_cfg = &inv_txfm_1d_col_cfg_dct_4;
load_buffer_4x4(coeff, in);
- iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
- idct4x4_sse4_1(in, col_cfg->cos_bit[2]);
- write_buffer_4x4(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
+ iadst4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx]);
+ idct4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd);
+ write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd);
break;
case ADST_ADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_4;
- col_cfg = &inv_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(coeff, in);
- iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
- iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
- write_buffer_4x4(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
+ iadst4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx]);
+ iadst4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx]);
+ write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd);
break;
-#if CONFIG_EXT_TX
case FLIPADST_DCT:
- row_cfg = &inv_txfm_1d_row_cfg_dct_4;
- col_cfg = &inv_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(coeff, in);
- idct4x4_sse4_1(in, row_cfg->cos_bit[2]);
- iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
- write_buffer_4x4(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
+ idct4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd);
+ iadst4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx]);
+ write_buffer_4x4(in, output, stride, 0, 1, -shift[1], bd);
break;
case DCT_FLIPADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_4;
- col_cfg = &inv_txfm_1d_col_cfg_dct_4;
load_buffer_4x4(coeff, in);
- iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
- idct4x4_sse4_1(in, col_cfg->cos_bit[2]);
- write_buffer_4x4(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
+ iadst4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx]);
+ idct4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd);
+ write_buffer_4x4(in, output, stride, 1, 0, -shift[1], bd);
break;
case FLIPADST_FLIPADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_4;
- col_cfg = &inv_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(coeff, in);
- iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
- iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
- write_buffer_4x4(in, output, stride, 1, 1, -row_cfg->shift[1], bd);
+ iadst4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx]);
+ iadst4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx]);
+ write_buffer_4x4(in, output, stride, 1, 1, -shift[1], bd);
break;
case ADST_FLIPADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_4;
- col_cfg = &inv_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(coeff, in);
- iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
- iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
- write_buffer_4x4(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
+ iadst4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx]);
+ iadst4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx]);
+ write_buffer_4x4(in, output, stride, 1, 0, -shift[1], bd);
break;
case FLIPADST_ADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_4;
- col_cfg = &inv_txfm_1d_col_cfg_adst_4;
load_buffer_4x4(coeff, in);
- iadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
- iadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
- write_buffer_4x4(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
+ iadst4x4_sse4_1(in, inv_cos_bit_row[txw_idx][txh_idx]);
+ iadst4x4_sse4_1(in, inv_cos_bit_col[txw_idx][txh_idx]);
+ write_buffer_4x4(in, output, stride, 0, 1, -shift[1], bd);
break;
-#endif // CONFIG_EXT_TX
default: assert(0);
}
}
@@ -334,7 +373,8 @@ static void load_buffer_8x8(const int32_t *coeff, __m128i *in) {
in[15] = _mm_load_si128((const __m128i *)(coeff + 60));
}
-static void idct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
+static void idct8x8_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
+ int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
@@ -347,6 +387,9 @@ static void idct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+ const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
+ const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i u0, u1, u2, u3, u4, u5, u6, u7;
__m128i v0, v1, v2, v3, v4, v5, v6, v7;
__m128i x, y;
@@ -413,16 +456,12 @@ static void idct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
v3 = _mm_add_epi32(v3, rnding);
v3 = _mm_srai_epi32(v3, bit);
- v4 = _mm_add_epi32(u4, u5);
- v5 = _mm_sub_epi32(u4, u5);
- v6 = _mm_sub_epi32(u7, u6);
- v7 = _mm_add_epi32(u6, u7);
+ addsub_sse4_1(u4, u5, &v4, &v5, &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u7, u6, &v7, &v6, &clamp_lo, &clamp_hi);
// stage 4
- u0 = _mm_add_epi32(v0, v3);
- u1 = _mm_add_epi32(v1, v2);
- u2 = _mm_sub_epi32(v1, v2);
- u3 = _mm_sub_epi32(v0, v3);
+ addsub_sse4_1(v0, v3, &u0, &u3, &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v1, v2, &u1, &u2, &clamp_lo, &clamp_hi);
u4 = v4;
u7 = v7;
@@ -437,195 +476,334 @@ static void idct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
u5 = _mm_srai_epi32(u5, bit);
// stage 5
- out[0 * 2 + col] = _mm_add_epi32(u0, u7);
- out[1 * 2 + col] = _mm_add_epi32(u1, u6);
- out[2 * 2 + col] = _mm_add_epi32(u2, u5);
- out[3 * 2 + col] = _mm_add_epi32(u3, u4);
- out[4 * 2 + col] = _mm_sub_epi32(u3, u4);
- out[5 * 2 + col] = _mm_sub_epi32(u2, u5);
- out[6 * 2 + col] = _mm_sub_epi32(u1, u6);
- out[7 * 2 + col] = _mm_sub_epi32(u0, u7);
+ if (do_cols) {
+ addsub_no_clamp_sse4_1(u0, u7, out + 0 * 2 + col, out + 7 * 2 + col);
+ addsub_no_clamp_sse4_1(u1, u6, out + 1 * 2 + col, out + 6 * 2 + col);
+ addsub_no_clamp_sse4_1(u2, u5, out + 2 * 2 + col, out + 5 * 2 + col);
+ addsub_no_clamp_sse4_1(u3, u4, out + 3 * 2 + col, out + 4 * 2 + col);
+ } else {
+ addsub_shift_sse4_1(u0, u7, out + 0 * 2 + col, out + 7 * 2 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_sse4_1(u1, u6, out + 1 * 2 + col, out + 6 * 2 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_sse4_1(u2, u5, out + 2 * 2 + col, out + 5 * 2 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_sse4_1(u3, u4, out + 3 * 2 + col, out + 4 * 2 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ }
}
}
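
idct8x8_sse4_1 now threads do_cols, bd and out_shift through so every butterfly can saturate into a bit-depth-dependent range (log_range is max(16, bd + 8) for the row pass and max(16, bd + 6) for the column pass). The addsub_sse4_1 and addsub_shift_sse4_1 helpers are defined outside these hunks; a scalar sketch of what they are assumed to do:

#include <stdint.h>

static int32_t clamp_s32(int64_t v, int32_t lo, int32_t hi) {
  return (int32_t)(v < lo ? lo : (v > hi ? hi : v));
}

/* Assumed scalar behaviour of addsub_sse4_1: butterfly, then clamp each lane
 * to the [clamp_lo, clamp_hi] range built from log_range above. */
static void addsub_sketch(int32_t a, int32_t b, int32_t *sum, int32_t *diff,
                          int32_t lo, int32_t hi) {
  *sum = clamp_s32((int64_t)a + b, lo, hi);
  *diff = clamp_s32((int64_t)a - b, lo, hi);
}

/* Assumed scalar behaviour of addsub_shift_sse4_1: the same butterfly with the
 * row-pass rounding (out_shift, i.e. -shift[0]) folded in before clamping. */
static void addsub_shift_sketch(int32_t a, int32_t b, int32_t *sum,
                                int32_t *diff, int32_t lo, int32_t hi,
                                int shift) {
  const int64_t rnd = ((int64_t)1 << shift) >> 1;
  *sum = clamp_s32(((int64_t)a + b + rnd) >> shift, lo, hi);
  *diff = clamp_s32(((int64_t)a - b + rnd) >> shift, lo, hi);
}
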
-static void iadst8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
+static void iadst8x8_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
+ int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
- const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);
const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
- const __m128i cospim20 = _mm_set1_epi32(-cospi[20]);
const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
- const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
- const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
+ const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
- const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const __m128i zero = _mm_setzero_si128();
- __m128i u0, u1, u2, u3, u4, u5, u6, u7;
- __m128i v0, v1, v2, v3, v4, v5, v6, v7;
- __m128i x, y;
- int col;
+ const __m128i kZero = _mm_setzero_si128();
+ const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
+ const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
+ __m128i u[8], v[8], x;
- // Note:
- // Even column: 0, 2, ..., 14
- // Odd column: 1, 3, ..., 15
- // one even column plus one odd column constructs one row (8 coeffs)
- // total we have 8 rows (8x8).
- for (col = 0; col < 2; ++col) {
- // stage 0
- // stage 1
- u0 = in[2 * 0 + col];
- u1 = _mm_sub_epi32(zero, in[2 * 7 + col]);
- u2 = _mm_sub_epi32(zero, in[2 * 3 + col]);
- u3 = in[2 * 4 + col];
- u4 = _mm_sub_epi32(zero, in[2 * 1 + col]);
- u5 = in[2 * 6 + col];
- u6 = in[2 * 2 + col];
- u7 = _mm_sub_epi32(zero, in[2 * 5 + col]);
-
- // stage 2
- v0 = u0;
- v1 = u1;
-
- x = _mm_mullo_epi32(u2, cospi32);
- y = _mm_mullo_epi32(u3, cospi32);
- v2 = _mm_add_epi32(x, y);
- v2 = _mm_add_epi32(v2, rnding);
- v2 = _mm_srai_epi32(v2, bit);
-
- v3 = _mm_sub_epi32(x, y);
- v3 = _mm_add_epi32(v3, rnding);
- v3 = _mm_srai_epi32(v3, bit);
-
- v4 = u4;
- v5 = u5;
-
- x = _mm_mullo_epi32(u6, cospi32);
- y = _mm_mullo_epi32(u7, cospi32);
- v6 = _mm_add_epi32(x, y);
- v6 = _mm_add_epi32(v6, rnding);
- v6 = _mm_srai_epi32(v6, bit);
-
- v7 = _mm_sub_epi32(x, y);
- v7 = _mm_add_epi32(v7, rnding);
- v7 = _mm_srai_epi32(v7, bit);
-
- // stage 3
- u0 = _mm_add_epi32(v0, v2);
- u1 = _mm_add_epi32(v1, v3);
- u2 = _mm_sub_epi32(v0, v2);
- u3 = _mm_sub_epi32(v1, v3);
- u4 = _mm_add_epi32(v4, v6);
- u5 = _mm_add_epi32(v5, v7);
- u6 = _mm_sub_epi32(v4, v6);
- u7 = _mm_sub_epi32(v5, v7);
-
- // stage 4
- v0 = u0;
- v1 = u1;
- v2 = u2;
- v3 = u3;
-
- x = _mm_mullo_epi32(u4, cospi16);
- y = _mm_mullo_epi32(u5, cospi48);
- v4 = _mm_add_epi32(x, y);
- v4 = _mm_add_epi32(v4, rnding);
- v4 = _mm_srai_epi32(v4, bit);
-
- x = _mm_mullo_epi32(u4, cospi48);
- y = _mm_mullo_epi32(u5, cospim16);
- v5 = _mm_add_epi32(x, y);
- v5 = _mm_add_epi32(v5, rnding);
- v5 = _mm_srai_epi32(v5, bit);
-
- x = _mm_mullo_epi32(u6, cospim48);
- y = _mm_mullo_epi32(u7, cospi16);
- v6 = _mm_add_epi32(x, y);
- v6 = _mm_add_epi32(v6, rnding);
- v6 = _mm_srai_epi32(v6, bit);
-
- x = _mm_mullo_epi32(u6, cospi16);
- y = _mm_mullo_epi32(u7, cospi48);
- v7 = _mm_add_epi32(x, y);
- v7 = _mm_add_epi32(v7, rnding);
- v7 = _mm_srai_epi32(v7, bit);
-
- // stage 5
- u0 = _mm_add_epi32(v0, v4);
- u1 = _mm_add_epi32(v1, v5);
- u2 = _mm_add_epi32(v2, v6);
- u3 = _mm_add_epi32(v3, v7);
- u4 = _mm_sub_epi32(v0, v4);
- u5 = _mm_sub_epi32(v1, v5);
- u6 = _mm_sub_epi32(v2, v6);
- u7 = _mm_sub_epi32(v3, v7);
-
- // stage 6
- x = _mm_mullo_epi32(u0, cospi4);
- y = _mm_mullo_epi32(u1, cospi60);
- v0 = _mm_add_epi32(x, y);
- v0 = _mm_add_epi32(v0, rnding);
- v0 = _mm_srai_epi32(v0, bit);
+ // Even 8 points: 0, 2, ..., 14
+ // stage 0
+ // stage 1
+ // stage 2
+ // (1)
+ u[0] = _mm_mullo_epi32(in[14], cospi4);
+ x = _mm_mullo_epi32(in[0], cospi60);
+ u[0] = _mm_add_epi32(u[0], x);
+ u[0] = _mm_add_epi32(u[0], rnding);
+ u[0] = _mm_srai_epi32(u[0], bit);
+
+ u[1] = _mm_mullo_epi32(in[14], cospi60);
+ x = _mm_mullo_epi32(in[0], cospi4);
+ u[1] = _mm_sub_epi32(u[1], x);
+ u[1] = _mm_add_epi32(u[1], rnding);
+ u[1] = _mm_srai_epi32(u[1], bit);
+
+ // (2)
+ u[2] = _mm_mullo_epi32(in[10], cospi20);
+ x = _mm_mullo_epi32(in[4], cospi44);
+ u[2] = _mm_add_epi32(u[2], x);
+ u[2] = _mm_add_epi32(u[2], rnding);
+ u[2] = _mm_srai_epi32(u[2], bit);
+
+ u[3] = _mm_mullo_epi32(in[10], cospi44);
+ x = _mm_mullo_epi32(in[4], cospi20);
+ u[3] = _mm_sub_epi32(u[3], x);
+ u[3] = _mm_add_epi32(u[3], rnding);
+ u[3] = _mm_srai_epi32(u[3], bit);
+
+ // (3)
+ u[4] = _mm_mullo_epi32(in[6], cospi36);
+ x = _mm_mullo_epi32(in[8], cospi28);
+ u[4] = _mm_add_epi32(u[4], x);
+ u[4] = _mm_add_epi32(u[4], rnding);
+ u[4] = _mm_srai_epi32(u[4], bit);
+
+ u[5] = _mm_mullo_epi32(in[6], cospi28);
+ x = _mm_mullo_epi32(in[8], cospi36);
+ u[5] = _mm_sub_epi32(u[5], x);
+ u[5] = _mm_add_epi32(u[5], rnding);
+ u[5] = _mm_srai_epi32(u[5], bit);
+
+ // (4)
+ u[6] = _mm_mullo_epi32(in[2], cospi52);
+ x = _mm_mullo_epi32(in[12], cospi12);
+ u[6] = _mm_add_epi32(u[6], x);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ u[7] = _mm_mullo_epi32(in[2], cospi12);
+ x = _mm_mullo_epi32(in[12], cospi52);
+ u[7] = _mm_sub_epi32(u[7], x);
+ u[7] = _mm_add_epi32(u[7], rnding);
+ u[7] = _mm_srai_epi32(u[7], bit);
- x = _mm_mullo_epi32(u0, cospi60);
- y = _mm_mullo_epi32(u1, cospim4);
- v1 = _mm_add_epi32(x, y);
- v1 = _mm_add_epi32(v1, rnding);
- v1 = _mm_srai_epi32(v1, bit);
+ // stage 3
+ addsub_sse4_1(u[0], u[4], &v[0], &v[4], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[1], u[5], &v[1], &v[5], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[2], u[6], &v[2], &v[6], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[3], u[7], &v[3], &v[7], &clamp_lo, &clamp_hi);
- x = _mm_mullo_epi32(u2, cospi20);
- y = _mm_mullo_epi32(u3, cospi44);
- v2 = _mm_add_epi32(x, y);
- v2 = _mm_add_epi32(v2, rnding);
- v2 = _mm_srai_epi32(v2, bit);
+ // stage 4
+ u[0] = v[0];
+ u[1] = v[1];
+ u[2] = v[2];
+ u[3] = v[3];
+
+ u[4] = _mm_mullo_epi32(v[4], cospi16);
+ x = _mm_mullo_epi32(v[5], cospi48);
+ u[4] = _mm_add_epi32(u[4], x);
+ u[4] = _mm_add_epi32(u[4], rnding);
+ u[4] = _mm_srai_epi32(u[4], bit);
+
+ u[5] = _mm_mullo_epi32(v[4], cospi48);
+ x = _mm_mullo_epi32(v[5], cospi16);
+ u[5] = _mm_sub_epi32(u[5], x);
+ u[5] = _mm_add_epi32(u[5], rnding);
+ u[5] = _mm_srai_epi32(u[5], bit);
+
+ u[6] = _mm_mullo_epi32(v[6], cospim48);
+ x = _mm_mullo_epi32(v[7], cospi16);
+ u[6] = _mm_add_epi32(u[6], x);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ u[7] = _mm_mullo_epi32(v[6], cospi16);
+ x = _mm_mullo_epi32(v[7], cospim48);
+ u[7] = _mm_sub_epi32(u[7], x);
+ u[7] = _mm_add_epi32(u[7], rnding);
+ u[7] = _mm_srai_epi32(u[7], bit);
+
+ // stage 5
+ addsub_sse4_1(u[0], u[2], &v[0], &v[2], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[1], u[3], &v[1], &v[3], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[4], u[6], &v[4], &v[6], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[5], u[7], &v[5], &v[7], &clamp_lo, &clamp_hi);
+
+ // stage 6
+ u[0] = v[0];
+ u[1] = v[1];
+ u[4] = v[4];
+ u[5] = v[5];
+
+ v[0] = _mm_mullo_epi32(v[2], cospi32);
+ x = _mm_mullo_epi32(v[3], cospi32);
+ u[2] = _mm_add_epi32(v[0], x);
+ u[2] = _mm_add_epi32(u[2], rnding);
+ u[2] = _mm_srai_epi32(u[2], bit);
+
+ u[3] = _mm_sub_epi32(v[0], x);
+ u[3] = _mm_add_epi32(u[3], rnding);
+ u[3] = _mm_srai_epi32(u[3], bit);
+
+ v[0] = _mm_mullo_epi32(v[6], cospi32);
+ x = _mm_mullo_epi32(v[7], cospi32);
+ u[6] = _mm_add_epi32(v[0], x);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ u[7] = _mm_sub_epi32(v[0], x);
+ u[7] = _mm_add_epi32(u[7], rnding);
+ u[7] = _mm_srai_epi32(u[7], bit);
+
+ // stage 7
+ if (do_cols) {
+ out[0] = u[0];
+ out[2] = _mm_sub_epi32(kZero, u[4]);
+ out[4] = u[6];
+ out[6] = _mm_sub_epi32(kZero, u[2]);
+ out[8] = u[3];
+ out[10] = _mm_sub_epi32(kZero, u[7]);
+ out[12] = u[5];
+ out[14] = _mm_sub_epi32(kZero, u[1]);
+ } else {
+ neg_shift_sse4_1(u[0], u[4], out + 0, out + 2, &clamp_lo, &clamp_hi,
+ out_shift);
+ neg_shift_sse4_1(u[6], u[2], out + 4, out + 6, &clamp_lo, &clamp_hi,
+ out_shift);
+ neg_shift_sse4_1(u[3], u[7], out + 8, out + 10, &clamp_lo, &clamp_hi,
+ out_shift);
+ neg_shift_sse4_1(u[5], u[1], out + 12, out + 14, &clamp_lo, &clamp_hi,
+ out_shift);
+ }
- x = _mm_mullo_epi32(u2, cospi44);
- y = _mm_mullo_epi32(u3, cospim20);
- v3 = _mm_add_epi32(x, y);
- v3 = _mm_add_epi32(v3, rnding);
- v3 = _mm_srai_epi32(v3, bit);
+ // Odd 8 points: 1, 3, ..., 15
+ // stage 0
+ // stage 1
+ // stage 2
+ // (1)
+ u[0] = _mm_mullo_epi32(in[15], cospi4);
+ x = _mm_mullo_epi32(in[1], cospi60);
+ u[0] = _mm_add_epi32(u[0], x);
+ u[0] = _mm_add_epi32(u[0], rnding);
+ u[0] = _mm_srai_epi32(u[0], bit);
+
+ u[1] = _mm_mullo_epi32(in[15], cospi60);
+ x = _mm_mullo_epi32(in[1], cospi4);
+ u[1] = _mm_sub_epi32(u[1], x);
+ u[1] = _mm_add_epi32(u[1], rnding);
+ u[1] = _mm_srai_epi32(u[1], bit);
+
+ // (2)
+ u[2] = _mm_mullo_epi32(in[11], cospi20);
+ x = _mm_mullo_epi32(in[5], cospi44);
+ u[2] = _mm_add_epi32(u[2], x);
+ u[2] = _mm_add_epi32(u[2], rnding);
+ u[2] = _mm_srai_epi32(u[2], bit);
+
+ u[3] = _mm_mullo_epi32(in[11], cospi44);
+ x = _mm_mullo_epi32(in[5], cospi20);
+ u[3] = _mm_sub_epi32(u[3], x);
+ u[3] = _mm_add_epi32(u[3], rnding);
+ u[3] = _mm_srai_epi32(u[3], bit);
+
+ // (3)
+ u[4] = _mm_mullo_epi32(in[7], cospi36);
+ x = _mm_mullo_epi32(in[9], cospi28);
+ u[4] = _mm_add_epi32(u[4], x);
+ u[4] = _mm_add_epi32(u[4], rnding);
+ u[4] = _mm_srai_epi32(u[4], bit);
+
+ u[5] = _mm_mullo_epi32(in[7], cospi28);
+ x = _mm_mullo_epi32(in[9], cospi36);
+ u[5] = _mm_sub_epi32(u[5], x);
+ u[5] = _mm_add_epi32(u[5], rnding);
+ u[5] = _mm_srai_epi32(u[5], bit);
+
+ // (4)
+ u[6] = _mm_mullo_epi32(in[3], cospi52);
+ x = _mm_mullo_epi32(in[13], cospi12);
+ u[6] = _mm_add_epi32(u[6], x);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ u[7] = _mm_mullo_epi32(in[3], cospi12);
+ x = _mm_mullo_epi32(in[13], cospi52);
+ u[7] = _mm_sub_epi32(u[7], x);
+ u[7] = _mm_add_epi32(u[7], rnding);
+ u[7] = _mm_srai_epi32(u[7], bit);
- x = _mm_mullo_epi32(u4, cospi36);
- y = _mm_mullo_epi32(u5, cospi28);
- v4 = _mm_add_epi32(x, y);
- v4 = _mm_add_epi32(v4, rnding);
- v4 = _mm_srai_epi32(v4, bit);
-
- x = _mm_mullo_epi32(u4, cospi28);
- y = _mm_mullo_epi32(u5, cospim36);
- v5 = _mm_add_epi32(x, y);
- v5 = _mm_add_epi32(v5, rnding);
- v5 = _mm_srai_epi32(v5, bit);
-
- x = _mm_mullo_epi32(u6, cospi52);
- y = _mm_mullo_epi32(u7, cospi12);
- v6 = _mm_add_epi32(x, y);
- v6 = _mm_add_epi32(v6, rnding);
- v6 = _mm_srai_epi32(v6, bit);
-
- x = _mm_mullo_epi32(u6, cospi12);
- y = _mm_mullo_epi32(u7, cospim52);
- v7 = _mm_add_epi32(x, y);
- v7 = _mm_add_epi32(v7, rnding);
- v7 = _mm_srai_epi32(v7, bit);
+ // stage 3
+ addsub_sse4_1(u[0], u[4], &v[0], &v[4], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[1], u[5], &v[1], &v[5], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[2], u[6], &v[2], &v[6], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[3], u[7], &v[3], &v[7], &clamp_lo, &clamp_hi);
- // stage 7
- out[2 * 0 + col] = v1;
- out[2 * 1 + col] = v6;
- out[2 * 2 + col] = v3;
- out[2 * 3 + col] = v4;
- out[2 * 4 + col] = v5;
- out[2 * 5 + col] = v2;
- out[2 * 6 + col] = v7;
- out[2 * 7 + col] = v0;
+ // stage 4
+ u[0] = v[0];
+ u[1] = v[1];
+ u[2] = v[2];
+ u[3] = v[3];
+
+ u[4] = _mm_mullo_epi32(v[4], cospi16);
+ x = _mm_mullo_epi32(v[5], cospi48);
+ u[4] = _mm_add_epi32(u[4], x);
+ u[4] = _mm_add_epi32(u[4], rnding);
+ u[4] = _mm_srai_epi32(u[4], bit);
+
+ u[5] = _mm_mullo_epi32(v[4], cospi48);
+ x = _mm_mullo_epi32(v[5], cospi16);
+ u[5] = _mm_sub_epi32(u[5], x);
+ u[5] = _mm_add_epi32(u[5], rnding);
+ u[5] = _mm_srai_epi32(u[5], bit);
+
+ u[6] = _mm_mullo_epi32(v[6], cospim48);
+ x = _mm_mullo_epi32(v[7], cospi16);
+ u[6] = _mm_add_epi32(u[6], x);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ u[7] = _mm_mullo_epi32(v[6], cospi16);
+ x = _mm_mullo_epi32(v[7], cospim48);
+ u[7] = _mm_sub_epi32(u[7], x);
+ u[7] = _mm_add_epi32(u[7], rnding);
+ u[7] = _mm_srai_epi32(u[7], bit);
+
+ // stage 5
+ addsub_sse4_1(u[0], u[2], &v[0], &v[2], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[1], u[3], &v[1], &v[3], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[4], u[6], &v[4], &v[6], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[5], u[7], &v[5], &v[7], &clamp_lo, &clamp_hi);
+
+ // stage 6
+ u[0] = v[0];
+ u[1] = v[1];
+ u[4] = v[4];
+ u[5] = v[5];
+
+ v[0] = _mm_mullo_epi32(v[2], cospi32);
+ x = _mm_mullo_epi32(v[3], cospi32);
+ u[2] = _mm_add_epi32(v[0], x);
+ u[2] = _mm_add_epi32(u[2], rnding);
+ u[2] = _mm_srai_epi32(u[2], bit);
+
+ u[3] = _mm_sub_epi32(v[0], x);
+ u[3] = _mm_add_epi32(u[3], rnding);
+ u[3] = _mm_srai_epi32(u[3], bit);
+
+ v[0] = _mm_mullo_epi32(v[6], cospi32);
+ x = _mm_mullo_epi32(v[7], cospi32);
+ u[6] = _mm_add_epi32(v[0], x);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ u[7] = _mm_sub_epi32(v[0], x);
+ u[7] = _mm_add_epi32(u[7], rnding);
+ u[7] = _mm_srai_epi32(u[7], bit);
+
+ // stage 7
+ if (do_cols) {
+ out[1] = u[0];
+ out[3] = _mm_sub_epi32(kZero, u[4]);
+ out[5] = u[6];
+ out[7] = _mm_sub_epi32(kZero, u[2]);
+ out[9] = u[3];
+ out[11] = _mm_sub_epi32(kZero, u[7]);
+ out[13] = u[5];
+ out[15] = _mm_sub_epi32(kZero, u[1]);
+ } else {
+ neg_shift_sse4_1(u[0], u[4], out + 1, out + 3, &clamp_lo, &clamp_hi,
+ out_shift);
+ neg_shift_sse4_1(u[6], u[2], out + 5, out + 7, &clamp_lo, &clamp_hi,
+ out_shift);
+ neg_shift_sse4_1(u[3], u[7], out + 9, out + 11, &clamp_lo, &clamp_hi,
+ out_shift);
+ neg_shift_sse4_1(u[5], u[1], out + 13, out + 15, &clamp_lo, &clamp_hi,
+ out_shift);
}
}
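
In the rewritten iadst8x8_sse4_1 the last stage either negates the odd-indexed outputs directly (column pass) or defers to neg_shift_sse4_1 (row pass). The helper's definition is not in these hunks; judging from the two branches above, it is assumed to combine the sign flip with the rounding shift and clamp, roughly:

#include <stdint.h>

static int32_t round_shift_clamp(int64_t v, int shift, int32_t lo, int32_t hi) {
  v = (v + (((int64_t)1 << shift) >> 1)) >> shift;
  return (int32_t)(v < lo ? lo : (v > hi ? hi : v));
}

/* Assumed scalar behaviour of neg_shift_sse4_1(a, b, out0, out1, ...): out0
 * receives a and out1 receives -b, both round-shifted by out_shift and
 * clamped, consistent with the do_cols branch above where out[k] = u and
 * out[k + 1] = -u with no extra shift. */
static void neg_shift_sketch(int32_t a, int32_t b, int32_t *out0,
                             int32_t *out1, int32_t lo, int32_t hi, int shift) {
  *out0 = round_shift_clamp(a, shift, lo, hi);
  *out1 = round_shift_clamp(-(int64_t)b, shift, lo, hi);
}
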
@@ -708,102 +886,92 @@ static void write_buffer_8x8(__m128i *in, uint16_t *output, int stride,
void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
__m128i in[16], out[16];
- const TXFM_1D_CFG *row_cfg = NULL;
- const TXFM_1D_CFG *col_cfg = NULL;
+ const int8_t *shift = inv_txfm_shift_ls[TX_8X8];
+ const int txw_idx = get_txw_idx(TX_8X8);
+ const int txh_idx = get_txh_idx(TX_8X8);
switch (tx_type) {
case DCT_DCT:
- row_cfg = &inv_txfm_1d_row_cfg_dct_8;
- col_cfg = &inv_txfm_1d_col_cfg_dct_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
- idct8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
+ idct8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_8x8(in, out);
- idct8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_8x8(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
+ idct8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
break;
case DCT_ADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_8;
- col_cfg = &inv_txfm_1d_col_cfg_dct_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
- iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
+ iadst8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_8x8(in, out);
- idct8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_8x8(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
+ idct8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
break;
case ADST_DCT:
- row_cfg = &inv_txfm_1d_row_cfg_dct_8;
- col_cfg = &inv_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
- idct8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
+ idct8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_8x8(in, out);
- iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_8x8(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
+ iadst8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
break;
case ADST_ADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_8;
- col_cfg = &inv_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
- iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
+ iadst8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_8x8(in, out);
- iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_8x8(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
+ iadst8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
break;
-#if CONFIG_EXT_TX
case FLIPADST_DCT:
- row_cfg = &inv_txfm_1d_row_cfg_dct_8;
- col_cfg = &inv_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
- idct8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
+ idct8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_8x8(in, out);
- iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_8x8(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
+ iadst8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_8x8(in, output, stride, 0, 1, -shift[1], bd);
break;
case DCT_FLIPADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_8;
- col_cfg = &inv_txfm_1d_col_cfg_dct_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
- iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
+ iadst8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_8x8(in, out);
- idct8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_8x8(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
+ idct8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_8x8(in, output, stride, 1, 0, -shift[1], bd);
break;
case ADST_FLIPADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_8;
- col_cfg = &inv_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
- iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
+ iadst8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_8x8(in, out);
- iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_8x8(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
+ iadst8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_8x8(in, output, stride, 1, 0, -shift[1], bd);
break;
case FLIPADST_FLIPADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_8;
- col_cfg = &inv_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
- iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
+ iadst8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_8x8(in, out);
- iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_8x8(in, output, stride, 1, 1, -row_cfg->shift[1], bd);
+ iadst8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_8x8(in, output, stride, 1, 1, -shift[1], bd);
break;
case FLIPADST_ADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_8;
- col_cfg = &inv_txfm_1d_col_cfg_adst_8;
load_buffer_8x8(coeff, in);
transpose_8x8(in, out);
- iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
+ iadst8x8_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_8x8(in, out);
- iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_8x8(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
+ iadst8x8_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_8x8(in, output, stride, 0, 1, -shift[1], bd);
break;
-#endif // CONFIG_EXT_TX
default: assert(0);
}
}
@@ -868,7 +1036,8 @@ static void write_buffer_16x16(__m128i *in, uint16_t *output, int stride,
write_buffer_8x8(in8x8, rightDown, stride, fliplr, flipud, shift, bd);
}
-static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
+static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
+ int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);
@@ -894,6 +1063,9 @@ static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+ const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
+ const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i u[16], v[16], x, y;
int col;
@@ -945,14 +1117,10 @@ static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
u[5] = half_btf_sse4_1(&cospi24, &v[5], &cospim40, &v[6], &rnding, bit);
u[6] = half_btf_sse4_1(&cospi40, &v[5], &cospi24, &v[6], &rnding, bit);
u[7] = half_btf_sse4_1(&cospi8, &v[4], &cospi56, &v[7], &rnding, bit);
- u[8] = _mm_add_epi32(v[8], v[9]);
- u[9] = _mm_sub_epi32(v[8], v[9]);
- u[10] = _mm_sub_epi32(v[11], v[10]);
- u[11] = _mm_add_epi32(v[10], v[11]);
- u[12] = _mm_add_epi32(v[12], v[13]);
- u[13] = _mm_sub_epi32(v[12], v[13]);
- u[14] = _mm_sub_epi32(v[15], v[14]);
- u[15] = _mm_add_epi32(v[14], v[15]);
+ addsub_sse4_1(v[8], v[9], &u[8], &u[9], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[11], v[10], &u[11], &u[10], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[12], v[13], &u[12], &u[13], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[15], v[14], &u[15], &u[14], &clamp_lo, &clamp_hi);
// stage 4
x = _mm_mullo_epi32(u[0], cospi32);
@@ -967,10 +1135,8 @@ static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
v[2] = half_btf_sse4_1(&cospi48, &u[2], &cospim16, &u[3], &rnding, bit);
v[3] = half_btf_sse4_1(&cospi16, &u[2], &cospi48, &u[3], &rnding, bit);
- v[4] = _mm_add_epi32(u[4], u[5]);
- v[5] = _mm_sub_epi32(u[4], u[5]);
- v[6] = _mm_sub_epi32(u[7], u[6]);
- v[7] = _mm_add_epi32(u[6], u[7]);
+ addsub_sse4_1(u[4], u[5], &v[4], &v[5], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[7], u[6], &v[7], &v[6], &clamp_lo, &clamp_hi);
v[8] = u[8];
v[9] = half_btf_sse4_1(&cospim16, &u[9], &cospi48, &u[14], &rnding, bit);
v[10] = half_btf_sse4_1(&cospim48, &u[10], &cospim16, &u[13], &rnding, bit);
@@ -981,10 +1147,8 @@ static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
v[15] = u[15];
// stage 5
- u[0] = _mm_add_epi32(v[0], v[3]);
- u[1] = _mm_add_epi32(v[1], v[2]);
- u[2] = _mm_sub_epi32(v[1], v[2]);
- u[3] = _mm_sub_epi32(v[0], v[3]);
+ addsub_sse4_1(v[0], v[3], &u[0], &u[3], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[1], v[2], &u[1], &u[2], &clamp_lo, &clamp_hi);
u[4] = v[4];
x = _mm_mullo_epi32(v[5], cospi32);
@@ -998,24 +1162,16 @@ static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
u[6] = _mm_srai_epi32(u[6], bit);
u[7] = v[7];
- u[8] = _mm_add_epi32(v[8], v[11]);
- u[9] = _mm_add_epi32(v[9], v[10]);
- u[10] = _mm_sub_epi32(v[9], v[10]);
- u[11] = _mm_sub_epi32(v[8], v[11]);
- u[12] = _mm_sub_epi32(v[15], v[12]);
- u[13] = _mm_sub_epi32(v[14], v[13]);
- u[14] = _mm_add_epi32(v[13], v[14]);
- u[15] = _mm_add_epi32(v[12], v[15]);
+ addsub_sse4_1(v[8], v[11], &u[8], &u[11], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[9], v[10], &u[9], &u[10], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[15], v[12], &u[15], &u[12], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[14], v[13], &u[14], &u[13], &clamp_lo, &clamp_hi);
// stage 6
- v[0] = _mm_add_epi32(u[0], u[7]);
- v[1] = _mm_add_epi32(u[1], u[6]);
- v[2] = _mm_add_epi32(u[2], u[5]);
- v[3] = _mm_add_epi32(u[3], u[4]);
- v[4] = _mm_sub_epi32(u[3], u[4]);
- v[5] = _mm_sub_epi32(u[2], u[5]);
- v[6] = _mm_sub_epi32(u[1], u[6]);
- v[7] = _mm_sub_epi32(u[0], u[7]);
+ addsub_sse4_1(u[0], u[7], &v[0], &v[7], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[1], u[6], &v[1], &v[6], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[2], u[5], &v[2], &v[5], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[3], u[4], &v[3], &v[4], &clamp_lo, &clamp_hi);
v[8] = u[8];
v[9] = u[9];
@@ -1043,386 +1199,1141 @@ static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
v[15] = u[15];
// stage 7
- out[0 * 4 + col] = _mm_add_epi32(v[0], v[15]);
- out[1 * 4 + col] = _mm_add_epi32(v[1], v[14]);
- out[2 * 4 + col] = _mm_add_epi32(v[2], v[13]);
- out[3 * 4 + col] = _mm_add_epi32(v[3], v[12]);
- out[4 * 4 + col] = _mm_add_epi32(v[4], v[11]);
- out[5 * 4 + col] = _mm_add_epi32(v[5], v[10]);
- out[6 * 4 + col] = _mm_add_epi32(v[6], v[9]);
- out[7 * 4 + col] = _mm_add_epi32(v[7], v[8]);
- out[8 * 4 + col] = _mm_sub_epi32(v[7], v[8]);
- out[9 * 4 + col] = _mm_sub_epi32(v[6], v[9]);
- out[10 * 4 + col] = _mm_sub_epi32(v[5], v[10]);
- out[11 * 4 + col] = _mm_sub_epi32(v[4], v[11]);
- out[12 * 4 + col] = _mm_sub_epi32(v[3], v[12]);
- out[13 * 4 + col] = _mm_sub_epi32(v[2], v[13]);
- out[14 * 4 + col] = _mm_sub_epi32(v[1], v[14]);
- out[15 * 4 + col] = _mm_sub_epi32(v[0], v[15]);
+ if (do_cols) {
+ addsub_no_clamp_sse4_1(v[0], v[15], out + 0 * 4 + col,
+ out + 15 * 4 + col);
+ addsub_no_clamp_sse4_1(v[1], v[14], out + 1 * 4 + col,
+ out + 14 * 4 + col);
+ addsub_no_clamp_sse4_1(v[2], v[13], out + 2 * 4 + col,
+ out + 13 * 4 + col);
+ addsub_no_clamp_sse4_1(v[3], v[12], out + 3 * 4 + col,
+ out + 12 * 4 + col);
+ addsub_no_clamp_sse4_1(v[4], v[11], out + 4 * 4 + col,
+ out + 11 * 4 + col);
+ addsub_no_clamp_sse4_1(v[5], v[10], out + 5 * 4 + col,
+ out + 10 * 4 + col);
+ addsub_no_clamp_sse4_1(v[6], v[9], out + 6 * 4 + col, out + 9 * 4 + col);
+ addsub_no_clamp_sse4_1(v[7], v[8], out + 7 * 4 + col, out + 8 * 4 + col);
+ } else {
+ addsub_shift_sse4_1(v[0], v[15], out + 0 * 4 + col, out + 15 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_sse4_1(v[1], v[14], out + 1 * 4 + col, out + 14 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_sse4_1(v[2], v[13], out + 2 * 4 + col, out + 13 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_sse4_1(v[3], v[12], out + 3 * 4 + col, out + 12 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_sse4_1(v[4], v[11], out + 4 * 4 + col, out + 11 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_sse4_1(v[5], v[10], out + 5 * 4 + col, out + 10 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_sse4_1(v[6], v[9], out + 6 * 4 + col, out + 9 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ addsub_shift_sse4_1(v[7], v[8], out + 7 * 4 + col, out + 8 * 4 + col,
+ &clamp_lo, &clamp_hi, out_shift);
+ }
}
}
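
The idct16x16_sse4_1 code above leans on half_btf_sse4_1 for its rotation stages, and the iadst paths spell the same operation out as a mullo/add/srai triple. A sketch of that primitive, with the operands taken by value rather than by pointer as in the real helper:

#include <smmintrin.h>

/* Per 32-bit lane: (w0 * in0 + w1 * in1 + (1 << (bit - 1))) >> bit, i.e. one
 * half of a rotation butterfly with the rounding constant passed in as a
 * broadcast vector, matching the half_btf_sse4_1 call sites above. */
static __m128i half_btf_sketch(__m128i w0, __m128i in0, __m128i w1,
                               __m128i in1, __m128i rnding, int bit) {
  const __m128i t =
      _mm_add_epi32(_mm_mullo_epi32(w0, in0), _mm_mullo_epi32(w1, in1));
  return _mm_srai_epi32(_mm_add_epi32(t, rnding), bit);
}
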
-static void iadst16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
+static void iadst16x16_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
+ int bd, int out_shift) {
const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
- const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
- const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
- const __m128i cospim2 = _mm_set1_epi32(-cospi[2]);
const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
const __m128i cospi54 = _mm_set1_epi32(cospi[54]);
- const __m128i cospim10 = _mm_set1_epi32(-cospi[10]);
const __m128i cospi18 = _mm_set1_epi32(cospi[18]);
const __m128i cospi46 = _mm_set1_epi32(cospi[46]);
- const __m128i cospim18 = _mm_set1_epi32(-cospi[18]);
const __m128i cospi26 = _mm_set1_epi32(cospi[26]);
const __m128i cospi38 = _mm_set1_epi32(cospi[38]);
- const __m128i cospim26 = _mm_set1_epi32(-cospi[26]);
const __m128i cospi34 = _mm_set1_epi32(cospi[34]);
const __m128i cospi30 = _mm_set1_epi32(cospi[30]);
- const __m128i cospim34 = _mm_set1_epi32(-cospi[34]);
const __m128i cospi42 = _mm_set1_epi32(cospi[42]);
const __m128i cospi22 = _mm_set1_epi32(cospi[22]);
- const __m128i cospim42 = _mm_set1_epi32(-cospi[42]);
const __m128i cospi50 = _mm_set1_epi32(cospi[50]);
const __m128i cospi14 = _mm_set1_epi32(cospi[14]);
- const __m128i cospim50 = _mm_set1_epi32(-cospi[50]);
const __m128i cospi58 = _mm_set1_epi32(cospi[58]);
const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
- const __m128i cospim58 = _mm_set1_epi32(-cospi[58]);
+ const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+ const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+ const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+ const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
+ const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const __m128i zero = _mm_setzero_si128();
-
+ const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
+ const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
__m128i u[16], v[16], x, y;
+ const int col_num = 4;
int col;
- for (col = 0; col < 4; ++col) {
+ // Calculate the column 0, 1, 2, 3
+ for (col = 0; col < col_num; ++col) {
// stage 0
// stage 1
- u[0] = in[0 * 4 + col];
- u[1] = _mm_sub_epi32(zero, in[15 * 4 + col]);
- u[2] = _mm_sub_epi32(zero, in[7 * 4 + col]);
- u[3] = in[8 * 4 + col];
- u[4] = _mm_sub_epi32(zero, in[3 * 4 + col]);
- u[5] = in[12 * 4 + col];
- u[6] = in[4 * 4 + col];
- u[7] = _mm_sub_epi32(zero, in[11 * 4 + col]);
- u[8] = _mm_sub_epi32(zero, in[1 * 4 + col]);
- u[9] = in[14 * 4 + col];
- u[10] = in[6 * 4 + col];
- u[11] = _mm_sub_epi32(zero, in[9 * 4 + col]);
- u[12] = in[2 * 4 + col];
- u[13] = _mm_sub_epi32(zero, in[13 * 4 + col]);
- u[14] = _mm_sub_epi32(zero, in[5 * 4 + col]);
- u[15] = in[10 * 4 + col];
-
// stage 2
- v[0] = u[0];
- v[1] = u[1];
+ v[0] = _mm_mullo_epi32(in[15 * col_num + col], cospi2);
+ x = _mm_mullo_epi32(in[0 * col_num + col], cospi62);
+ v[0] = _mm_add_epi32(v[0], x);
+ v[0] = _mm_add_epi32(v[0], rnding);
+ v[0] = _mm_srai_epi32(v[0], bit);
- x = _mm_mullo_epi32(u[2], cospi32);
- y = _mm_mullo_epi32(u[3], cospi32);
- v[2] = _mm_add_epi32(x, y);
+ v[1] = _mm_mullo_epi32(in[15 * col_num + col], cospi62);
+ x = _mm_mullo_epi32(in[0 * col_num + col], cospi2);
+ v[1] = _mm_sub_epi32(v[1], x);
+ v[1] = _mm_add_epi32(v[1], rnding);
+ v[1] = _mm_srai_epi32(v[1], bit);
+
+ v[2] = _mm_mullo_epi32(in[13 * col_num + col], cospi10);
+ x = _mm_mullo_epi32(in[2 * col_num + col], cospi54);
+ v[2] = _mm_add_epi32(v[2], x);
v[2] = _mm_add_epi32(v[2], rnding);
v[2] = _mm_srai_epi32(v[2], bit);
- v[3] = _mm_sub_epi32(x, y);
+ v[3] = _mm_mullo_epi32(in[13 * col_num + col], cospi54);
+ x = _mm_mullo_epi32(in[2 * col_num + col], cospi10);
+ v[3] = _mm_sub_epi32(v[3], x);
v[3] = _mm_add_epi32(v[3], rnding);
v[3] = _mm_srai_epi32(v[3], bit);
- v[4] = u[4];
- v[5] = u[5];
-
- x = _mm_mullo_epi32(u[6], cospi32);
- y = _mm_mullo_epi32(u[7], cospi32);
- v[6] = _mm_add_epi32(x, y);
+ v[4] = _mm_mullo_epi32(in[11 * col_num + col], cospi18);
+ x = _mm_mullo_epi32(in[4 * col_num + col], cospi46);
+ v[4] = _mm_add_epi32(v[4], x);
+ v[4] = _mm_add_epi32(v[4], rnding);
+ v[4] = _mm_srai_epi32(v[4], bit);
+
+ v[5] = _mm_mullo_epi32(in[11 * col_num + col], cospi46);
+ x = _mm_mullo_epi32(in[4 * col_num + col], cospi18);
+ v[5] = _mm_sub_epi32(v[5], x);
+ v[5] = _mm_add_epi32(v[5], rnding);
+ v[5] = _mm_srai_epi32(v[5], bit);
+
+ v[6] = _mm_mullo_epi32(in[9 * col_num + col], cospi26);
+ x = _mm_mullo_epi32(in[6 * col_num + col], cospi38);
+ v[6] = _mm_add_epi32(v[6], x);
v[6] = _mm_add_epi32(v[6], rnding);
v[6] = _mm_srai_epi32(v[6], bit);
- v[7] = _mm_sub_epi32(x, y);
+ v[7] = _mm_mullo_epi32(in[9 * col_num + col], cospi38);
+ x = _mm_mullo_epi32(in[6 * col_num + col], cospi26);
+ v[7] = _mm_sub_epi32(v[7], x);
v[7] = _mm_add_epi32(v[7], rnding);
v[7] = _mm_srai_epi32(v[7], bit);
- v[8] = u[8];
- v[9] = u[9];
-
- x = _mm_mullo_epi32(u[10], cospi32);
- y = _mm_mullo_epi32(u[11], cospi32);
- v[10] = _mm_add_epi32(x, y);
+ v[8] = _mm_mullo_epi32(in[7 * col_num + col], cospi34);
+ x = _mm_mullo_epi32(in[8 * col_num + col], cospi30);
+ v[8] = _mm_add_epi32(v[8], x);
+ v[8] = _mm_add_epi32(v[8], rnding);
+ v[8] = _mm_srai_epi32(v[8], bit);
+
+ v[9] = _mm_mullo_epi32(in[7 * col_num + col], cospi30);
+ x = _mm_mullo_epi32(in[8 * col_num + col], cospi34);
+ v[9] = _mm_sub_epi32(v[9], x);
+ v[9] = _mm_add_epi32(v[9], rnding);
+ v[9] = _mm_srai_epi32(v[9], bit);
+
+ v[10] = _mm_mullo_epi32(in[5 * col_num + col], cospi42);
+ x = _mm_mullo_epi32(in[10 * col_num + col], cospi22);
+ v[10] = _mm_add_epi32(v[10], x);
v[10] = _mm_add_epi32(v[10], rnding);
v[10] = _mm_srai_epi32(v[10], bit);
- v[11] = _mm_sub_epi32(x, y);
+ v[11] = _mm_mullo_epi32(in[5 * col_num + col], cospi22);
+ x = _mm_mullo_epi32(in[10 * col_num + col], cospi42);
+ v[11] = _mm_sub_epi32(v[11], x);
v[11] = _mm_add_epi32(v[11], rnding);
v[11] = _mm_srai_epi32(v[11], bit);
- v[12] = u[12];
- v[13] = u[13];
+ v[12] = _mm_mullo_epi32(in[3 * col_num + col], cospi50);
+ x = _mm_mullo_epi32(in[12 * col_num + col], cospi14);
+ v[12] = _mm_add_epi32(v[12], x);
+ v[12] = _mm_add_epi32(v[12], rnding);
+ v[12] = _mm_srai_epi32(v[12], bit);
- x = _mm_mullo_epi32(u[14], cospi32);
- y = _mm_mullo_epi32(u[15], cospi32);
- v[14] = _mm_add_epi32(x, y);
+ v[13] = _mm_mullo_epi32(in[3 * col_num + col], cospi14);
+ x = _mm_mullo_epi32(in[12 * col_num + col], cospi50);
+ v[13] = _mm_sub_epi32(v[13], x);
+ v[13] = _mm_add_epi32(v[13], rnding);
+ v[13] = _mm_srai_epi32(v[13], bit);
+
+ v[14] = _mm_mullo_epi32(in[1 * col_num + col], cospi58);
+ x = _mm_mullo_epi32(in[14 * col_num + col], cospi6);
+ v[14] = _mm_add_epi32(v[14], x);
v[14] = _mm_add_epi32(v[14], rnding);
v[14] = _mm_srai_epi32(v[14], bit);
- v[15] = _mm_sub_epi32(x, y);
+ v[15] = _mm_mullo_epi32(in[1 * col_num + col], cospi6);
+ x = _mm_mullo_epi32(in[14 * col_num + col], cospi58);
+ v[15] = _mm_sub_epi32(v[15], x);
v[15] = _mm_add_epi32(v[15], rnding);
v[15] = _mm_srai_epi32(v[15], bit);
// stage 3
- u[0] = _mm_add_epi32(v[0], v[2]);
- u[1] = _mm_add_epi32(v[1], v[3]);
- u[2] = _mm_sub_epi32(v[0], v[2]);
- u[3] = _mm_sub_epi32(v[1], v[3]);
- u[4] = _mm_add_epi32(v[4], v[6]);
- u[5] = _mm_add_epi32(v[5], v[7]);
- u[6] = _mm_sub_epi32(v[4], v[6]);
- u[7] = _mm_sub_epi32(v[5], v[7]);
- u[8] = _mm_add_epi32(v[8], v[10]);
- u[9] = _mm_add_epi32(v[9], v[11]);
- u[10] = _mm_sub_epi32(v[8], v[10]);
- u[11] = _mm_sub_epi32(v[9], v[11]);
- u[12] = _mm_add_epi32(v[12], v[14]);
- u[13] = _mm_add_epi32(v[13], v[15]);
- u[14] = _mm_sub_epi32(v[12], v[14]);
- u[15] = _mm_sub_epi32(v[13], v[15]);
+ addsub_sse4_1(v[0], v[8], &u[0], &u[8], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[1], v[9], &u[1], &u[9], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[2], v[10], &u[2], &u[10], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[3], v[11], &u[3], &u[11], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[4], v[12], &u[4], &u[12], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[5], v[13], &u[5], &u[13], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[6], v[14], &u[6], &u[14], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[7], v[15], &u[7], &u[15], &clamp_lo, &clamp_hi);
// stage 4
v[0] = u[0];
v[1] = u[1];
v[2] = u[2];
v[3] = u[3];
- v[4] = half_btf_sse4_1(&cospi16, &u[4], &cospi48, &u[5], &rnding, bit);
- v[5] = half_btf_sse4_1(&cospi48, &u[4], &cospim16, &u[5], &rnding, bit);
- v[6] = half_btf_sse4_1(&cospim48, &u[6], &cospi16, &u[7], &rnding, bit);
- v[7] = half_btf_sse4_1(&cospi16, &u[6], &cospi48, &u[7], &rnding, bit);
- v[8] = u[8];
- v[9] = u[9];
- v[10] = u[10];
- v[11] = u[11];
- v[12] = half_btf_sse4_1(&cospi16, &u[12], &cospi48, &u[13], &rnding, bit);
- v[13] = half_btf_sse4_1(&cospi48, &u[12], &cospim16, &u[13], &rnding, bit);
- v[14] = half_btf_sse4_1(&cospim48, &u[14], &cospi16, &u[15], &rnding, bit);
- v[15] = half_btf_sse4_1(&cospi16, &u[14], &cospi48, &u[15], &rnding, bit);
+ v[4] = u[4];
+ v[5] = u[5];
+ v[6] = u[6];
+ v[7] = u[7];
+
+ v[8] = _mm_mullo_epi32(u[8], cospi8);
+ x = _mm_mullo_epi32(u[9], cospi56);
+ v[8] = _mm_add_epi32(v[8], x);
+ v[8] = _mm_add_epi32(v[8], rnding);
+ v[8] = _mm_srai_epi32(v[8], bit);
+
+ v[9] = _mm_mullo_epi32(u[8], cospi56);
+ x = _mm_mullo_epi32(u[9], cospi8);
+ v[9] = _mm_sub_epi32(v[9], x);
+ v[9] = _mm_add_epi32(v[9], rnding);
+ v[9] = _mm_srai_epi32(v[9], bit);
+
+ v[10] = _mm_mullo_epi32(u[10], cospi40);
+ x = _mm_mullo_epi32(u[11], cospi24);
+ v[10] = _mm_add_epi32(v[10], x);
+ v[10] = _mm_add_epi32(v[10], rnding);
+ v[10] = _mm_srai_epi32(v[10], bit);
+
+ v[11] = _mm_mullo_epi32(u[10], cospi24);
+ x = _mm_mullo_epi32(u[11], cospi40);
+ v[11] = _mm_sub_epi32(v[11], x);
+ v[11] = _mm_add_epi32(v[11], rnding);
+ v[11] = _mm_srai_epi32(v[11], bit);
+
+ v[12] = _mm_mullo_epi32(u[12], cospim56);
+ x = _mm_mullo_epi32(u[13], cospi8);
+ v[12] = _mm_add_epi32(v[12], x);
+ v[12] = _mm_add_epi32(v[12], rnding);
+ v[12] = _mm_srai_epi32(v[12], bit);
+
+ v[13] = _mm_mullo_epi32(u[12], cospi8);
+ x = _mm_mullo_epi32(u[13], cospim56);
+ v[13] = _mm_sub_epi32(v[13], x);
+ v[13] = _mm_add_epi32(v[13], rnding);
+ v[13] = _mm_srai_epi32(v[13], bit);
+
+ v[14] = _mm_mullo_epi32(u[14], cospim24);
+ x = _mm_mullo_epi32(u[15], cospi40);
+ v[14] = _mm_add_epi32(v[14], x);
+ v[14] = _mm_add_epi32(v[14], rnding);
+ v[14] = _mm_srai_epi32(v[14], bit);
+
+ v[15] = _mm_mullo_epi32(u[14], cospi40);
+ x = _mm_mullo_epi32(u[15], cospim24);
+ v[15] = _mm_sub_epi32(v[15], x);
+ v[15] = _mm_add_epi32(v[15], rnding);
+ v[15] = _mm_srai_epi32(v[15], bit);
// stage 5
- u[0] = _mm_add_epi32(v[0], v[4]);
- u[1] = _mm_add_epi32(v[1], v[5]);
- u[2] = _mm_add_epi32(v[2], v[6]);
- u[3] = _mm_add_epi32(v[3], v[7]);
- u[4] = _mm_sub_epi32(v[0], v[4]);
- u[5] = _mm_sub_epi32(v[1], v[5]);
- u[6] = _mm_sub_epi32(v[2], v[6]);
- u[7] = _mm_sub_epi32(v[3], v[7]);
- u[8] = _mm_add_epi32(v[8], v[12]);
- u[9] = _mm_add_epi32(v[9], v[13]);
- u[10] = _mm_add_epi32(v[10], v[14]);
- u[11] = _mm_add_epi32(v[11], v[15]);
- u[12] = _mm_sub_epi32(v[8], v[12]);
- u[13] = _mm_sub_epi32(v[9], v[13]);
- u[14] = _mm_sub_epi32(v[10], v[14]);
- u[15] = _mm_sub_epi32(v[11], v[15]);
+ addsub_sse4_1(v[0], v[4], &u[0], &u[4], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[1], v[5], &u[1], &u[5], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[2], v[6], &u[2], &u[6], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[3], v[7], &u[3], &u[7], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[8], v[12], &u[8], &u[12], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[9], v[13], &u[9], &u[13], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[10], v[14], &u[10], &u[14], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[11], v[15], &u[11], &u[15], &clamp_lo, &clamp_hi);
// stage 6
v[0] = u[0];
v[1] = u[1];
v[2] = u[2];
v[3] = u[3];
- v[4] = u[4];
- v[5] = u[5];
- v[6] = u[6];
- v[7] = u[7];
- v[8] = half_btf_sse4_1(&cospi8, &u[8], &cospi56, &u[9], &rnding, bit);
- v[9] = half_btf_sse4_1(&cospi56, &u[8], &cospim8, &u[9], &rnding, bit);
- v[10] = half_btf_sse4_1(&cospi40, &u[10], &cospi24, &u[11], &rnding, bit);
- v[11] = half_btf_sse4_1(&cospi24, &u[10], &cospim40, &u[11], &rnding, bit);
- v[12] = half_btf_sse4_1(&cospim56, &u[12], &cospi8, &u[13], &rnding, bit);
- v[13] = half_btf_sse4_1(&cospi8, &u[12], &cospi56, &u[13], &rnding, bit);
- v[14] = half_btf_sse4_1(&cospim24, &u[14], &cospi40, &u[15], &rnding, bit);
- v[15] = half_btf_sse4_1(&cospi40, &u[14], &cospi24, &u[15], &rnding, bit);
+
+ v[4] = _mm_mullo_epi32(u[4], cospi16);
+ x = _mm_mullo_epi32(u[5], cospi48);
+ v[4] = _mm_add_epi32(v[4], x);
+ v[4] = _mm_add_epi32(v[4], rnding);
+ v[4] = _mm_srai_epi32(v[4], bit);
+
+ v[5] = _mm_mullo_epi32(u[4], cospi48);
+ x = _mm_mullo_epi32(u[5], cospi16);
+ v[5] = _mm_sub_epi32(v[5], x);
+ v[5] = _mm_add_epi32(v[5], rnding);
+ v[5] = _mm_srai_epi32(v[5], bit);
+
+ v[6] = _mm_mullo_epi32(u[6], cospim48);
+ x = _mm_mullo_epi32(u[7], cospi16);
+ v[6] = _mm_add_epi32(v[6], x);
+ v[6] = _mm_add_epi32(v[6], rnding);
+ v[6] = _mm_srai_epi32(v[6], bit);
+
+ v[7] = _mm_mullo_epi32(u[6], cospi16);
+ x = _mm_mullo_epi32(u[7], cospim48);
+ v[7] = _mm_sub_epi32(v[7], x);
+ v[7] = _mm_add_epi32(v[7], rnding);
+ v[7] = _mm_srai_epi32(v[7], bit);
+
+ v[8] = u[8];
+ v[9] = u[9];
+ v[10] = u[10];
+ v[11] = u[11];
+
+ v[12] = _mm_mullo_epi32(u[12], cospi16);
+ x = _mm_mullo_epi32(u[13], cospi48);
+ v[12] = _mm_add_epi32(v[12], x);
+ v[12] = _mm_add_epi32(v[12], rnding);
+ v[12] = _mm_srai_epi32(v[12], bit);
+
+ v[13] = _mm_mullo_epi32(u[12], cospi48);
+ x = _mm_mullo_epi32(u[13], cospi16);
+ v[13] = _mm_sub_epi32(v[13], x);
+ v[13] = _mm_add_epi32(v[13], rnding);
+ v[13] = _mm_srai_epi32(v[13], bit);
+
+ v[14] = _mm_mullo_epi32(u[14], cospim48);
+ x = _mm_mullo_epi32(u[15], cospi16);
+ v[14] = _mm_add_epi32(v[14], x);
+ v[14] = _mm_add_epi32(v[14], rnding);
+ v[14] = _mm_srai_epi32(v[14], bit);
+
+ v[15] = _mm_mullo_epi32(u[14], cospi16);
+ x = _mm_mullo_epi32(u[15], cospim48);
+ v[15] = _mm_sub_epi32(v[15], x);
+ v[15] = _mm_add_epi32(v[15], rnding);
+ v[15] = _mm_srai_epi32(v[15], bit);
// stage 7
- u[0] = _mm_add_epi32(v[0], v[8]);
- u[1] = _mm_add_epi32(v[1], v[9]);
- u[2] = _mm_add_epi32(v[2], v[10]);
- u[3] = _mm_add_epi32(v[3], v[11]);
- u[4] = _mm_add_epi32(v[4], v[12]);
- u[5] = _mm_add_epi32(v[5], v[13]);
- u[6] = _mm_add_epi32(v[6], v[14]);
- u[7] = _mm_add_epi32(v[7], v[15]);
- u[8] = _mm_sub_epi32(v[0], v[8]);
- u[9] = _mm_sub_epi32(v[1], v[9]);
- u[10] = _mm_sub_epi32(v[2], v[10]);
- u[11] = _mm_sub_epi32(v[3], v[11]);
- u[12] = _mm_sub_epi32(v[4], v[12]);
- u[13] = _mm_sub_epi32(v[5], v[13]);
- u[14] = _mm_sub_epi32(v[6], v[14]);
- u[15] = _mm_sub_epi32(v[7], v[15]);
+ addsub_sse4_1(v[0], v[2], &u[0], &u[2], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[1], v[3], &u[1], &u[3], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[4], v[6], &u[4], &u[6], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[5], v[7], &u[5], &u[7], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[8], v[10], &u[8], &u[10], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[9], v[11], &u[9], &u[11], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[12], v[14], &u[12], &u[14], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[13], v[15], &u[13], &u[15], &clamp_lo, &clamp_hi);
// stage 8
- v[0] = half_btf_sse4_1(&cospi2, &u[0], &cospi62, &u[1], &rnding, bit);
- v[1] = half_btf_sse4_1(&cospi62, &u[0], &cospim2, &u[1], &rnding, bit);
- v[2] = half_btf_sse4_1(&cospi10, &u[2], &cospi54, &u[3], &rnding, bit);
- v[3] = half_btf_sse4_1(&cospi54, &u[2], &cospim10, &u[3], &rnding, bit);
- v[4] = half_btf_sse4_1(&cospi18, &u[4], &cospi46, &u[5], &rnding, bit);
- v[5] = half_btf_sse4_1(&cospi46, &u[4], &cospim18, &u[5], &rnding, bit);
- v[6] = half_btf_sse4_1(&cospi26, &u[6], &cospi38, &u[7], &rnding, bit);
- v[7] = half_btf_sse4_1(&cospi38, &u[6], &cospim26, &u[7], &rnding, bit);
- v[8] = half_btf_sse4_1(&cospi34, &u[8], &cospi30, &u[9], &rnding, bit);
- v[9] = half_btf_sse4_1(&cospi30, &u[8], &cospim34, &u[9], &rnding, bit);
- v[10] = half_btf_sse4_1(&cospi42, &u[10], &cospi22, &u[11], &rnding, bit);
- v[11] = half_btf_sse4_1(&cospi22, &u[10], &cospim42, &u[11], &rnding, bit);
- v[12] = half_btf_sse4_1(&cospi50, &u[12], &cospi14, &u[13], &rnding, bit);
- v[13] = half_btf_sse4_1(&cospi14, &u[12], &cospim50, &u[13], &rnding, bit);
- v[14] = half_btf_sse4_1(&cospi58, &u[14], &cospi6, &u[15], &rnding, bit);
- v[15] = half_btf_sse4_1(&cospi6, &u[14], &cospim58, &u[15], &rnding, bit);
+ v[0] = u[0];
+ v[1] = u[1];
+
+ y = _mm_mullo_epi32(u[2], cospi32);
+ x = _mm_mullo_epi32(u[3], cospi32);
+ v[2] = _mm_add_epi32(y, x);
+ v[2] = _mm_add_epi32(v[2], rnding);
+ v[2] = _mm_srai_epi32(v[2], bit);
+
+ v[3] = _mm_sub_epi32(y, x);
+ v[3] = _mm_add_epi32(v[3], rnding);
+ v[3] = _mm_srai_epi32(v[3], bit);
+
+ v[4] = u[4];
+ v[5] = u[5];
+
+ y = _mm_mullo_epi32(u[6], cospi32);
+ x = _mm_mullo_epi32(u[7], cospi32);
+ v[6] = _mm_add_epi32(y, x);
+ v[6] = _mm_add_epi32(v[6], rnding);
+ v[6] = _mm_srai_epi32(v[6], bit);
+
+ v[7] = _mm_sub_epi32(y, x);
+ v[7] = _mm_add_epi32(v[7], rnding);
+ v[7] = _mm_srai_epi32(v[7], bit);
+
+ v[8] = u[8];
+ v[9] = u[9];
+
+ y = _mm_mullo_epi32(u[10], cospi32);
+ x = _mm_mullo_epi32(u[11], cospi32);
+ v[10] = _mm_add_epi32(y, x);
+ v[10] = _mm_add_epi32(v[10], rnding);
+ v[10] = _mm_srai_epi32(v[10], bit);
+
+ v[11] = _mm_sub_epi32(y, x);
+ v[11] = _mm_add_epi32(v[11], rnding);
+ v[11] = _mm_srai_epi32(v[11], bit);
+
+ v[12] = u[12];
+ v[13] = u[13];
+
+ y = _mm_mullo_epi32(u[14], cospi32);
+ x = _mm_mullo_epi32(u[15], cospi32);
+ v[14] = _mm_add_epi32(y, x);
+ v[14] = _mm_add_epi32(v[14], rnding);
+ v[14] = _mm_srai_epi32(v[14], bit);
+
+ v[15] = _mm_sub_epi32(y, x);
+ v[15] = _mm_add_epi32(v[15], rnding);
+ v[15] = _mm_srai_epi32(v[15], bit);
// stage 9
- out[0 * 4 + col] = v[1];
- out[1 * 4 + col] = v[14];
- out[2 * 4 + col] = v[3];
- out[3 * 4 + col] = v[12];
- out[4 * 4 + col] = v[5];
- out[5 * 4 + col] = v[10];
- out[6 * 4 + col] = v[7];
- out[7 * 4 + col] = v[8];
- out[8 * 4 + col] = v[9];
- out[9 * 4 + col] = v[6];
- out[10 * 4 + col] = v[11];
- out[11 * 4 + col] = v[4];
- out[12 * 4 + col] = v[13];
- out[13 * 4 + col] = v[2];
- out[14 * 4 + col] = v[15];
- out[15 * 4 + col] = v[0];
+ if (do_cols) {
+ out[0 * col_num + col] = v[0];
+ out[1 * col_num + col] = _mm_sub_epi32(_mm_setzero_si128(), v[8]);
+ out[2 * col_num + col] = v[12];
+ out[3 * col_num + col] = _mm_sub_epi32(_mm_setzero_si128(), v[4]);
+ out[4 * col_num + col] = v[6];
+ out[5 * col_num + col] = _mm_sub_epi32(_mm_setzero_si128(), v[14]);
+ out[6 * col_num + col] = v[10];
+ out[7 * col_num + col] = _mm_sub_epi32(_mm_setzero_si128(), v[2]);
+ out[8 * col_num + col] = v[3];
+ out[9 * col_num + col] = _mm_sub_epi32(_mm_setzero_si128(), v[11]);
+ out[10 * col_num + col] = v[15];
+ out[11 * col_num + col] = _mm_sub_epi32(_mm_setzero_si128(), v[7]);
+ out[12 * col_num + col] = v[5];
+ out[13 * col_num + col] = _mm_sub_epi32(_mm_setzero_si128(), v[13]);
+ out[14 * col_num + col] = v[9];
+ out[15 * col_num + col] = _mm_sub_epi32(_mm_setzero_si128(), v[1]);
+ } else {
+ neg_shift_sse4_1(v[0], v[8], out + 0 * col_num + col,
+ out + 1 * col_num + col, &clamp_lo, &clamp_hi,
+ out_shift);
+ neg_shift_sse4_1(v[12], v[4], out + 2 * col_num + col,
+ out + 3 * col_num + col, &clamp_lo, &clamp_hi,
+ out_shift);
+ neg_shift_sse4_1(v[6], v[14], out + 4 * col_num + col,
+ out + 5 * col_num + col, &clamp_lo, &clamp_hi,
+ out_shift);
+ neg_shift_sse4_1(v[10], v[2], out + 6 * col_num + col,
+ out + 7 * col_num + col, &clamp_lo, &clamp_hi,
+ out_shift);
+ neg_shift_sse4_1(v[3], v[11], out + 8 * col_num + col,
+ out + 9 * col_num + col, &clamp_lo, &clamp_hi,
+ out_shift);
+ neg_shift_sse4_1(v[15], v[7], out + 10 * col_num + col,
+ out + 11 * col_num + col, &clamp_lo, &clamp_hi,
+ out_shift);
+ neg_shift_sse4_1(v[5], v[13], out + 12 * col_num + col,
+ out + 13 * col_num + col, &clamp_lo, &clamp_hi,
+ out_shift);
+ neg_shift_sse4_1(v[9], v[1], out + 14 * col_num + col,
+ out + 15 * col_num + col, &clamp_lo, &clamp_hi,
+ out_shift);
+ }
}
}
-static void round_shift_16x16(__m128i *in, int shift) {
- round_shift_8x8(&in[0], shift);
- round_shift_8x8(&in[16], shift);
- round_shift_8x8(&in[32], shift);
- round_shift_8x8(&in[48], shift);
-}
-
void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *coeff, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) {
__m128i in[64], out[64];
- const TXFM_1D_CFG *row_cfg = NULL;
- const TXFM_1D_CFG *col_cfg = NULL;
+ const int8_t *shift = inv_txfm_shift_ls[TX_16X16];
+ const int txw_idx = get_txw_idx(TX_16X16);
+ const int txh_idx = get_txh_idx(TX_16X16);
switch (tx_type) {
case DCT_DCT:
- row_cfg = &inv_txfm_1d_row_cfg_dct_16;
- col_cfg = &inv_txfm_1d_col_cfg_dct_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
- idct16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
- round_shift_16x16(in, -row_cfg->shift[0]);
+ idct16x16_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_16x16(in, out);
- idct16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_16x16(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
+ idct16x16_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_16x16(in, output, stride, 0, 0, -shift[1], bd);
break;
case DCT_ADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_16;
- col_cfg = &inv_txfm_1d_col_cfg_dct_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
- iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
- round_shift_16x16(in, -row_cfg->shift[0]);
+ iadst16x16_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_16x16(in, out);
- idct16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_16x16(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
+ idct16x16_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_16x16(in, output, stride, 0, 0, -shift[1], bd);
break;
case ADST_DCT:
- row_cfg = &inv_txfm_1d_row_cfg_dct_16;
- col_cfg = &inv_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
- idct16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
- round_shift_16x16(in, -row_cfg->shift[0]);
+ idct16x16_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_16x16(in, out);
- iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_16x16(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
+ iadst16x16_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_16x16(in, output, stride, 0, 0, -shift[1], bd);
break;
case ADST_ADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_16;
- col_cfg = &inv_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
- iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
- round_shift_16x16(in, -row_cfg->shift[0]);
+ iadst16x16_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_16x16(in, out);
- iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_16x16(in, output, stride, 0, 0, -row_cfg->shift[1], bd);
+ iadst16x16_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_16x16(in, output, stride, 0, 0, -shift[1], bd);
break;
-#if CONFIG_EXT_TX
case FLIPADST_DCT:
- row_cfg = &inv_txfm_1d_row_cfg_dct_16;
- col_cfg = &inv_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
- idct16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
- round_shift_16x16(in, -row_cfg->shift[0]);
+ idct16x16_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_16x16(in, out);
- iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_16x16(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
+ iadst16x16_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_16x16(in, output, stride, 0, 1, -shift[1], bd);
break;
case DCT_FLIPADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_16;
- col_cfg = &inv_txfm_1d_col_cfg_dct_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
- iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
- round_shift_16x16(in, -row_cfg->shift[0]);
+ iadst16x16_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_16x16(in, out);
- idct16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_16x16(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
+ idct16x16_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_16x16(in, output, stride, 1, 0, -shift[1], bd);
break;
case ADST_FLIPADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_16;
- col_cfg = &inv_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
- iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
- round_shift_16x16(in, -row_cfg->shift[0]);
+ iadst16x16_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_16x16(in, out);
- iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_16x16(in, output, stride, 1, 0, -row_cfg->shift[1], bd);
+ iadst16x16_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_16x16(in, output, stride, 1, 0, -shift[1], bd);
break;
case FLIPADST_FLIPADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_16;
- col_cfg = &inv_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
- iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
- round_shift_16x16(in, -row_cfg->shift[0]);
+ iadst16x16_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_16x16(in, out);
- iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_16x16(in, output, stride, 1, 1, -row_cfg->shift[1], bd);
+ iadst16x16_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_16x16(in, output, stride, 1, 1, -shift[1], bd);
break;
case FLIPADST_ADST:
- row_cfg = &inv_txfm_1d_row_cfg_adst_16;
- col_cfg = &inv_txfm_1d_col_cfg_adst_16;
load_buffer_16x16(coeff, in);
transpose_16x16(in, out);
- iadst16x16_sse4_1(out, in, row_cfg->cos_bit[2]);
- round_shift_16x16(in, -row_cfg->shift[0]);
+ iadst16x16_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
transpose_16x16(in, out);
- iadst16x16_sse4_1(out, in, col_cfg->cos_bit[2]);
- write_buffer_16x16(in, output, stride, 0, 1, -row_cfg->shift[1], bd);
+ iadst16x16_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_16x16(in, output, stride, 0, 1, -shift[1], bd);
break;
-#endif
default: assert(0);
}
}
+
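+// Only the top-left (low-frequency) 32x32 quadrant of a 64x64 coefficient
+// block is coded; load it and zero the remaining three quadrants.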
+static void load_buffer_64x64_lower_32x32(const int32_t *coeff, __m128i *in) {
+ int i, j;
+
+ __m128i zero = _mm_setzero_si128();
+
+ for (i = 0; i < 32; ++i) {
+ for (j = 0; j < 8; ++j) {
+ in[16 * i + j] =
+ _mm_loadu_si128((const __m128i *)(coeff + 32 * i + 4 * j));
+ in[16 * i + j + 8] = zero;
+ }
+ }
+
+ for (i = 0; i < 512; ++i) in[512 + i] = zero;
+}
+
+static void transpose_64x64(__m128i *in, __m128i *out, int do_cols) {
+ int i, j;
+ for (i = 0; i < (do_cols ? 16 : 8); ++i) {
+ for (j = 0; j < 8; ++j) {
+ TRANSPOSE_4X4(in[(4 * i + 0) * 16 + j], in[(4 * i + 1) * 16 + j],
+ in[(4 * i + 2) * 16 + j], in[(4 * i + 3) * 16 + j],
+ out[(4 * j + 0) * 16 + i], out[(4 * j + 1) * 16 + i],
+ out[(4 * j + 2) * 16 + i], out[(4 * j + 3) * 16 + i]);
+ }
+ }
+}
+
+static void assign_16x16_input_from_32x32(const __m128i *in, __m128i *in16x16,
+ int col) {
+ int i;
+ for (i = 0; i < 16 * 16 / 4; i += 4) {
+ in16x16[i] = in[col];
+ in16x16[i + 1] = in[col + 1];
+ in16x16[i + 2] = in[col + 2];
+ in16x16[i + 3] = in[col + 3];
+ col += 8;
+ }
+}
+
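+// Write a 32x32 result by splitting it into four 16x16 quarters and reusing
+// write_buffer_16x16() on each, with the quarter destinations swapped to
+// honor any horizontal/vertical flip.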
+static void write_buffer_32x32(__m128i *in, uint16_t *output, int stride,
+ int fliplr, int flipud, int shift, int bd) {
+ __m128i in16x16[16 * 16 / 4];
+ uint16_t *leftUp = &output[0];
+ uint16_t *rightUp = &output[16];
+ uint16_t *leftDown = &output[16 * stride];
+ uint16_t *rightDown = &output[16 * stride + 16];
+
+ if (fliplr) {
+ swap_addr(&leftUp, &rightUp);
+ swap_addr(&leftDown, &rightDown);
+ }
+
+ if (flipud) {
+ swap_addr(&leftUp, &leftDown);
+ swap_addr(&rightUp, &rightDown);
+ }
+
+ // Left-up quarter
+ assign_16x16_input_from_32x32(in, in16x16, 0);
+ write_buffer_16x16(in16x16, leftUp, stride, fliplr, flipud, shift, bd);
+
+ // Right-up quarter
+ assign_16x16_input_from_32x32(in, in16x16, 32 / 2 / 4);
+ write_buffer_16x16(in16x16, rightUp, stride, fliplr, flipud, shift, bd);
+
+ // Left-down quarter
+ assign_16x16_input_from_32x32(in, in16x16, 32 * 32 / 2 / 4);
+ write_buffer_16x16(in16x16, leftDown, stride, fliplr, flipud, shift, bd);
+
+ // Right-down quarter
+ assign_16x16_input_from_32x32(in, in16x16, 32 * 32 / 2 / 4 + 32 / 2 / 4);
+ write_buffer_16x16(in16x16, rightDown, stride, fliplr, flipud, shift, bd);
+}
+
+static void assign_32x32_input_from_64x64(const __m128i *in, __m128i *in32x32,
+ int col) {
+ int i;
+ for (i = 0; i < 32 * 32 / 4; i += 8) {
+ in32x32[i] = in[col];
+ in32x32[i + 1] = in[col + 1];
+ in32x32[i + 2] = in[col + 2];
+ in32x32[i + 3] = in[col + 3];
+ in32x32[i + 4] = in[col + 4];
+ in32x32[i + 5] = in[col + 5];
+ in32x32[i + 6] = in[col + 6];
+ in32x32[i + 7] = in[col + 7];
+ col += 16;
+ }
+}
+
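+// Same quartering scheme one level up: emit the 64x64 result as four 32x32
+// quarters via write_buffer_32x32().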
+static void write_buffer_64x64(__m128i *in, uint16_t *output, int stride,
+ int fliplr, int flipud, int shift, int bd) {
+ __m128i in32x32[32 * 32 / 4];
+ uint16_t *leftUp = &output[0];
+ uint16_t *rightUp = &output[32];
+ uint16_t *leftDown = &output[32 * stride];
+ uint16_t *rightDown = &output[32 * stride + 32];
+
+ if (fliplr) {
+ swap_addr(&leftUp, &rightUp);
+ swap_addr(&leftDown, &rightDown);
+ }
+
+ if (flipud) {
+ swap_addr(&leftUp, &leftDown);
+ swap_addr(&rightUp, &rightDown);
+ }
+
+ // Left-up quarter
+ assign_32x32_input_from_64x64(in, in32x32, 0);
+ write_buffer_32x32(in32x32, leftUp, stride, fliplr, flipud, shift, bd);
+
+ // Right-up quarter
+ assign_32x32_input_from_64x64(in, in32x32, 64 / 2 / 4);
+ write_buffer_32x32(in32x32, rightUp, stride, fliplr, flipud, shift, bd);
+
+ // Left-down quarter
+ assign_32x32_input_from_64x64(in, in32x32, 64 * 64 / 2 / 4);
+ write_buffer_32x32(in32x32, leftDown, stride, fliplr, flipud, shift, bd);
+
+ // Right-down quarter
+ assign_32x32_input_from_64x64(in, in32x32, 64 * 64 / 2 / 4 + 64 / 2 / 4);
+ write_buffer_32x32(in32x32, rightDown, stride, fliplr, flipud, shift, bd);
+}
+
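+// 64-point inverse DCT processing four columns per iteration. The row pass
+// (do_cols == 0) only visits the 32 columns that can hold non-zero
+// coefficients; the column pass (do_cols == 1) covers all 64.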
+static void idct64x64_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
+ int bd, int out_shift) {
+ int i, j;
+ const int32_t *cospi = cospi_arr(bit);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+ const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
+ const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
+ const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
+ int col;
+
+ const __m128i cospi1 = _mm_set1_epi32(cospi[1]);
+ const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
+ const __m128i cospi3 = _mm_set1_epi32(cospi[3]);
+ const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
+ const __m128i cospi5 = _mm_set1_epi32(cospi[5]);
+ const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
+ const __m128i cospi7 = _mm_set1_epi32(cospi[7]);
+ const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+ const __m128i cospi9 = _mm_set1_epi32(cospi[9]);
+ const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
+ const __m128i cospi11 = _mm_set1_epi32(cospi[11]);
+ const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
+ const __m128i cospi13 = _mm_set1_epi32(cospi[13]);
+ const __m128i cospi14 = _mm_set1_epi32(cospi[14]);
+ const __m128i cospi15 = _mm_set1_epi32(cospi[15]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i cospi17 = _mm_set1_epi32(cospi[17]);
+ const __m128i cospi18 = _mm_set1_epi32(cospi[18]);
+ const __m128i cospi19 = _mm_set1_epi32(cospi[19]);
+ const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
+ const __m128i cospi21 = _mm_set1_epi32(cospi[21]);
+ const __m128i cospi22 = _mm_set1_epi32(cospi[22]);
+ const __m128i cospi23 = _mm_set1_epi32(cospi[23]);
+ const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+ const __m128i cospi25 = _mm_set1_epi32(cospi[25]);
+ const __m128i cospi26 = _mm_set1_epi32(cospi[26]);
+ const __m128i cospi27 = _mm_set1_epi32(cospi[27]);
+ const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
+ const __m128i cospi29 = _mm_set1_epi32(cospi[29]);
+ const __m128i cospi30 = _mm_set1_epi32(cospi[30]);
+ const __m128i cospi31 = _mm_set1_epi32(cospi[31]);
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospi35 = _mm_set1_epi32(cospi[35]);
+ const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
+ const __m128i cospi38 = _mm_set1_epi32(cospi[38]);
+ const __m128i cospi39 = _mm_set1_epi32(cospi[39]);
+ const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+ const __m128i cospi43 = _mm_set1_epi32(cospi[43]);
+ const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
+ const __m128i cospi46 = _mm_set1_epi32(cospi[46]);
+ const __m128i cospi47 = _mm_set1_epi32(cospi[47]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospi51 = _mm_set1_epi32(cospi[51]);
+ const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
+ const __m128i cospi54 = _mm_set1_epi32(cospi[54]);
+ const __m128i cospi55 = _mm_set1_epi32(cospi[55]);
+ const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const __m128i cospi59 = _mm_set1_epi32(cospi[59]);
+ const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
+ const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
+ const __m128i cospi63 = _mm_set1_epi32(cospi[63]);
+
+ const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);
+ const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
+ const __m128i cospim12 = _mm_set1_epi32(-cospi[12]);
+ const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+ const __m128i cospim20 = _mm_set1_epi32(-cospi[20]);
+ const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
+ const __m128i cospim28 = _mm_set1_epi32(-cospi[28]);
+ const __m128i cospim32 = _mm_set1_epi32(-cospi[32]);
+ const __m128i cospim33 = _mm_set1_epi32(-cospi[33]);
+ const __m128i cospim34 = _mm_set1_epi32(-cospi[34]);
+ const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
+ const __m128i cospim37 = _mm_set1_epi32(-cospi[37]);
+ const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
+ const __m128i cospim41 = _mm_set1_epi32(-cospi[41]);
+ const __m128i cospim42 = _mm_set1_epi32(-cospi[42]);
+ const __m128i cospim44 = _mm_set1_epi32(-cospi[44]);
+ const __m128i cospim45 = _mm_set1_epi32(-cospi[45]);
+ const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
+ const __m128i cospim49 = _mm_set1_epi32(-cospi[49]);
+ const __m128i cospim50 = _mm_set1_epi32(-cospi[50]);
+ const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
+ const __m128i cospim53 = _mm_set1_epi32(-cospi[53]);
+ const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
+ const __m128i cospim57 = _mm_set1_epi32(-cospi[57]);
+ const __m128i cospim58 = _mm_set1_epi32(-cospi[58]);
+ const __m128i cospim60 = _mm_set1_epi32(-cospi[60]);
+ const __m128i cospim61 = _mm_set1_epi32(-cospi[61]);
+
+ for (col = 0; col < (do_cols ? 64 / 4 : 32 / 4); ++col) {
+ __m128i u[64], v[64];
+
+ // stage 1
+ u[32] = in[1 * 16 + col];
+ u[34] = in[17 * 16 + col];
+ u[36] = in[9 * 16 + col];
+ u[38] = in[25 * 16 + col];
+ u[40] = in[5 * 16 + col];
+ u[42] = in[21 * 16 + col];
+ u[44] = in[13 * 16 + col];
+ u[46] = in[29 * 16 + col];
+ u[48] = in[3 * 16 + col];
+ u[50] = in[19 * 16 + col];
+ u[52] = in[11 * 16 + col];
+ u[54] = in[27 * 16 + col];
+ u[56] = in[7 * 16 + col];
+ u[58] = in[23 * 16 + col];
+ u[60] = in[15 * 16 + col];
+ u[62] = in[31 * 16 + col];
+
+ v[16] = in[2 * 16 + col];
+ v[18] = in[18 * 16 + col];
+ v[20] = in[10 * 16 + col];
+ v[22] = in[26 * 16 + col];
+ v[24] = in[6 * 16 + col];
+ v[26] = in[22 * 16 + col];
+ v[28] = in[14 * 16 + col];
+ v[30] = in[30 * 16 + col];
+
+ u[8] = in[4 * 16 + col];
+ u[10] = in[20 * 16 + col];
+ u[12] = in[12 * 16 + col];
+ u[14] = in[28 * 16 + col];
+
+ v[4] = in[8 * 16 + col];
+ v[6] = in[24 * 16 + col];
+
+ u[0] = in[0 * 16 + col];
+ u[2] = in[16 * 16 + col];
+
+ // stage 2
+ v[32] = half_btf_0_sse4_1(&cospi63, &u[32], &rnding, bit);
+ v[33] = half_btf_0_sse4_1(&cospim33, &u[62], &rnding, bit);
+ v[34] = half_btf_0_sse4_1(&cospi47, &u[34], &rnding, bit);
+ v[35] = half_btf_0_sse4_1(&cospim49, &u[60], &rnding, bit);
+ v[36] = half_btf_0_sse4_1(&cospi55, &u[36], &rnding, bit);
+ v[37] = half_btf_0_sse4_1(&cospim41, &u[58], &rnding, bit);
+ v[38] = half_btf_0_sse4_1(&cospi39, &u[38], &rnding, bit);
+ v[39] = half_btf_0_sse4_1(&cospim57, &u[56], &rnding, bit);
+ v[40] = half_btf_0_sse4_1(&cospi59, &u[40], &rnding, bit);
+ v[41] = half_btf_0_sse4_1(&cospim37, &u[54], &rnding, bit);
+ v[42] = half_btf_0_sse4_1(&cospi43, &u[42], &rnding, bit);
+ v[43] = half_btf_0_sse4_1(&cospim53, &u[52], &rnding, bit);
+ v[44] = half_btf_0_sse4_1(&cospi51, &u[44], &rnding, bit);
+ v[45] = half_btf_0_sse4_1(&cospim45, &u[50], &rnding, bit);
+ v[46] = half_btf_0_sse4_1(&cospi35, &u[46], &rnding, bit);
+ v[47] = half_btf_0_sse4_1(&cospim61, &u[48], &rnding, bit);
+ v[48] = half_btf_0_sse4_1(&cospi3, &u[48], &rnding, bit);
+ v[49] = half_btf_0_sse4_1(&cospi29, &u[46], &rnding, bit);
+ v[50] = half_btf_0_sse4_1(&cospi19, &u[50], &rnding, bit);
+ v[51] = half_btf_0_sse4_1(&cospi13, &u[44], &rnding, bit);
+ v[52] = half_btf_0_sse4_1(&cospi11, &u[52], &rnding, bit);
+ v[53] = half_btf_0_sse4_1(&cospi21, &u[42], &rnding, bit);
+ v[54] = half_btf_0_sse4_1(&cospi27, &u[54], &rnding, bit);
+ v[55] = half_btf_0_sse4_1(&cospi5, &u[40], &rnding, bit);
+ v[56] = half_btf_0_sse4_1(&cospi7, &u[56], &rnding, bit);
+ v[57] = half_btf_0_sse4_1(&cospi25, &u[38], &rnding, bit);
+ v[58] = half_btf_0_sse4_1(&cospi23, &u[58], &rnding, bit);
+ v[59] = half_btf_0_sse4_1(&cospi9, &u[36], &rnding, bit);
+ v[60] = half_btf_0_sse4_1(&cospi15, &u[60], &rnding, bit);
+ v[61] = half_btf_0_sse4_1(&cospi17, &u[34], &rnding, bit);
+ v[62] = half_btf_0_sse4_1(&cospi31, &u[62], &rnding, bit);
+ v[63] = half_btf_0_sse4_1(&cospi1, &u[32], &rnding, bit);
+
+ // stage 3
+ u[16] = half_btf_0_sse4_1(&cospi62, &v[16], &rnding, bit);
+ u[17] = half_btf_0_sse4_1(&cospim34, &v[30], &rnding, bit);
+ u[18] = half_btf_0_sse4_1(&cospi46, &v[18], &rnding, bit);
+ u[19] = half_btf_0_sse4_1(&cospim50, &v[28], &rnding, bit);
+ u[20] = half_btf_0_sse4_1(&cospi54, &v[20], &rnding, bit);
+ u[21] = half_btf_0_sse4_1(&cospim42, &v[26], &rnding, bit);
+ u[22] = half_btf_0_sse4_1(&cospi38, &v[22], &rnding, bit);
+ u[23] = half_btf_0_sse4_1(&cospim58, &v[24], &rnding, bit);
+ u[24] = half_btf_0_sse4_1(&cospi6, &v[24], &rnding, bit);
+ u[25] = half_btf_0_sse4_1(&cospi26, &v[22], &rnding, bit);
+ u[26] = half_btf_0_sse4_1(&cospi22, &v[26], &rnding, bit);
+ u[27] = half_btf_0_sse4_1(&cospi10, &v[20], &rnding, bit);
+ u[28] = half_btf_0_sse4_1(&cospi14, &v[28], &rnding, bit);
+ u[29] = half_btf_0_sse4_1(&cospi18, &v[18], &rnding, bit);
+ u[30] = half_btf_0_sse4_1(&cospi30, &v[30], &rnding, bit);
+ u[31] = half_btf_0_sse4_1(&cospi2, &v[16], &rnding, bit);
+
+ for (i = 32; i < 64; i += 4) {
+ addsub_sse4_1(v[i + 0], v[i + 1], &u[i + 0], &u[i + 1], &clamp_lo,
+ &clamp_hi);
+ addsub_sse4_1(v[i + 3], v[i + 2], &u[i + 3], &u[i + 2], &clamp_lo,
+ &clamp_hi);
+ }
+
+ // stage 4
+ v[8] = half_btf_0_sse4_1(&cospi60, &u[8], &rnding, bit);
+ v[9] = half_btf_0_sse4_1(&cospim36, &u[14], &rnding, bit);
+ v[10] = half_btf_0_sse4_1(&cospi44, &u[10], &rnding, bit);
+ v[11] = half_btf_0_sse4_1(&cospim52, &u[12], &rnding, bit);
+ v[12] = half_btf_0_sse4_1(&cospi12, &u[12], &rnding, bit);
+ v[13] = half_btf_0_sse4_1(&cospi20, &u[10], &rnding, bit);
+ v[14] = half_btf_0_sse4_1(&cospi28, &u[14], &rnding, bit);
+ v[15] = half_btf_0_sse4_1(&cospi4, &u[8], &rnding, bit);
+
+ for (i = 16; i < 32; i += 4) {
+ addsub_sse4_1(u[i + 0], u[i + 1], &v[i + 0], &v[i + 1], &clamp_lo,
+ &clamp_hi);
+ addsub_sse4_1(u[i + 3], u[i + 2], &v[i + 3], &v[i + 2], &clamp_lo,
+ &clamp_hi);
+ }
+
+ for (i = 32; i < 64; i += 4) {
+ v[i + 0] = u[i + 0];
+ v[i + 3] = u[i + 3];
+ }
+
+ v[33] = half_btf_sse4_1(&cospim4, &u[33], &cospi60, &u[62], &rnding, bit);
+ v[34] = half_btf_sse4_1(&cospim60, &u[34], &cospim4, &u[61], &rnding, bit);
+ v[37] = half_btf_sse4_1(&cospim36, &u[37], &cospi28, &u[58], &rnding, bit);
+ v[38] = half_btf_sse4_1(&cospim28, &u[38], &cospim36, &u[57], &rnding, bit);
+ v[41] = half_btf_sse4_1(&cospim20, &u[41], &cospi44, &u[54], &rnding, bit);
+ v[42] = half_btf_sse4_1(&cospim44, &u[42], &cospim20, &u[53], &rnding, bit);
+ v[45] = half_btf_sse4_1(&cospim52, &u[45], &cospi12, &u[50], &rnding, bit);
+ v[46] = half_btf_sse4_1(&cospim12, &u[46], &cospim52, &u[49], &rnding, bit);
+ v[49] = half_btf_sse4_1(&cospim52, &u[46], &cospi12, &u[49], &rnding, bit);
+ v[50] = half_btf_sse4_1(&cospi12, &u[45], &cospi52, &u[50], &rnding, bit);
+ v[53] = half_btf_sse4_1(&cospim20, &u[42], &cospi44, &u[53], &rnding, bit);
+ v[54] = half_btf_sse4_1(&cospi44, &u[41], &cospi20, &u[54], &rnding, bit);
+ v[57] = half_btf_sse4_1(&cospim36, &u[38], &cospi28, &u[57], &rnding, bit);
+ v[58] = half_btf_sse4_1(&cospi28, &u[37], &cospi36, &u[58], &rnding, bit);
+ v[61] = half_btf_sse4_1(&cospim4, &u[34], &cospi60, &u[61], &rnding, bit);
+ v[62] = half_btf_sse4_1(&cospi60, &u[33], &cospi4, &u[62], &rnding, bit);
+
+ // stage 5
+ u[4] = half_btf_0_sse4_1(&cospi56, &v[4], &rnding, bit);
+ u[5] = half_btf_0_sse4_1(&cospim40, &v[6], &rnding, bit);
+ u[6] = half_btf_0_sse4_1(&cospi24, &v[6], &rnding, bit);
+ u[7] = half_btf_0_sse4_1(&cospi8, &v[4], &rnding, bit);
+
+ for (i = 8; i < 16; i += 4) {
+ addsub_sse4_1(v[i + 0], v[i + 1], &u[i + 0], &u[i + 1], &clamp_lo,
+ &clamp_hi);
+ addsub_sse4_1(v[i + 3], v[i + 2], &u[i + 3], &u[i + 2], &clamp_lo,
+ &clamp_hi);
+ }
+
+ for (i = 16; i < 32; i += 4) {
+ u[i + 0] = v[i + 0];
+ u[i + 3] = v[i + 3];
+ }
+
+ u[17] = half_btf_sse4_1(&cospim8, &v[17], &cospi56, &v[30], &rnding, bit);
+ u[18] = half_btf_sse4_1(&cospim56, &v[18], &cospim8, &v[29], &rnding, bit);
+ u[21] = half_btf_sse4_1(&cospim40, &v[21], &cospi24, &v[26], &rnding, bit);
+ u[22] = half_btf_sse4_1(&cospim24, &v[22], &cospim40, &v[25], &rnding, bit);
+ u[25] = half_btf_sse4_1(&cospim40, &v[22], &cospi24, &v[25], &rnding, bit);
+ u[26] = half_btf_sse4_1(&cospi24, &v[21], &cospi40, &v[26], &rnding, bit);
+ u[29] = half_btf_sse4_1(&cospim8, &v[18], &cospi56, &v[29], &rnding, bit);
+ u[30] = half_btf_sse4_1(&cospi56, &v[17], &cospi8, &v[30], &rnding, bit);
+
+ for (i = 32; i < 64; i += 8) {
+ addsub_sse4_1(v[i + 0], v[i + 3], &u[i + 0], &u[i + 3], &clamp_lo,
+ &clamp_hi);
+ addsub_sse4_1(v[i + 1], v[i + 2], &u[i + 1], &u[i + 2], &clamp_lo,
+ &clamp_hi);
+
+ addsub_sse4_1(v[i + 7], v[i + 4], &u[i + 7], &u[i + 4], &clamp_lo,
+ &clamp_hi);
+ addsub_sse4_1(v[i + 6], v[i + 5], &u[i + 6], &u[i + 5], &clamp_lo,
+ &clamp_hi);
+ }
+
+ // stage 6
+ v[0] = half_btf_0_sse4_1(&cospi32, &u[0], &rnding, bit);
+ v[1] = half_btf_0_sse4_1(&cospi32, &u[0], &rnding, bit);
+ v[2] = half_btf_0_sse4_1(&cospi48, &u[2], &rnding, bit);
+ v[3] = half_btf_0_sse4_1(&cospi16, &u[2], &rnding, bit);
+
+ addsub_sse4_1(u[4], u[5], &v[4], &v[5], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[7], u[6], &v[7], &v[6], &clamp_lo, &clamp_hi);
+
+ for (i = 8; i < 16; i += 4) {
+ v[i + 0] = u[i + 0];
+ v[i + 3] = u[i + 3];
+ }
+
+ v[9] = half_btf_sse4_1(&cospim16, &u[9], &cospi48, &u[14], &rnding, bit);
+ v[10] = half_btf_sse4_1(&cospim48, &u[10], &cospim16, &u[13], &rnding, bit);
+ v[13] = half_btf_sse4_1(&cospim16, &u[10], &cospi48, &u[13], &rnding, bit);
+ v[14] = half_btf_sse4_1(&cospi48, &u[9], &cospi16, &u[14], &rnding, bit);
+
+ for (i = 16; i < 32; i += 8) {
+ addsub_sse4_1(u[i + 0], u[i + 3], &v[i + 0], &v[i + 3], &clamp_lo,
+ &clamp_hi);
+ addsub_sse4_1(u[i + 1], u[i + 2], &v[i + 1], &v[i + 2], &clamp_lo,
+ &clamp_hi);
+
+ addsub_sse4_1(u[i + 7], u[i + 4], &v[i + 7], &v[i + 4], &clamp_lo,
+ &clamp_hi);
+ addsub_sse4_1(u[i + 6], u[i + 5], &v[i + 6], &v[i + 5], &clamp_lo,
+ &clamp_hi);
+ }
+
+ for (i = 32; i < 64; i += 8) {
+ v[i + 0] = u[i + 0];
+ v[i + 1] = u[i + 1];
+ v[i + 6] = u[i + 6];
+ v[i + 7] = u[i + 7];
+ }
+
+ v[34] = half_btf_sse4_1(&cospim8, &u[34], &cospi56, &u[61], &rnding, bit);
+ v[35] = half_btf_sse4_1(&cospim8, &u[35], &cospi56, &u[60], &rnding, bit);
+ v[36] = half_btf_sse4_1(&cospim56, &u[36], &cospim8, &u[59], &rnding, bit);
+ v[37] = half_btf_sse4_1(&cospim56, &u[37], &cospim8, &u[58], &rnding, bit);
+ v[42] = half_btf_sse4_1(&cospim40, &u[42], &cospi24, &u[53], &rnding, bit);
+ v[43] = half_btf_sse4_1(&cospim40, &u[43], &cospi24, &u[52], &rnding, bit);
+ v[44] = half_btf_sse4_1(&cospim24, &u[44], &cospim40, &u[51], &rnding, bit);
+ v[45] = half_btf_sse4_1(&cospim24, &u[45], &cospim40, &u[50], &rnding, bit);
+ v[50] = half_btf_sse4_1(&cospim40, &u[45], &cospi24, &u[50], &rnding, bit);
+ v[51] = half_btf_sse4_1(&cospim40, &u[44], &cospi24, &u[51], &rnding, bit);
+ v[52] = half_btf_sse4_1(&cospi24, &u[43], &cospi40, &u[52], &rnding, bit);
+ v[53] = half_btf_sse4_1(&cospi24, &u[42], &cospi40, &u[53], &rnding, bit);
+ v[58] = half_btf_sse4_1(&cospim8, &u[37], &cospi56, &u[58], &rnding, bit);
+ v[59] = half_btf_sse4_1(&cospim8, &u[36], &cospi56, &u[59], &rnding, bit);
+ v[60] = half_btf_sse4_1(&cospi56, &u[35], &cospi8, &u[60], &rnding, bit);
+ v[61] = half_btf_sse4_1(&cospi56, &u[34], &cospi8, &u[61], &rnding, bit);
+
+ // stage 7
+ addsub_sse4_1(v[0], v[3], &u[0], &u[3], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[1], v[2], &u[1], &u[2], &clamp_lo, &clamp_hi);
+
+ u[4] = v[4];
+ u[7] = v[7];
+ u[5] = half_btf_sse4_1(&cospim32, &v[5], &cospi32, &v[6], &rnding, bit);
+ u[6] = half_btf_sse4_1(&cospi32, &v[5], &cospi32, &v[6], &rnding, bit);
+
+ addsub_sse4_1(v[8], v[11], &u[8], &u[11], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[9], v[10], &u[9], &u[10], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[15], v[12], &u[15], &u[12], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[14], v[13], &u[14], &u[13], &clamp_lo, &clamp_hi);
+
+ for (i = 16; i < 32; i += 8) {
+ u[i + 0] = v[i + 0];
+ u[i + 1] = v[i + 1];
+ u[i + 6] = v[i + 6];
+ u[i + 7] = v[i + 7];
+ }
+
+ u[18] = half_btf_sse4_1(&cospim16, &v[18], &cospi48, &v[29], &rnding, bit);
+ u[19] = half_btf_sse4_1(&cospim16, &v[19], &cospi48, &v[28], &rnding, bit);
+ u[20] = half_btf_sse4_1(&cospim48, &v[20], &cospim16, &v[27], &rnding, bit);
+ u[21] = half_btf_sse4_1(&cospim48, &v[21], &cospim16, &v[26], &rnding, bit);
+ u[26] = half_btf_sse4_1(&cospim16, &v[21], &cospi48, &v[26], &rnding, bit);
+ u[27] = half_btf_sse4_1(&cospim16, &v[20], &cospi48, &v[27], &rnding, bit);
+ u[28] = half_btf_sse4_1(&cospi48, &v[19], &cospi16, &v[28], &rnding, bit);
+ u[29] = half_btf_sse4_1(&cospi48, &v[18], &cospi16, &v[29], &rnding, bit);
+
+ for (i = 32; i < 64; i += 16) {
+ for (j = i; j < i + 4; j++) {
+ addsub_sse4_1(v[j], v[j ^ 7], &u[j], &u[j ^ 7], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(v[j ^ 15], v[j ^ 8], &u[j ^ 15], &u[j ^ 8], &clamp_lo,
+ &clamp_hi);
+ }
+ }
+
+ // stage 8
+ for (i = 0; i < 4; ++i) {
+ addsub_sse4_1(u[i], u[7 - i], &v[i], &v[7 - i], &clamp_lo, &clamp_hi);
+ }
+
+ v[8] = u[8];
+ v[9] = u[9];
+ v[14] = u[14];
+ v[15] = u[15];
+
+ v[10] = half_btf_sse4_1(&cospim32, &u[10], &cospi32, &u[13], &rnding, bit);
+ v[11] = half_btf_sse4_1(&cospim32, &u[11], &cospi32, &u[12], &rnding, bit);
+ v[12] = half_btf_sse4_1(&cospi32, &u[11], &cospi32, &u[12], &rnding, bit);
+ v[13] = half_btf_sse4_1(&cospi32, &u[10], &cospi32, &u[13], &rnding, bit);
+
+ for (i = 16; i < 20; ++i) {
+ addsub_sse4_1(u[i], u[i ^ 7], &v[i], &v[i ^ 7], &clamp_lo, &clamp_hi);
+ addsub_sse4_1(u[i ^ 15], u[i ^ 8], &v[i ^ 15], &v[i ^ 8], &clamp_lo,
+ &clamp_hi);
+ }
+
+ for (i = 32; i < 36; ++i) {
+ v[i] = u[i];
+ v[i + 12] = u[i + 12];
+ v[i + 16] = u[i + 16];
+ v[i + 28] = u[i + 28];
+ }
+
+ v[36] = half_btf_sse4_1(&cospim16, &u[36], &cospi48, &u[59], &rnding, bit);
+ v[37] = half_btf_sse4_1(&cospim16, &u[37], &cospi48, &u[58], &rnding, bit);
+ v[38] = half_btf_sse4_1(&cospim16, &u[38], &cospi48, &u[57], &rnding, bit);
+ v[39] = half_btf_sse4_1(&cospim16, &u[39], &cospi48, &u[56], &rnding, bit);
+ v[40] = half_btf_sse4_1(&cospim48, &u[40], &cospim16, &u[55], &rnding, bit);
+ v[41] = half_btf_sse4_1(&cospim48, &u[41], &cospim16, &u[54], &rnding, bit);
+ v[42] = half_btf_sse4_1(&cospim48, &u[42], &cospim16, &u[53], &rnding, bit);
+ v[43] = half_btf_sse4_1(&cospim48, &u[43], &cospim16, &u[52], &rnding, bit);
+ v[52] = half_btf_sse4_1(&cospim16, &u[43], &cospi48, &u[52], &rnding, bit);
+ v[53] = half_btf_sse4_1(&cospim16, &u[42], &cospi48, &u[53], &rnding, bit);
+ v[54] = half_btf_sse4_1(&cospim16, &u[41], &cospi48, &u[54], &rnding, bit);
+ v[55] = half_btf_sse4_1(&cospim16, &u[40], &cospi48, &u[55], &rnding, bit);
+ v[56] = half_btf_sse4_1(&cospi48, &u[39], &cospi16, &u[56], &rnding, bit);
+ v[57] = half_btf_sse4_1(&cospi48, &u[38], &cospi16, &u[57], &rnding, bit);
+ v[58] = half_btf_sse4_1(&cospi48, &u[37], &cospi16, &u[58], &rnding, bit);
+ v[59] = half_btf_sse4_1(&cospi48, &u[36], &cospi16, &u[59], &rnding, bit);
+
+ // stage 9
+ for (i = 0; i < 8; ++i) {
+ addsub_sse4_1(v[i], v[15 - i], &u[i], &u[15 - i], &clamp_lo, &clamp_hi);
+ }
+
+ for (i = 16; i < 20; ++i) {
+ u[i] = v[i];
+ u[i + 12] = v[i + 12];
+ }
+
+ u[20] = half_btf_sse4_1(&cospim32, &v[20], &cospi32, &v[27], &rnding, bit);
+ u[21] = half_btf_sse4_1(&cospim32, &v[21], &cospi32, &v[26], &rnding, bit);
+ u[22] = half_btf_sse4_1(&cospim32, &v[22], &cospi32, &v[25], &rnding, bit);
+ u[23] = half_btf_sse4_1(&cospim32, &v[23], &cospi32, &v[24], &rnding, bit);
+ u[24] = half_btf_sse4_1(&cospi32, &v[23], &cospi32, &v[24], &rnding, bit);
+ u[25] = half_btf_sse4_1(&cospi32, &v[22], &cospi32, &v[25], &rnding, bit);
+ u[26] = half_btf_sse4_1(&cospi32, &v[21], &cospi32, &v[26], &rnding, bit);
+ u[27] = half_btf_sse4_1(&cospi32, &v[20], &cospi32, &v[27], &rnding, bit);
+
+ for (i = 32; i < 40; i++) {
+ addsub_sse4_1(v[i], v[i ^ 15], &u[i], &u[i ^ 15], &clamp_lo, &clamp_hi);
+ }
+
+ for (i = 48; i < 56; i++) {
+ addsub_sse4_1(v[i ^ 15], v[i], &u[i ^ 15], &u[i], &clamp_lo, &clamp_hi);
+ }
+
+ // stage 10
+ for (i = 0; i < 16; i++) {
+ addsub_sse4_1(u[i], u[31 - i], &v[i], &v[31 - i], &clamp_lo, &clamp_hi);
+ }
+
+ for (i = 32; i < 40; i++) v[i] = u[i];
+
+ v[40] = half_btf_sse4_1(&cospim32, &u[40], &cospi32, &u[55], &rnding, bit);
+ v[41] = half_btf_sse4_1(&cospim32, &u[41], &cospi32, &u[54], &rnding, bit);
+ v[42] = half_btf_sse4_1(&cospim32, &u[42], &cospi32, &u[53], &rnding, bit);
+ v[43] = half_btf_sse4_1(&cospim32, &u[43], &cospi32, &u[52], &rnding, bit);
+ v[44] = half_btf_sse4_1(&cospim32, &u[44], &cospi32, &u[51], &rnding, bit);
+ v[45] = half_btf_sse4_1(&cospim32, &u[45], &cospi32, &u[50], &rnding, bit);
+ v[46] = half_btf_sse4_1(&cospim32, &u[46], &cospi32, &u[49], &rnding, bit);
+ v[47] = half_btf_sse4_1(&cospim32, &u[47], &cospi32, &u[48], &rnding, bit);
+ v[48] = half_btf_sse4_1(&cospi32, &u[47], &cospi32, &u[48], &rnding, bit);
+ v[49] = half_btf_sse4_1(&cospi32, &u[46], &cospi32, &u[49], &rnding, bit);
+ v[50] = half_btf_sse4_1(&cospi32, &u[45], &cospi32, &u[50], &rnding, bit);
+ v[51] = half_btf_sse4_1(&cospi32, &u[44], &cospi32, &u[51], &rnding, bit);
+ v[52] = half_btf_sse4_1(&cospi32, &u[43], &cospi32, &u[52], &rnding, bit);
+ v[53] = half_btf_sse4_1(&cospi32, &u[42], &cospi32, &u[53], &rnding, bit);
+ v[54] = half_btf_sse4_1(&cospi32, &u[41], &cospi32, &u[54], &rnding, bit);
+ v[55] = half_btf_sse4_1(&cospi32, &u[40], &cospi32, &u[55], &rnding, bit);
+
+ for (i = 56; i < 64; i++) v[i] = u[i];
+
+ // stage 11
+ if (do_cols) {
+ for (i = 0; i < 32; i++) {
+ addsub_no_clamp_sse4_1(v[i], v[63 - i], &out[16 * (i) + col],
+ &out[16 * (63 - i) + col]);
+ }
+ } else {
+ for (i = 0; i < 32; i++) {
+ addsub_shift_sse4_1(v[i], v[63 - i], &out[16 * (i) + col],
+ &out[16 * (63 - i) + col], &clamp_lo, &clamp_hi,
+ out_shift);
+ }
+ }
+ }
+}
+
+void av1_inv_txfm2d_add_64x64_sse4_1(const int32_t *coeff, uint16_t *output,
+ int stride, TX_TYPE tx_type, int bd) {
+ __m128i in[64 * 64 / 4], out[64 * 64 / 4];
+ const int8_t *shift = inv_txfm_shift_ls[TX_64X64];
+ const int txw_idx = tx_size_wide_log2[TX_64X64] - tx_size_wide_log2[0];
+ const int txh_idx = tx_size_high_log2[TX_64X64] - tx_size_high_log2[0];
+
+ switch (tx_type) {
+ case DCT_DCT:
+ load_buffer_64x64_lower_32x32(coeff, in);
+ transpose_64x64(in, out, 0);
+ idct64x64_sse4_1(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
+ -shift[0]);
+ transpose_64x64(in, out, 1);
+ idct64x64_sse4_1(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
+ write_buffer_64x64(in, output, stride, 0, 0, -shift[1], bd);
+ break;
+
+ default:
+ av1_inv_txfm2d_add_64x64_c(coeff, output, stride, tx_type, bd);
+ break;
+ }
+}
diff --git a/third_party/aom/av1/common/x86/highbd_jnt_convolve_avx2.c b/third_party/aom/av1/common/x86/highbd_jnt_convolve_avx2.c
new file mode 100644
index 000000000..89d0ecb1e
--- /dev/null
+++ b/third_party/aom/av1/common/x86/highbd_jnt_convolve_avx2.c
@@ -0,0 +1,853 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <immintrin.h>
+#include <assert.h>
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_dsp/aom_convolve.h"
+#include "aom_dsp/x86/convolve_avx2.h"
+#include "aom_dsp/x86/convolve_common_intrin.h"
+#include "aom_dsp/x86/convolve_sse4_1.h"
+#include "aom_dsp/x86/synonyms.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/aom_filter.h"
+#include "av1/common/convolve.h"
+
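+// Copy (no sub-pel filtering) variant of the high-bitdepth jnt compound
+// convolve: source pixels are shifted into the compound domain and either
+// stored to the CONV_BUF accumulator or jnt-averaged against it and written
+// out as clipped pixels.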
+void av1_highbd_jnt_convolve_2d_copy_avx2(
+ const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y, const int subpel_x_q4,
+ const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ (void)filter_params_x;
+ (void)filter_params_y;
+ (void)subpel_x_q4;
+ (void)subpel_y_q4;
+
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
+ const __m128i left_shift = _mm_cvtsi32_si128(bits);
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m256i wt0 = _mm256_set1_epi32(w0);
+ const __m256i wt1 = _mm256_set1_epi32(w1);
+ const __m256i zero = _mm256_setzero_si256();
+ int i, j;
+
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m256i offset_const = _mm256_set1_epi32(offset);
+ const __m256i offset_const_16b = _mm256_set1_epi16(offset);
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m256i rounding_const = _mm256_set1_epi32((1 << rounding_shift) >> 1);
+ const __m256i clip_pixel_to_bd =
+ _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
+
+ assert(bits <= 4);
+
+ if (!(w % 16)) {
+ for (i = 0; i < h; i += 1) {
+ for (j = 0; j < w; j += 16) {
+ const __m256i src_16bit =
+ _mm256_loadu_si256((__m256i *)(&src[i * src_stride + j]));
+
+ const __m256i res = _mm256_sll_epi16(src_16bit, left_shift);
+
+ if (do_average) {
+ const __m256i data_0 =
+ _mm256_loadu_si256((__m256i *)(&dst[i * dst_stride + j]));
+
+ const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_0, zero);
+ const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_0, zero);
+
+ const __m256i res_32b_lo = _mm256_unpacklo_epi16(res, zero);
+ const __m256i res_unsigned_lo =
+ _mm256_add_epi32(res_32b_lo, offset_const);
+
+ const __m256i comp_avg_res_lo = highbd_comp_avg(
+ &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m256i res_32b_hi = _mm256_unpackhi_epi16(res, zero);
+ const __m256i res_unsigned_hi =
+ _mm256_add_epi32(res_32b_hi, offset_const);
+
+ const __m256i comp_avg_res_hi = highbd_comp_avg(
+ &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m256i round_result_lo = highbd_convolve_rounding(
+ &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift);
+ const __m256i round_result_hi = highbd_convolve_rounding(
+ &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift);
+
+ const __m256i res_16b =
+ _mm256_packus_epi32(round_result_lo, round_result_hi);
+ const __m256i res_clip = _mm256_min_epi16(res_16b, clip_pixel_to_bd);
+
+ _mm256_store_si256((__m256i *)(&dst0[i * dst_stride0 + j]), res_clip);
+ } else {
+ const __m256i res_unsigned_16b =
+ _mm256_adds_epu16(res, offset_const_16b);
+
+ _mm256_store_si256((__m256i *)(&dst[i * dst_stride + j]),
+ res_unsigned_16b);
+ }
+ }
+ }
+ } else if (!(w % 4)) {
+ for (i = 0; i < h; i += 2) {
+ for (j = 0; j < w; j += 8) {
+ const __m128i src_row_0 =
+ _mm_loadu_si128((__m128i *)(&src[i * src_stride + j]));
+ const __m128i src_row_1 =
+ _mm_loadu_si128((__m128i *)(&src[i * src_stride + j + src_stride]));
+        // Not all compilers support _mm256_set_m128i() yet, so build the
+        // 256-bit register with a cast and _mm256_insertf128_si256() instead.
+ const __m256i src_10 = _mm256_insertf128_si256(
+ _mm256_castsi128_si256(src_row_0), src_row_1, 1);
+
+ const __m256i res = _mm256_sll_epi16(src_10, left_shift);
+
+ if (w - j < 8) {
+ if (do_average) {
+ const __m256i data_0 = _mm256_castsi128_si256(
+ _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j])));
+ const __m256i data_1 = _mm256_castsi128_si256(_mm_loadl_epi64(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
+ const __m256i data_01 =
+ _mm256_permute2x128_si256(data_0, data_1, 0x20);
+
+ const __m256i data_ref_0 = _mm256_unpacklo_epi16(data_01, zero);
+
+ const __m256i res_32b = _mm256_unpacklo_epi16(res, zero);
+ const __m256i res_unsigned_lo =
+ _mm256_add_epi32(res_32b, offset_const);
+
+ const __m256i comp_avg_res = highbd_comp_avg(
+ &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m256i round_result = highbd_convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m256i res_16b =
+ _mm256_packus_epi32(round_result, round_result);
+ const __m256i res_clip =
+ _mm256_min_epi16(res_16b, clip_pixel_to_bd);
+
+ const __m128i res_0 = _mm256_castsi256_si128(res_clip);
+ const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
+
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
+ _mm_storel_epi64(
+ (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
+ } else {
+ const __m256i res_unsigned_16b =
+ _mm256_adds_epu16(res, offset_const_16b);
+
+ const __m128i res_0 = _mm256_castsi256_si128(res_unsigned_16b);
+ const __m128i res_1 = _mm256_extracti128_si256(res_unsigned_16b, 1);
+
+ _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), res_0);
+ _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_1);
+ }
+ } else {
+ if (do_average) {
+ const __m256i data_0 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j])));
+ const __m256i data_1 = _mm256_castsi128_si256(_mm_loadu_si128(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
+ const __m256i data_01 =
+ _mm256_permute2x128_si256(data_0, data_1, 0x20);
+
+ const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_01, zero);
+ const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_01, zero);
+
+ const __m256i res_32b_lo = _mm256_unpacklo_epi16(res, zero);
+ const __m256i res_unsigned_lo =
+ _mm256_add_epi32(res_32b_lo, offset_const);
+
+ const __m256i comp_avg_res_lo = highbd_comp_avg(
+ &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m256i res_32b_hi = _mm256_unpackhi_epi16(res, zero);
+ const __m256i res_unsigned_hi =
+ _mm256_add_epi32(res_32b_hi, offset_const);
+
+ const __m256i comp_avg_res_hi = highbd_comp_avg(
+ &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m256i round_result_lo =
+ highbd_convolve_rounding(&comp_avg_res_lo, &offset_const,
+ &rounding_const, rounding_shift);
+ const __m256i round_result_hi =
+ highbd_convolve_rounding(&comp_avg_res_hi, &offset_const,
+ &rounding_const, rounding_shift);
+
+ const __m256i res_16b =
+ _mm256_packus_epi32(round_result_lo, round_result_hi);
+ const __m256i res_clip =
+ _mm256_min_epi16(res_16b, clip_pixel_to_bd);
+
+ const __m128i res_0 = _mm256_castsi256_si128(res_clip);
+ const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
+
+ _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
+ _mm_store_si128(
+ (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
+ } else {
+ const __m256i res_unsigned_16b =
+ _mm256_adds_epu16(res, offset_const_16b);
+ const __m128i res_0 = _mm256_castsi256_si128(res_unsigned_16b);
+ const __m128i res_1 = _mm256_extracti128_si256(res_unsigned_16b, 1);
+
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_1);
+ }
+ }
+ }
+ }
+ }
+}
+
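+// Full 2D (horizontal then vertical) high-bitdepth jnt compound convolve.
+// The horizontal pass writes a 16-bit intermediate block that the vertical
+// pass then consumes eight columns at a time.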
+void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride,
+ uint16_t *dst0, int dst_stride0, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4,
+ const int subpel_y_q4,
+ ConvolveParams *conv_params, int bd) {
+ DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]);
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ int im_h = h + filter_params_y->taps - 1;
+ int im_stride = 8;
+ int i, j;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
+
+ // Check that, even with 12-bit input, the intermediate values will fit
+ // into an unsigned 16-bit intermediate array.
+ assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
+
+ __m256i s[8], coeffs_y[4], coeffs_x[4];
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m256i wt0 = _mm256_set1_epi32(w0);
+ const __m256i wt1 = _mm256_set1_epi32(w1);
+ const __m256i zero = _mm256_setzero_si256();
+
+ const __m256i round_const_x = _mm256_set1_epi32(
+ ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
+ const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0);
+
+ const __m256i round_const_y = _mm256_set1_epi32(
+ ((1 << conv_params->round_1) >> 1) -
+ (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
+ const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
+
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m256i offset_const = _mm256_set1_epi32(offset);
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m256i rounding_const = _mm256_set1_epi32((1 << rounding_shift) >> 1);
+
+ const __m256i clip_pixel_to_bd =
+ _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
+
+ prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x);
+ prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
+
+ for (j = 0; j < w; j += 8) {
+ /* Horizontal filter */
+ {
+ for (i = 0; i < im_h; i += 2) {
+ const __m256i row0 =
+ _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]);
+ __m256i row1 = _mm256_set1_epi16(0);
+ if (i + 1 < im_h)
+ row1 =
+ _mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]);
+
+ const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20);
+ const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31);
+
+ // even pixels
+ s[0] = _mm256_alignr_epi8(r1, r0, 0);
+ s[1] = _mm256_alignr_epi8(r1, r0, 4);
+ s[2] = _mm256_alignr_epi8(r1, r0, 8);
+ s[3] = _mm256_alignr_epi8(r1, r0, 12);
+
+ __m256i res_even = convolve(s, coeffs_x);
+ res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const_x),
+ round_shift_x);
+
+ // odd pixels
+ s[0] = _mm256_alignr_epi8(r1, r0, 2);
+ s[1] = _mm256_alignr_epi8(r1, r0, 6);
+ s[2] = _mm256_alignr_epi8(r1, r0, 10);
+ s[3] = _mm256_alignr_epi8(r1, r0, 14);
+
+ __m256i res_odd = convolve(s, coeffs_x);
+ res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const_x),
+ round_shift_x);
+
+ __m256i res_even1 = _mm256_packs_epi32(res_even, res_even);
+ __m256i res_odd1 = _mm256_packs_epi32(res_odd, res_odd);
+ __m256i res = _mm256_unpacklo_epi16(res_even1, res_odd1);
+
+ _mm256_store_si256((__m256i *)&im_block[i * im_stride], res);
+ }
+ }
+
+ /* Vertical filter */
+ {
+ __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride));
+ __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride));
+ __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride));
+ __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride));
+ __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride));
+ __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride));
+
+ s[0] = _mm256_unpacklo_epi16(s0, s1);
+ s[1] = _mm256_unpacklo_epi16(s2, s3);
+ s[2] = _mm256_unpacklo_epi16(s4, s5);
+
+ s[4] = _mm256_unpackhi_epi16(s0, s1);
+ s[5] = _mm256_unpackhi_epi16(s2, s3);
+ s[6] = _mm256_unpackhi_epi16(s4, s5);
+
+ for (i = 0; i < h; i += 2) {
+ const int16_t *data = &im_block[i * im_stride];
+
+ const __m256i s6 =
+ _mm256_loadu_si256((__m256i *)(data + 6 * im_stride));
+ const __m256i s7 =
+ _mm256_loadu_si256((__m256i *)(data + 7 * im_stride));
+
+ s[3] = _mm256_unpacklo_epi16(s6, s7);
+ s[7] = _mm256_unpackhi_epi16(s6, s7);
+
+ const __m256i res_a = convolve(s, coeffs_y);
+
+ const __m256i res_a_round = _mm256_sra_epi32(
+ _mm256_add_epi32(res_a, round_const_y), round_shift_y);
+
+ const __m256i res_unsigned_lo =
+ _mm256_add_epi32(res_a_round, offset_const);
+
+ if (w - j < 8) {
+ if (do_average) {
+ const __m256i data_0 = _mm256_castsi128_si256(
+ _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j])));
+ const __m256i data_1 = _mm256_castsi128_si256(_mm_loadl_epi64(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
+ const __m256i data_01 =
+ _mm256_permute2x128_si256(data_0, data_1, 0x20);
+
+ const __m256i data_ref_0 = _mm256_unpacklo_epi16(data_01, zero);
+
+ const __m256i comp_avg_res = highbd_comp_avg(
+ &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m256i round_result = highbd_convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m256i res_16b =
+ _mm256_packus_epi32(round_result, round_result);
+ const __m256i res_clip =
+ _mm256_min_epi16(res_16b, clip_pixel_to_bd);
+
+ const __m128i res_0 = _mm256_castsi256_si128(res_clip);
+ const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
+
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
+ _mm_storel_epi64(
+ (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
+ } else {
+ __m256i res_16b =
+ _mm256_packus_epi32(res_unsigned_lo, res_unsigned_lo);
+ const __m128i res_0 = _mm256_castsi256_si128(res_16b);
+ const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1);
+
+ _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), res_0);
+ _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_1);
+ }
+ } else {
+ const __m256i res_b = convolve(s + 4, coeffs_y);
+ const __m256i res_b_round = _mm256_sra_epi32(
+ _mm256_add_epi32(res_b, round_const_y), round_shift_y);
+
+ __m256i res_unsigned_hi = _mm256_add_epi32(res_b_round, offset_const);
+
+ if (do_average) {
+ const __m256i data_0 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j])));
+ const __m256i data_1 = _mm256_castsi128_si256(_mm_loadu_si128(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
+ const __m256i data_01 =
+ _mm256_permute2x128_si256(data_0, data_1, 0x20);
+
+ const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_01, zero);
+ const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_01, zero);
+
+ const __m256i comp_avg_res_lo = highbd_comp_avg(
+ &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
+ const __m256i comp_avg_res_hi = highbd_comp_avg(
+ &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m256i round_result_lo =
+ highbd_convolve_rounding(&comp_avg_res_lo, &offset_const,
+ &rounding_const, rounding_shift);
+ const __m256i round_result_hi =
+ highbd_convolve_rounding(&comp_avg_res_hi, &offset_const,
+ &rounding_const, rounding_shift);
+
+ const __m256i res_16b =
+ _mm256_packus_epi32(round_result_lo, round_result_hi);
+ const __m256i res_clip =
+ _mm256_min_epi16(res_16b, clip_pixel_to_bd);
+
+ const __m128i res_0 = _mm256_castsi256_si128(res_clip);
+ const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
+
+ _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
+ _mm_store_si128(
+ (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
+ } else {
+ __m256i res_16b =
+ _mm256_packus_epi32(res_unsigned_lo, res_unsigned_hi);
+ const __m128i res_0 = _mm256_castsi256_si128(res_16b);
+ const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1);
+
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_1);
+ }
+ }
+
+ s[0] = s[1];
+ s[1] = s[2];
+ s[2] = s[3];
+
+ s[4] = s[5];
+ s[5] = s[6];
+ s[6] = s[7];
+ }
+ }
+ }
+}
+
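+// Horizontal-only high-bitdepth jnt compound convolve.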
+void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride,
+ uint16_t *dst0, int dst_stride0, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4,
+ const int subpel_y_q4,
+ ConvolveParams *conv_params, int bd) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const uint16_t *const src_ptr = src - fo_horiz;
+ const int bits = FILTER_BITS - conv_params->round_1;
+ (void)filter_params_y;
+ (void)subpel_y_q4;
+
+ int i, j;
+ __m256i s[4], coeffs_x[4];
+
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m256i wt0 = _mm256_set1_epi32(w0);
+ const __m256i wt1 = _mm256_set1_epi32(w1);
+ const __m256i zero = _mm256_setzero_si256();
+
+ const __m256i round_const_x =
+ _mm256_set1_epi32(((1 << conv_params->round_0) >> 1));
+ const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0);
+ const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
+
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m256i offset_const = _mm256_set1_epi32(offset);
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m256i rounding_const = _mm256_set1_epi32((1 << rounding_shift) >> 1);
+ const __m256i clip_pixel_to_bd =
+ _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
+
+ assert(bits >= 0);
+ prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x);
+
+ for (j = 0; j < w; j += 8) {
+ /* Horizontal filter */
+ for (i = 0; i < h; i += 2) {
+ const __m256i row0 =
+ _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]);
+ __m256i row1 =
+ _mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]);
+
+ const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20);
+ const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31);
+
+ // even pixels
+ s[0] = _mm256_alignr_epi8(r1, r0, 0);
+ s[1] = _mm256_alignr_epi8(r1, r0, 4);
+ s[2] = _mm256_alignr_epi8(r1, r0, 8);
+ s[3] = _mm256_alignr_epi8(r1, r0, 12);
+
+ __m256i res_even = convolve(s, coeffs_x);
+ res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const_x),
+ round_shift_x);
+
+ // odd pixels
+ s[0] = _mm256_alignr_epi8(r1, r0, 2);
+ s[1] = _mm256_alignr_epi8(r1, r0, 6);
+ s[2] = _mm256_alignr_epi8(r1, r0, 10);
+ s[3] = _mm256_alignr_epi8(r1, r0, 14);
+
+ __m256i res_odd = convolve(s, coeffs_x);
+ res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const_x),
+ round_shift_x);
+
+ res_even = _mm256_sll_epi32(res_even, round_shift_bits);
+ res_odd = _mm256_sll_epi32(res_odd, round_shift_bits);
+
+ __m256i res1 = _mm256_unpacklo_epi32(res_even, res_odd);
+
+ __m256i res_unsigned_lo = _mm256_add_epi32(res1, offset_const);
+
+ if (w - j < 8) {
+ if (do_average) {
+ const __m256i data_0 = _mm256_castsi128_si256(
+ _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j])));
+ const __m256i data_1 = _mm256_castsi128_si256(_mm_loadl_epi64(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
+ const __m256i data_01 =
+ _mm256_permute2x128_si256(data_0, data_1, 0x20);
+
+ const __m256i data_ref_0 = _mm256_unpacklo_epi16(data_01, zero);
+
+ const __m256i comp_avg_res = highbd_comp_avg(
+ &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m256i round_result = highbd_convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m256i res_16b =
+ _mm256_packus_epi32(round_result, round_result);
+ const __m256i res_clip = _mm256_min_epi16(res_16b, clip_pixel_to_bd);
+
+ const __m128i res_0 = _mm256_castsi256_si128(res_clip);
+ const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
+
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
+ _mm_storel_epi64(
+ (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
+ } else {
+ __m256i res_16b =
+ _mm256_packus_epi32(res_unsigned_lo, res_unsigned_lo);
+ const __m128i res_0 = _mm256_castsi256_si128(res_16b);
+ const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1);
+
+ _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), res_0);
+ _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_1);
+ }
+ } else {
+ __m256i res2 = _mm256_unpackhi_epi32(res_even, res_odd);
+ __m256i res_unsigned_hi = _mm256_add_epi32(res2, offset_const);
+
+ if (do_average) {
+ const __m256i data_0 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j])));
+ const __m256i data_1 = _mm256_castsi128_si256(_mm_loadu_si128(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
+ const __m256i data_01 =
+ _mm256_permute2x128_si256(data_0, data_1, 0x20);
+
+ const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_01, zero);
+ const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_01, zero);
+
+ const __m256i comp_avg_res_lo = highbd_comp_avg(
+ &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
+ const __m256i comp_avg_res_hi = highbd_comp_avg(
+ &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m256i round_result_lo = highbd_convolve_rounding(
+ &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift);
+ const __m256i round_result_hi = highbd_convolve_rounding(
+ &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift);
+
+ const __m256i res_16b =
+ _mm256_packus_epi32(round_result_lo, round_result_hi);
+ const __m256i res_clip = _mm256_min_epi16(res_16b, clip_pixel_to_bd);
+
+ const __m128i res_0 = _mm256_castsi256_si128(res_clip);
+ const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
+
+ _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
+ _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]),
+ res_1);
+ } else {
+ __m256i res_16b =
+ _mm256_packus_epi32(res_unsigned_lo, res_unsigned_hi);
+ const __m128i res_0 = _mm256_castsi256_si128(res_16b);
+ const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1);
+
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_1);
+ }
+ }
+ }
+ }
+}
+
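+// Vertical-only high-bitdepth jnt compound convolve.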
+void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride,
+ uint16_t *dst0, int dst_stride0, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4,
+ const int subpel_y_q4,
+ ConvolveParams *conv_params, int bd) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const uint16_t *const src_ptr = src - fo_vert * src_stride;
+ const int bits = FILTER_BITS - conv_params->round_0;
+ (void)filter_params_x;
+ (void)subpel_x_q4;
+
+ assert(bits >= 0);
+ int i, j;
+ __m256i s[8], coeffs_y[4];
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m256i wt0 = _mm256_set1_epi32(w0);
+ const __m256i wt1 = _mm256_set1_epi32(w1);
+ const __m256i round_const_y =
+ _mm256_set1_epi32(((1 << conv_params->round_1) >> 1));
+ const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
+ const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
+
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m256i offset_const = _mm256_set1_epi32(offset);
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m256i rounding_const = _mm256_set1_epi32((1 << rounding_shift) >> 1);
+ const __m256i clip_pixel_to_bd =
+ _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
+ const __m256i zero = _mm256_setzero_si256();
+
+ prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
+
+ for (j = 0; j < w; j += 8) {
+ const uint16_t *data = &src_ptr[j];
+ /* Vertical filter */
+ {
+ __m256i src6;
+ __m256i s01 = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 0 * src_stride))),
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
+ 0x20);
+ __m256i s12 = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
+ 0x20);
+ __m256i s23 = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
+ 0x20);
+ __m256i s34 = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
+ 0x20);
+ __m256i s45 = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
+ 0x20);
+ src6 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 6 * src_stride)));
+ __m256i s56 = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
+ src6, 0x20);
+
+ s[0] = _mm256_unpacklo_epi16(s01, s12);
+ s[1] = _mm256_unpacklo_epi16(s23, s34);
+ s[2] = _mm256_unpacklo_epi16(s45, s56);
+
+ s[4] = _mm256_unpackhi_epi16(s01, s12);
+ s[5] = _mm256_unpackhi_epi16(s23, s34);
+ s[6] = _mm256_unpackhi_epi16(s45, s56);
+
+ for (i = 0; i < h; i += 2) {
+ data = &src_ptr[i * src_stride + j];
+
+ const __m256i s67 = _mm256_permute2x128_si256(
+ src6,
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
+ 0x20);
+
+ src6 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 8 * src_stride)));
+
+ const __m256i s78 = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
+ src6, 0x20);
+
+ s[3] = _mm256_unpacklo_epi16(s67, s78);
+ s[7] = _mm256_unpackhi_epi16(s67, s78);
+
+ const __m256i res_a = convolve(s, coeffs_y);
+
+ __m256i res_a_round = _mm256_sll_epi32(res_a, round_shift_bits);
+ res_a_round = _mm256_sra_epi32(
+ _mm256_add_epi32(res_a_round, round_const_y), round_shift_y);
+
+ __m256i res_unsigned_lo = _mm256_add_epi32(res_a_round, offset_const);
+
+ if (w - j < 8) {
+ if (do_average) {
+ const __m256i data_0 = _mm256_castsi128_si256(
+ _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j])));
+ const __m256i data_1 = _mm256_castsi128_si256(_mm_loadl_epi64(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
+ const __m256i data_01 =
+ _mm256_permute2x128_si256(data_0, data_1, 0x20);
+
+ const __m256i data_ref_0 = _mm256_unpacklo_epi16(data_01, zero);
+
+ const __m256i comp_avg_res = highbd_comp_avg(
+ &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m256i round_result = highbd_convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m256i res_16b =
+ _mm256_packus_epi32(round_result, round_result);
+ const __m256i res_clip =
+ _mm256_min_epi16(res_16b, clip_pixel_to_bd);
+
+ const __m128i res_0 = _mm256_castsi256_si128(res_clip);
+ const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
+
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
+ _mm_storel_epi64(
+ (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
+ } else {
+ __m256i res_16b =
+ _mm256_packus_epi32(res_unsigned_lo, res_unsigned_lo);
+ const __m128i res_0 = _mm256_castsi256_si128(res_16b);
+ const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1);
+
+ _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), res_0);
+ _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_1);
+ }
+ } else {
+ const __m256i res_b = convolve(s + 4, coeffs_y);
+ __m256i res_b_round = _mm256_sll_epi32(res_b, round_shift_bits);
+ res_b_round = _mm256_sra_epi32(
+ _mm256_add_epi32(res_b_round, round_const_y), round_shift_y);
+
+ __m256i res_unsigned_hi = _mm256_add_epi32(res_b_round, offset_const);
+
+ if (do_average) {
+ const __m256i data_0 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j])));
+ const __m256i data_1 = _mm256_castsi128_si256(_mm_loadu_si128(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
+ const __m256i data_01 =
+ _mm256_permute2x128_si256(data_0, data_1, 0x20);
+
+ const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_01, zero);
+ const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_01, zero);
+
+ const __m256i comp_avg_res_lo = highbd_comp_avg(
+ &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
+ const __m256i comp_avg_res_hi = highbd_comp_avg(
+ &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m256i round_result_lo =
+ highbd_convolve_rounding(&comp_avg_res_lo, &offset_const,
+ &rounding_const, rounding_shift);
+ const __m256i round_result_hi =
+ highbd_convolve_rounding(&comp_avg_res_hi, &offset_const,
+ &rounding_const, rounding_shift);
+
+ const __m256i res_16b =
+ _mm256_packus_epi32(round_result_lo, round_result_hi);
+ const __m256i res_clip =
+ _mm256_min_epi16(res_16b, clip_pixel_to_bd);
+
+ const __m128i res_0 = _mm256_castsi256_si128(res_clip);
+ const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
+
+ _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
+ _mm_store_si128(
+ (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
+ } else {
+ __m256i res_16b =
+ _mm256_packus_epi32(res_unsigned_lo, res_unsigned_hi);
+ const __m128i res_0 = _mm256_castsi256_si128(res_16b);
+ const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1);
+
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_1);
+ }
+ }
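+        // Slide the window of interleaved row pairs down by two source
+        // rows to match the i += 2 row step.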
+ s[0] = s[1];
+ s[1] = s[2];
+ s[2] = s[3];
+
+ s[4] = s[5];
+ s[5] = s[6];
+ s[6] = s[7];
+ }
+ }
+ }
+}
diff --git a/third_party/aom/av1/common/x86/highbd_jnt_convolve_sse4.c b/third_party/aom/av1/common/x86/highbd_jnt_convolve_sse4.c
new file mode 100644
index 000000000..ccca6b07a
--- /dev/null
+++ b/third_party/aom/av1/common/x86/highbd_jnt_convolve_sse4.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <smmintrin.h>
+#include <assert.h>
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_dsp/x86/convolve_sse2.h"
+#include "aom_dsp/x86/convolve_sse4_1.h"
+
+void av1_highbd_jnt_convolve_y_sse4_1(
+ const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y, const int subpel_x_q4,
+ const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const uint16_t *const src_ptr = src - fo_vert * src_stride;
+ const int bits = FILTER_BITS - conv_params->round_0;
+ (void)filter_params_x;
+ (void)subpel_x_q4;
+
+ assert(bits >= 0);
+ int i, j;
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m128i wt0 = _mm_set1_epi32(w0);
+ const __m128i wt1 = _mm_set1_epi32(w1);
+ const __m128i round_const_y =
+ _mm_set1_epi32(((1 << conv_params->round_1) >> 1));
+ const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
+ const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
+
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m128i offset_const = _mm_set1_epi32(offset);
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m128i rounding_const = _mm_set1_epi32((1 << rounding_shift) >> 1);
+ const __m128i clip_pixel_to_bd =
+ _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
+ const __m128i zero = _mm_setzero_si128();
+ __m128i s[16], coeffs_y[4];
+
+ prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
+
+ for (j = 0; j < w; j += 8) {
+ const uint16_t *data = &src_ptr[j];
+ /* Vertical filter */
+ {
+ __m128i s0 = _mm_loadu_si128((__m128i *)(data + 0 * src_stride));
+ __m128i s1 = _mm_loadu_si128((__m128i *)(data + 1 * src_stride));
+ __m128i s2 = _mm_loadu_si128((__m128i *)(data + 2 * src_stride));
+ __m128i s3 = _mm_loadu_si128((__m128i *)(data + 3 * src_stride));
+ __m128i s4 = _mm_loadu_si128((__m128i *)(data + 4 * src_stride));
+ __m128i s5 = _mm_loadu_si128((__m128i *)(data + 5 * src_stride));
+ __m128i s6 = _mm_loadu_si128((__m128i *)(data + 6 * src_stride));
+
+ s[0] = _mm_unpacklo_epi16(s0, s1);
+ s[1] = _mm_unpacklo_epi16(s2, s3);
+ s[2] = _mm_unpacklo_epi16(s4, s5);
+
+ s[4] = _mm_unpackhi_epi16(s0, s1);
+ s[5] = _mm_unpackhi_epi16(s2, s3);
+ s[6] = _mm_unpackhi_epi16(s4, s5);
+
+ s[0 + 8] = _mm_unpacklo_epi16(s1, s2);
+ s[1 + 8] = _mm_unpacklo_epi16(s3, s4);
+ s[2 + 8] = _mm_unpacklo_epi16(s5, s6);
+
+ s[4 + 8] = _mm_unpackhi_epi16(s1, s2);
+ s[5 + 8] = _mm_unpackhi_epi16(s3, s4);
+ s[6 + 8] = _mm_unpackhi_epi16(s5, s6);
+
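+      // s[0..7] hold the interleaved row pairs for output row i and
+      // s[8..15] the same pairs shifted down by one source row for
+      // output row i + 1, so two rows are filtered per iteration.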
+ for (i = 0; i < h; i += 2) {
+ data = &src_ptr[i * src_stride + j];
+
+ __m128i s7 = _mm_loadu_si128((__m128i *)(data + 7 * src_stride));
+ __m128i s8 = _mm_loadu_si128((__m128i *)(data + 8 * src_stride));
+
+ s[3] = _mm_unpacklo_epi16(s6, s7);
+ s[7] = _mm_unpackhi_epi16(s6, s7);
+
+ s[3 + 8] = _mm_unpacklo_epi16(s7, s8);
+ s[7 + 8] = _mm_unpackhi_epi16(s7, s8);
+
+ const __m128i res_a0 = convolve(s, coeffs_y);
+ __m128i res_a_round0 = _mm_sll_epi32(res_a0, round_shift_bits);
+ res_a_round0 = _mm_sra_epi32(_mm_add_epi32(res_a_round0, round_const_y),
+ round_shift_y);
+
+ const __m128i res_a1 = convolve(s + 8, coeffs_y);
+ __m128i res_a_round1 = _mm_sll_epi32(res_a1, round_shift_bits);
+ res_a_round1 = _mm_sra_epi32(_mm_add_epi32(res_a_round1, round_const_y),
+ round_shift_y);
+
+ __m128i res_unsigned_lo_0 = _mm_add_epi32(res_a_round0, offset_const);
+ __m128i res_unsigned_lo_1 = _mm_add_epi32(res_a_round1, offset_const);
+
+ if (w - j < 8) {
+ if (do_average) {
+ const __m128i data_0 =
+ _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j]));
+ const __m128i data_1 = _mm_loadl_epi64(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride]));
+
+ const __m128i data_ref_0 = _mm_unpacklo_epi16(data_0, zero);
+ const __m128i data_ref_1 = _mm_unpacklo_epi16(data_1, zero);
+
+ const __m128i comp_avg_res_0 = highbd_comp_avg_sse4_1(
+ &data_ref_0, &res_unsigned_lo_0, &wt0, &wt1, use_jnt_comp_avg);
+ const __m128i comp_avg_res_1 = highbd_comp_avg_sse4_1(
+ &data_ref_1, &res_unsigned_lo_1, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m128i round_result_0 =
+ highbd_convolve_rounding_sse2(&comp_avg_res_0, &offset_const,
+ &rounding_const, rounding_shift);
+ const __m128i round_result_1 =
+ highbd_convolve_rounding_sse2(&comp_avg_res_1, &offset_const,
+ &rounding_const, rounding_shift);
+
+ const __m128i res_16b_0 =
+ _mm_packus_epi32(round_result_0, round_result_0);
+ const __m128i res_clip_0 =
+ _mm_min_epi16(res_16b_0, clip_pixel_to_bd);
+ const __m128i res_16b_1 =
+ _mm_packus_epi32(round_result_1, round_result_1);
+ const __m128i res_clip_1 =
+ _mm_min_epi16(res_16b_1, clip_pixel_to_bd);
+
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]),
+ res_clip_0);
+ _mm_storel_epi64(
+ (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]),
+ res_clip_1);
+
+ } else {
+ __m128i res_16b_0 =
+ _mm_packus_epi32(res_unsigned_lo_0, res_unsigned_lo_0);
+
+ __m128i res_16b_1 =
+ _mm_packus_epi32(res_unsigned_lo_1, res_unsigned_lo_1);
+
+ _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res_16b_0);
+ _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride],
+ res_16b_1);
+ }
+ } else {
+ const __m128i res_b0 = convolve(s + 4, coeffs_y);
+ __m128i res_b_round0 = _mm_sll_epi32(res_b0, round_shift_bits);
+ res_b_round0 = _mm_sra_epi32(
+ _mm_add_epi32(res_b_round0, round_const_y), round_shift_y);
+
+ const __m128i res_b1 = convolve(s + 4 + 8, coeffs_y);
+ __m128i res_b_round1 = _mm_sll_epi32(res_b1, round_shift_bits);
+ res_b_round1 = _mm_sra_epi32(
+ _mm_add_epi32(res_b_round1, round_const_y), round_shift_y);
+
+ __m128i res_unsigned_hi_0 = _mm_add_epi32(res_b_round0, offset_const);
+ __m128i res_unsigned_hi_1 = _mm_add_epi32(res_b_round1, offset_const);
+
+ if (do_average) {
+ const __m128i data_0 =
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]));
+ const __m128i data_1 = _mm_loadu_si128(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride]));
+ const __m128i data_ref_0_lo_0 = _mm_unpacklo_epi16(data_0, zero);
+ const __m128i data_ref_0_lo_1 = _mm_unpacklo_epi16(data_1, zero);
+
+ const __m128i data_ref_0_hi_0 = _mm_unpackhi_epi16(data_0, zero);
+ const __m128i data_ref_0_hi_1 = _mm_unpackhi_epi16(data_1, zero);
+
+ const __m128i comp_avg_res_lo_0 =
+ highbd_comp_avg_sse4_1(&data_ref_0_lo_0, &res_unsigned_lo_0,
+ &wt0, &wt1, use_jnt_comp_avg);
+ const __m128i comp_avg_res_lo_1 =
+ highbd_comp_avg_sse4_1(&data_ref_0_lo_1, &res_unsigned_lo_1,
+ &wt0, &wt1, use_jnt_comp_avg);
+ const __m128i comp_avg_res_hi_0 =
+ highbd_comp_avg_sse4_1(&data_ref_0_hi_0, &res_unsigned_hi_0,
+ &wt0, &wt1, use_jnt_comp_avg);
+ const __m128i comp_avg_res_hi_1 =
+ highbd_comp_avg_sse4_1(&data_ref_0_hi_1, &res_unsigned_hi_1,
+ &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m128i round_result_lo_0 =
+ highbd_convolve_rounding_sse2(&comp_avg_res_lo_0, &offset_const,
+ &rounding_const, rounding_shift);
+ const __m128i round_result_lo_1 =
+ highbd_convolve_rounding_sse2(&comp_avg_res_lo_1, &offset_const,
+ &rounding_const, rounding_shift);
+ const __m128i round_result_hi_0 =
+ highbd_convolve_rounding_sse2(&comp_avg_res_hi_0, &offset_const,
+ &rounding_const, rounding_shift);
+ const __m128i round_result_hi_1 =
+ highbd_convolve_rounding_sse2(&comp_avg_res_hi_1, &offset_const,
+ &rounding_const, rounding_shift);
+
+ const __m128i res_16b_0 =
+ _mm_packus_epi32(round_result_lo_0, round_result_hi_0);
+ const __m128i res_clip_0 =
+ _mm_min_epi16(res_16b_0, clip_pixel_to_bd);
+
+ const __m128i res_16b_1 =
+ _mm_packus_epi32(round_result_lo_1, round_result_hi_1);
+ const __m128i res_clip_1 =
+ _mm_min_epi16(res_16b_1, clip_pixel_to_bd);
+
+ _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]),
+ res_clip_0);
+ _mm_store_si128(
+ (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]),
+ res_clip_1);
+ } else {
+ __m128i res_16bit0 =
+ _mm_packus_epi32(res_unsigned_lo_0, res_unsigned_hi_0);
+ __m128i res_16bit1 =
+ _mm_packus_epi32(res_unsigned_lo_1, res_unsigned_hi_1);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_16bit0);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_16bit1);
+ }
+ }
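+        // Slide both windows of interleaved row pairs down by two source
+        // rows for the next pair of output rows; row 8 becomes the new
+        // row 6 below.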
+ s[0] = s[1];
+ s[1] = s[2];
+ s[2] = s[3];
+
+ s[4] = s[5];
+ s[5] = s[6];
+ s[6] = s[7];
+
+ s[0 + 8] = s[1 + 8];
+ s[1 + 8] = s[2 + 8];
+ s[2 + 8] = s[3 + 8];
+
+ s[4 + 8] = s[5 + 8];
+ s[5 + 8] = s[6 + 8];
+ s[6 + 8] = s[7 + 8];
+
+ s6 = s8;
+ }
+ }
+ }
+}
+
+void av1_highbd_jnt_convolve_x_sse4_1(
+ const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y, const int subpel_x_q4,
+ const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const uint16_t *const src_ptr = src - fo_horiz;
+ const int bits = FILTER_BITS - conv_params->round_1;
+ (void)filter_params_y;
+ (void)subpel_y_q4;
+
+ int i, j;
+ __m128i s[4], coeffs_x[4];
+
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m128i wt0 = _mm_set1_epi32(w0);
+ const __m128i wt1 = _mm_set1_epi32(w1);
+ const __m128i zero = _mm_setzero_si128();
+
+ const __m128i round_const_x =
+ _mm_set1_epi32(((1 << conv_params->round_0) >> 1));
+ const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0);
+ const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
+
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m128i offset_const = _mm_set1_epi32(offset);
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m128i rounding_const = _mm_set1_epi32((1 << rounding_shift) >> 1);
+ const __m128i clip_pixel_to_bd =
+ _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
+
+ assert(bits >= 0);
+ prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x);
+
+ for (j = 0; j < w; j += 8) {
+ /* Horizontal filter */
+ for (i = 0; i < h; i += 1) {
+ const __m128i row00 =
+ _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
+ const __m128i row01 =
+ _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + (j + 8)]);
+
+ // even pixels
+ s[0] = _mm_alignr_epi8(row01, row00, 0);
+ s[1] = _mm_alignr_epi8(row01, row00, 4);
+ s[2] = _mm_alignr_epi8(row01, row00, 8);
+ s[3] = _mm_alignr_epi8(row01, row00, 12);
+
+ __m128i res_even = convolve(s, coeffs_x);
+ res_even =
+ _mm_sra_epi32(_mm_add_epi32(res_even, round_const_x), round_shift_x);
+
+ // odd pixels
+ s[0] = _mm_alignr_epi8(row01, row00, 2);
+ s[1] = _mm_alignr_epi8(row01, row00, 6);
+ s[2] = _mm_alignr_epi8(row01, row00, 10);
+ s[3] = _mm_alignr_epi8(row01, row00, 14);
+
+ __m128i res_odd = convolve(s, coeffs_x);
+ res_odd =
+ _mm_sra_epi32(_mm_add_epi32(res_odd, round_const_x), round_shift_x);
+
+ res_even = _mm_sll_epi32(res_even, round_shift_bits);
+ res_odd = _mm_sll_epi32(res_odd, round_shift_bits);
+
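+      // Interleave the even- and odd-index results back into pixel order
+      // 0..7 and add the compound offset.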
+ __m128i res1 = _mm_unpacklo_epi32(res_even, res_odd);
+ __m128i res_unsigned_lo = _mm_add_epi32(res1, offset_const);
+ if (w - j < 8) {
+ if (do_average) {
+ const __m128i data_0 =
+ _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j]));
+ const __m128i data_ref_0 = _mm_unpacklo_epi16(data_0, zero);
+
+ const __m128i comp_avg_res = highbd_comp_avg_sse4_1(
+ &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
+ const __m128i round_result = highbd_convolve_rounding_sse2(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m128i res_16b = _mm_packus_epi32(round_result, round_result);
+ const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd);
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip);
+ } else {
+ __m128i res_16b = _mm_packus_epi32(res_unsigned_lo, res_unsigned_lo);
+ _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res_16b);
+ }
+ } else {
+ __m128i res2 = _mm_unpackhi_epi32(res_even, res_odd);
+ __m128i res_unsigned_hi = _mm_add_epi32(res2, offset_const);
+ if (do_average) {
+ const __m128i data_0 =
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]));
+ const __m128i data_ref_0_lo = _mm_unpacklo_epi16(data_0, zero);
+ const __m128i data_ref_0_hi = _mm_unpackhi_epi16(data_0, zero);
+
+ const __m128i comp_avg_res_lo = highbd_comp_avg_sse4_1(
+ &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
+ const __m128i comp_avg_res_hi = highbd_comp_avg_sse4_1(
+ &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
+
+ const __m128i round_result_lo = highbd_convolve_rounding_sse2(
+ &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift);
+ const __m128i round_result_hi = highbd_convolve_rounding_sse2(
+ &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift);
+
+ const __m128i res_16b =
+ _mm_packus_epi32(round_result_lo, round_result_hi);
+ const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd);
+ _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip);
+ } else {
+ __m128i res_16b = _mm_packus_epi32(res_unsigned_lo, res_unsigned_hi);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_16b);
+ }
+ }
+ }
+ }
+}
diff --git a/third_party/aom/av1/common/x86/highbd_txfm_utility_sse4.h b/third_party/aom/av1/common/x86/highbd_txfm_utility_sse4.h
index fb246674a..b29bd1d79 100644
--- a/third_party/aom/av1/common/x86/highbd_txfm_utility_sse4.h
+++ b/third_party/aom/av1/common/x86/highbd_txfm_utility_sse4.h
@@ -90,4 +90,14 @@ static INLINE __m128i half_btf_sse4_1(const __m128i *w0, const __m128i *n0,
return x;
}
 
+static INLINE __m128i half_btf_0_sse4_1(const __m128i *w0, const __m128i *n0,
+ const __m128i *rounding, int bit) {
+ __m128i x;
+
+ x = _mm_mullo_epi32(*w0, *n0);
+ x = _mm_add_epi32(x, *rounding);
+ x = _mm_srai_epi32(x, bit);
+ return x;
+}
+
#endif // _HIGHBD_TXFM_UTILITY_SSE4_H
diff --git a/third_party/aom/av1/common/x86/highbd_warp_plane_sse4.c b/third_party/aom/av1/common/x86/highbd_warp_plane_sse4.c
new file mode 100644
index 000000000..a08beaafd
--- /dev/null
+++ b/third_party/aom/av1/common/x86/highbd_warp_plane_sse4.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <smmintrin.h>
+
+#include "config/av1_rtcd.h"
+
+#include "av1/common/warped_motion.h"
+
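+// Shuffle that gathers the low bytes of eight 16-bit pixels into the lower
+// half of a register and the high bytes into the upper half, so the 8-bit
+// warp_pad_left/right shuffles can be reused to pad out-of-frame pixels.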
+static const uint8_t warp_highbd_arrange_bytes[16] = {
+ 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
+};
+
+static INLINE void horizontal_filter(__m128i src, __m128i src2, __m128i *tmp,
+ int sx, int alpha, int k,
+ const int offset_bits_horiz,
+ const int reduce_bits_horiz) {
+ // Filter even-index pixels
+ const __m128i tmp_0 = _mm_loadu_si128(
+ (__m128i *)(warped_filter + ((sx + 0 * alpha) >> WARPEDDIFF_PREC_BITS)));
+ const __m128i tmp_2 = _mm_loadu_si128(
+ (__m128i *)(warped_filter + ((sx + 2 * alpha) >> WARPEDDIFF_PREC_BITS)));
+ const __m128i tmp_4 = _mm_loadu_si128(
+ (__m128i *)(warped_filter + ((sx + 4 * alpha) >> WARPEDDIFF_PREC_BITS)));
+ const __m128i tmp_6 = _mm_loadu_si128(
+ (__m128i *)(warped_filter + ((sx + 6 * alpha) >> WARPEDDIFF_PREC_BITS)));
+
+ // coeffs 0 1 0 1 2 3 2 3 for pixels 0, 2
+ const __m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2);
+ // coeffs 0 1 0 1 2 3 2 3 for pixels 4, 6
+ const __m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6);
+ // coeffs 4 5 4 5 6 7 6 7 for pixels 0, 2
+ const __m128i tmp_12 = _mm_unpackhi_epi32(tmp_0, tmp_2);
+ // coeffs 4 5 4 5 6 7 6 7 for pixels 4, 6
+ const __m128i tmp_14 = _mm_unpackhi_epi32(tmp_4, tmp_6);
+
+ // coeffs 0 1 0 1 0 1 0 1 for pixels 0, 2, 4, 6
+ const __m128i coeff_0 = _mm_unpacklo_epi64(tmp_8, tmp_10);
+ // coeffs 2 3 2 3 2 3 2 3 for pixels 0, 2, 4, 6
+ const __m128i coeff_2 = _mm_unpackhi_epi64(tmp_8, tmp_10);
+ // coeffs 4 5 4 5 4 5 4 5 for pixels 0, 2, 4, 6
+ const __m128i coeff_4 = _mm_unpacklo_epi64(tmp_12, tmp_14);
+ // coeffs 6 7 6 7 6 7 6 7 for pixels 0, 2, 4, 6
+ const __m128i coeff_6 = _mm_unpackhi_epi64(tmp_12, tmp_14);
+
+ const __m128i round_const = _mm_set1_epi32((1 << offset_bits_horiz) +
+ ((1 << reduce_bits_horiz) >> 1));
+
+ // Calculate filtered results
+ const __m128i res_0 = _mm_madd_epi16(src, coeff_0);
+ const __m128i res_2 = _mm_madd_epi16(_mm_alignr_epi8(src2, src, 4), coeff_2);
+ const __m128i res_4 = _mm_madd_epi16(_mm_alignr_epi8(src2, src, 8), coeff_4);
+ const __m128i res_6 = _mm_madd_epi16(_mm_alignr_epi8(src2, src, 12), coeff_6);
+
+ __m128i res_even =
+ _mm_add_epi32(_mm_add_epi32(res_0, res_4), _mm_add_epi32(res_2, res_6));
+ res_even = _mm_sra_epi32(_mm_add_epi32(res_even, round_const),
+ _mm_cvtsi32_si128(reduce_bits_horiz));
+
+ // Filter odd-index pixels
+ const __m128i tmp_1 = _mm_loadu_si128(
+ (__m128i *)(warped_filter + ((sx + 1 * alpha) >> WARPEDDIFF_PREC_BITS)));
+ const __m128i tmp_3 = _mm_loadu_si128(
+ (__m128i *)(warped_filter + ((sx + 3 * alpha) >> WARPEDDIFF_PREC_BITS)));
+ const __m128i tmp_5 = _mm_loadu_si128(
+ (__m128i *)(warped_filter + ((sx + 5 * alpha) >> WARPEDDIFF_PREC_BITS)));
+ const __m128i tmp_7 = _mm_loadu_si128(
+ (__m128i *)(warped_filter + ((sx + 7 * alpha) >> WARPEDDIFF_PREC_BITS)));
+
+ const __m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3);
+ const __m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7);
+ const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_1, tmp_3);
+ const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_5, tmp_7);
+
+ const __m128i coeff_1 = _mm_unpacklo_epi64(tmp_9, tmp_11);
+ const __m128i coeff_3 = _mm_unpackhi_epi64(tmp_9, tmp_11);
+ const __m128i coeff_5 = _mm_unpacklo_epi64(tmp_13, tmp_15);
+ const __m128i coeff_7 = _mm_unpackhi_epi64(tmp_13, tmp_15);
+
+ const __m128i res_1 = _mm_madd_epi16(_mm_alignr_epi8(src2, src, 2), coeff_1);
+ const __m128i res_3 = _mm_madd_epi16(_mm_alignr_epi8(src2, src, 6), coeff_3);
+ const __m128i res_5 = _mm_madd_epi16(_mm_alignr_epi8(src2, src, 10), coeff_5);
+ const __m128i res_7 = _mm_madd_epi16(_mm_alignr_epi8(src2, src, 14), coeff_7);
+
+ __m128i res_odd =
+ _mm_add_epi32(_mm_add_epi32(res_1, res_5), _mm_add_epi32(res_3, res_7));
+ res_odd = _mm_sra_epi32(_mm_add_epi32(res_odd, round_const),
+ _mm_cvtsi32_si128(reduce_bits_horiz));
+
+ // Combine results into one register.
+ // We store the columns in the order 0, 2, 4, 6, 1, 3, 5, 7
+ // as this order helps with the vertical filter.
+ tmp[k + 7] = _mm_packs_epi32(res_even, res_odd);
+}
+
+void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref,
+ int width, int height, int stride,
+ uint16_t *pred, int p_col, int p_row,
+ int p_width, int p_height, int p_stride,
+ int subsampling_x, int subsampling_y, int bd,
+ ConvolveParams *conv_params, int16_t alpha,
+ int16_t beta, int16_t gamma, int16_t delta) {
+ __m128i tmp[15];
+ int i, j, k;
+ const int reduce_bits_horiz =
+ conv_params->round_0 +
+ AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
+ const int reduce_bits_vert = conv_params->is_compound
+ ? conv_params->round_1
+ : 2 * FILTER_BITS - reduce_bits_horiz;
+ const int offset_bits_horiz = bd + FILTER_BITS - 1;
+ assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
+ assert(!(bd == 12 && reduce_bits_horiz < 5));
+ assert(IMPLIES(conv_params->do_average, conv_params->is_compound));
+
+ const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
+ const __m128i clip_pixel =
+ _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
+ const __m128i reduce_bits_vert_shift = _mm_cvtsi32_si128(reduce_bits_vert);
+ const __m128i reduce_bits_vert_const =
+ _mm_set1_epi32(((1 << reduce_bits_vert) >> 1));
+ const __m128i res_add_const = _mm_set1_epi32(1 << offset_bits_vert);
+ const int round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const __m128i res_sub_const =
+ _mm_set1_epi32(-(1 << (offset_bits - conv_params->round_1)) -
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ __m128i round_bits_shift = _mm_cvtsi32_si128(round_bits);
+ __m128i round_bits_const = _mm_set1_epi32(((1 << round_bits) >> 1));
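+  // res_add_const keeps the compound intermediate values non-negative;
+  // res_sub_const and the round_bits_* constants remove that offset again
+  // when the averaged result is rounded back to pixel precision.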
+
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m128i wt0 = _mm_set1_epi32(w0);
+ const __m128i wt1 = _mm_set1_epi32(w1);
+
+ /* Note: For this code to work, the left/right frame borders need to be
+ extended by at least 13 pixels each. By the time we get here, other
+ code will have set up this border, but we allow an explicit check
+ for debugging purposes.
+ */
+ /*for (i = 0; i < height; ++i) {
+ for (j = 0; j < 13; ++j) {
+ assert(ref[i * stride - 13 + j] == ref[i * stride]);
+ assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]);
+ }
+ }*/
+
+ for (i = 0; i < p_height; i += 8) {
+ for (j = 0; j < p_width; j += 8) {
+ const int32_t src_x = (p_col + j + 4) << subsampling_x;
+ const int32_t src_y = (p_row + i + 4) << subsampling_y;
+ const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
+ const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
+ const int32_t x4 = dst_x >> subsampling_x;
+ const int32_t y4 = dst_y >> subsampling_y;
+
+ int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
+ int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+ int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
+ int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+
+ // Add in all the constant terms, including rounding and offset
+ sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
+ (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
+ sy4 += gamma * (-4) + delta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
+ (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
+
+ sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
+ sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
+
+ // Horizontal filter
+ // If the block is aligned such that, after clamping, every sample
+ // would be taken from the leftmost/rightmost column, then we can
+ // skip the expensive horizontal filter.
+ if (ix4 <= -7) {
+ for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
+ int iy = iy4 + k;
+ if (iy < 0)
+ iy = 0;
+ else if (iy > height - 1)
+ iy = height - 1;
+ tmp[k + 7] = _mm_set1_epi16(
+ (1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
+ ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz)));
+ }
+ } else if (ix4 >= width + 6) {
+ for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
+ int iy = iy4 + k;
+ if (iy < 0)
+ iy = 0;
+ else if (iy > height - 1)
+ iy = height - 1;
+ tmp[k + 7] =
+ _mm_set1_epi16((1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
+ ref[iy * stride + (width - 1)] *
+ (1 << (FILTER_BITS - reduce_bits_horiz)));
+ }
+ } else if (((ix4 - 7) < 0) || ((ix4 + 9) > width)) {
+ const int out_of_boundary_left = -(ix4 - 6);
+ const int out_of_boundary_right = (ix4 + 8) - width;
+
+ for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
+ int iy = iy4 + k;
+ if (iy < 0)
+ iy = 0;
+ else if (iy > height - 1)
+ iy = height - 1;
+ int sx = sx4 + beta * (k + 4);
+
+ // Load source pixels
+ const __m128i src =
+ _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
+ const __m128i src2 =
+ _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1));
+
+ const __m128i src_01 = _mm_shuffle_epi8(
+ src, _mm_loadu_si128((__m128i *)warp_highbd_arrange_bytes));
+ const __m128i src2_01 = _mm_shuffle_epi8(
+ src2, _mm_loadu_si128((__m128i *)warp_highbd_arrange_bytes));
+
+ __m128i src_lo = _mm_unpacklo_epi64(src_01, src2_01);
+ __m128i src_hi = _mm_unpackhi_epi64(src_01, src2_01);
+
+ if (out_of_boundary_left >= 0) {
+ const __m128i shuffle_reg_left =
+ _mm_loadu_si128((__m128i *)warp_pad_left[out_of_boundary_left]);
+ src_lo = _mm_shuffle_epi8(src_lo, shuffle_reg_left);
+ src_hi = _mm_shuffle_epi8(src_hi, shuffle_reg_left);
+ }
+
+ if (out_of_boundary_right >= 0) {
+ const __m128i shuffle_reg_right = _mm_loadu_si128(
+ (__m128i *)warp_pad_right[out_of_boundary_right]);
+ src_lo = _mm_shuffle_epi8(src_lo, shuffle_reg_right);
+ src_hi = _mm_shuffle_epi8(src_hi, shuffle_reg_right);
+ }
+
+ const __m128i src_padded = _mm_unpacklo_epi8(src_lo, src_hi);
+ const __m128i src2_padded = _mm_unpackhi_epi8(src_lo, src_hi);
+
+ horizontal_filter(src_padded, src2_padded, tmp, sx, alpha, k,
+ offset_bits_horiz, reduce_bits_horiz);
+ }
+ } else {
+ for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
+ int iy = iy4 + k;
+ if (iy < 0)
+ iy = 0;
+ else if (iy > height - 1)
+ iy = height - 1;
+ int sx = sx4 + beta * (k + 4);
+
+ // Load source pixels
+ const __m128i src =
+ _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
+ const __m128i src2 =
+ _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1));
+
+ horizontal_filter(src, src2, tmp, sx, alpha, k, offset_bits_horiz,
+ reduce_bits_horiz);
+ }
+ }
+
+ // Vertical filter
+ for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) {
+ int sy = sy4 + delta * (k + 4);
+
+ // Load from tmp and rearrange pairs of consecutive rows into the
+ // column order 0 0 2 2 4 4 6 6; 1 1 3 3 5 5 7 7
+ const __m128i *src = tmp + (k + 4);
+ const __m128i src_0 = _mm_unpacklo_epi16(src[0], src[1]);
+ const __m128i src_2 = _mm_unpacklo_epi16(src[2], src[3]);
+ const __m128i src_4 = _mm_unpacklo_epi16(src[4], src[5]);
+ const __m128i src_6 = _mm_unpacklo_epi16(src[6], src[7]);
+
+ // Filter even-index pixels
+ const __m128i tmp_0 = _mm_loadu_si128(
+ (__m128i *)(warped_filter +
+ ((sy + 0 * gamma) >> WARPEDDIFF_PREC_BITS)));
+ const __m128i tmp_2 = _mm_loadu_si128(
+ (__m128i *)(warped_filter +
+ ((sy + 2 * gamma) >> WARPEDDIFF_PREC_BITS)));
+ const __m128i tmp_4 = _mm_loadu_si128(
+ (__m128i *)(warped_filter +
+ ((sy + 4 * gamma) >> WARPEDDIFF_PREC_BITS)));
+ const __m128i tmp_6 = _mm_loadu_si128(
+ (__m128i *)(warped_filter +
+ ((sy + 6 * gamma) >> WARPEDDIFF_PREC_BITS)));
+
+ const __m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2);
+ const __m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6);
+ const __m128i tmp_12 = _mm_unpackhi_epi32(tmp_0, tmp_2);
+ const __m128i tmp_14 = _mm_unpackhi_epi32(tmp_4, tmp_6);
+
+ const __m128i coeff_0 = _mm_unpacklo_epi64(tmp_8, tmp_10);
+ const __m128i coeff_2 = _mm_unpackhi_epi64(tmp_8, tmp_10);
+ const __m128i coeff_4 = _mm_unpacklo_epi64(tmp_12, tmp_14);
+ const __m128i coeff_6 = _mm_unpackhi_epi64(tmp_12, tmp_14);
+
+ const __m128i res_0 = _mm_madd_epi16(src_0, coeff_0);
+ const __m128i res_2 = _mm_madd_epi16(src_2, coeff_2);
+ const __m128i res_4 = _mm_madd_epi16(src_4, coeff_4);
+ const __m128i res_6 = _mm_madd_epi16(src_6, coeff_6);
+
+ const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
+ _mm_add_epi32(res_4, res_6));
+
+ // Filter odd-index pixels
+ const __m128i src_1 = _mm_unpackhi_epi16(src[0], src[1]);
+ const __m128i src_3 = _mm_unpackhi_epi16(src[2], src[3]);
+ const __m128i src_5 = _mm_unpackhi_epi16(src[4], src[5]);
+ const __m128i src_7 = _mm_unpackhi_epi16(src[6], src[7]);
+
+ const __m128i tmp_1 = _mm_loadu_si128(
+ (__m128i *)(warped_filter +
+ ((sy + 1 * gamma) >> WARPEDDIFF_PREC_BITS)));
+ const __m128i tmp_3 = _mm_loadu_si128(
+ (__m128i *)(warped_filter +
+ ((sy + 3 * gamma) >> WARPEDDIFF_PREC_BITS)));
+ const __m128i tmp_5 = _mm_loadu_si128(
+ (__m128i *)(warped_filter +
+ ((sy + 5 * gamma) >> WARPEDDIFF_PREC_BITS)));
+ const __m128i tmp_7 = _mm_loadu_si128(
+ (__m128i *)(warped_filter +
+ ((sy + 7 * gamma) >> WARPEDDIFF_PREC_BITS)));
+
+ const __m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3);
+ const __m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7);
+ const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_1, tmp_3);
+ const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_5, tmp_7);
+
+ const __m128i coeff_1 = _mm_unpacklo_epi64(tmp_9, tmp_11);
+ const __m128i coeff_3 = _mm_unpackhi_epi64(tmp_9, tmp_11);
+ const __m128i coeff_5 = _mm_unpacklo_epi64(tmp_13, tmp_15);
+ const __m128i coeff_7 = _mm_unpackhi_epi64(tmp_13, tmp_15);
+
+ const __m128i res_1 = _mm_madd_epi16(src_1, coeff_1);
+ const __m128i res_3 = _mm_madd_epi16(src_3, coeff_3);
+ const __m128i res_5 = _mm_madd_epi16(src_5, coeff_5);
+ const __m128i res_7 = _mm_madd_epi16(src_7, coeff_7);
+
+ const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
+ _mm_add_epi32(res_5, res_7));
+
+ // Rearrange pixels back into the order 0 ... 7
+ __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
+ __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
+
+ if (conv_params->is_compound) {
+ __m128i *const p =
+ (__m128i *)&conv_params
+ ->dst[(i + k + 4) * conv_params->dst_stride + j];
+ res_lo = _mm_add_epi32(res_lo, res_add_const);
+ res_lo = _mm_sra_epi32(_mm_add_epi32(res_lo, reduce_bits_vert_const),
+ reduce_bits_vert_shift);
+
+ if (conv_params->do_average) {
+ __m128i *const dst16 = (__m128i *)&pred[(i + k + 4) * p_stride + j];
+ __m128i p_32 = _mm_cvtepu16_epi32(_mm_loadl_epi64(p));
+
+ if (conv_params->use_jnt_comp_avg) {
+ res_lo = _mm_add_epi32(_mm_mullo_epi32(p_32, wt0),
+ _mm_mullo_epi32(res_lo, wt1));
+ res_lo = _mm_srai_epi32(res_lo, DIST_PRECISION_BITS);
+ } else {
+ res_lo = _mm_srai_epi32(_mm_add_epi32(p_32, res_lo), 1);
+ }
+
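+            // res_lo now holds the (weighted) average of the two
+            // predictions; remove the compound offset and round back to
+            // pixel precision before clipping.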
+ __m128i res32_lo = _mm_add_epi32(res_lo, res_sub_const);
+ res32_lo = _mm_sra_epi32(_mm_add_epi32(res32_lo, round_bits_const),
+ round_bits_shift);
+
+ __m128i res16_lo = _mm_packus_epi32(res32_lo, res32_lo);
+ res16_lo = _mm_min_epi16(res16_lo, clip_pixel);
+ _mm_storel_epi64(dst16, res16_lo);
+ } else {
+ res_lo = _mm_packus_epi32(res_lo, res_lo);
+ _mm_storel_epi64(p, res_lo);
+ }
+ if (p_width > 4) {
+ __m128i *const p4 =
+ (__m128i *)&conv_params
+ ->dst[(i + k + 4) * conv_params->dst_stride + j + 4];
+
+ res_hi = _mm_add_epi32(res_hi, res_add_const);
+ res_hi =
+ _mm_sra_epi32(_mm_add_epi32(res_hi, reduce_bits_vert_const),
+ reduce_bits_vert_shift);
+ if (conv_params->do_average) {
+ __m128i *const dst16_4 =
+ (__m128i *)&pred[(i + k + 4) * p_stride + j + 4];
+ __m128i p4_32 = _mm_cvtepu16_epi32(_mm_loadl_epi64(p4));
+
+ if (conv_params->use_jnt_comp_avg) {
+ res_hi = _mm_add_epi32(_mm_mullo_epi32(p4_32, wt0),
+ _mm_mullo_epi32(res_hi, wt1));
+ res_hi = _mm_srai_epi32(res_hi, DIST_PRECISION_BITS);
+ } else {
+ res_hi = _mm_srai_epi32(_mm_add_epi32(p4_32, res_hi), 1);
+ }
+
+ __m128i res32_hi = _mm_add_epi32(res_hi, res_sub_const);
+ res32_hi = _mm_sra_epi32(
+ _mm_add_epi32(res32_hi, round_bits_const), round_bits_shift);
+ __m128i res16_hi = _mm_packus_epi32(res32_hi, res32_hi);
+ res16_hi = _mm_min_epi16(res16_hi, clip_pixel);
+ _mm_storel_epi64(dst16_4, res16_hi);
+ } else {
+ res_hi = _mm_packus_epi32(res_hi, res_hi);
+ _mm_storel_epi64(p4, res_hi);
+ }
+ }
+ } else {
+ // Round and pack into 8 bits
+ const __m128i round_const =
+ _mm_set1_epi32(-(1 << (bd + reduce_bits_vert - 1)) +
+ ((1 << reduce_bits_vert) >> 1));
+
+ const __m128i res_lo_round = _mm_srai_epi32(
+ _mm_add_epi32(res_lo, round_const), reduce_bits_vert);
+ const __m128i res_hi_round = _mm_srai_epi32(
+ _mm_add_epi32(res_hi, round_const), reduce_bits_vert);
+
+ __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
+ // Clamp res_16bit to the range [0, 2^bd - 1]
+ const __m128i max_val = _mm_set1_epi16((1 << bd) - 1);
+ const __m128i zero = _mm_setzero_si128();
+ res_16bit = _mm_max_epi16(_mm_min_epi16(res_16bit, max_val), zero);
+
+ // Store, blending with 'pred' if needed
+ __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
+
+ // Note: If we're outputting a 4x4 block, we need to be very careful
+ // to only output 4 pixels at this point, to avoid encode/decode
+ // mismatches when encoding with multiple threads.
+ if (p_width == 4) {
+ _mm_storel_epi64(p, res_16bit);
+ } else {
+ _mm_storeu_si128(p, res_16bit);
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/third_party/aom/av1/common/x86/highbd_warp_plane_ssse3.c b/third_party/aom/av1/common/x86/highbd_warp_plane_ssse3.c
deleted file mode 100644
index 71b0ec7a3..000000000
--- a/third_party/aom/av1/common/x86/highbd_warp_plane_ssse3.c
+++ /dev/null
@@ -1,365 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <tmmintrin.h>
-
-#include "./av1_rtcd.h"
-#include "av1/common/warped_motion.h"
-
-void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
- int width, int height, int stride,
- uint16_t *pred, int p_col, int p_row,
- int p_width, int p_height, int p_stride,
- int subsampling_x, int subsampling_y, int bd,
- ConvolveParams *conv_params, int16_t alpha,
- int16_t beta, int16_t gamma, int16_t delta) {
- int comp_avg = conv_params->do_average;
-#if HORSHEAR_REDUCE_PREC_BITS >= 5
- __m128i tmp[15];
-#else
-#error "HORSHEAR_REDUCE_PREC_BITS < 5 not currently supported by SSSE3 filter"
-#endif
- int i, j, k;
-#if CONFIG_CONVOLVE_ROUND
- const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
- const int reduce_bits_horiz =
- use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz =
- use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
- if (use_conv_params) {
- conv_params->do_post_rounding = 1;
- }
- assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
-#else
- const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
-#endif
-
- /* Note: For this code to work, the left/right frame borders need to be
- extended by at least 13 pixels each. By the time we get here, other
- code will have set up this border, but we allow an explicit check
- for debugging purposes.
- */
- /*for (i = 0; i < height; ++i) {
- for (j = 0; j < 13; ++j) {
- assert(ref[i * stride - 13 + j] == ref[i * stride]);
- assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]);
- }
- }*/
-
- for (i = 0; i < p_height; i += 8) {
- for (j = 0; j < p_width; j += 8) {
- const int32_t src_x = (p_col + j + 4) << subsampling_x;
- const int32_t src_y = (p_row + i + 4) << subsampling_y;
- const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
- const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
- const int32_t x4 = dst_x >> subsampling_x;
- const int32_t y4 = dst_y >> subsampling_y;
-
- int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
- int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
- int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
- int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
-
- // Add in all the constant terms, including rounding and offset
- sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
- (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
- sy4 += gamma * (-4) + delta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
- (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
-
- sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
- sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
-
- // Horizontal filter
- // If the block is aligned such that, after clamping, every sample
- // would be taken from the leftmost/rightmost column, then we can
- // skip the expensive horizontal filter.
- if (ix4 <= -7) {
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- tmp[k + 7] = _mm_set1_epi16(
- (1 << (bd + WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS -
- 1)) +
- ref[iy * stride] *
- (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS)));
- }
- } else if (ix4 >= width + 6) {
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- tmp[k + 7] = _mm_set1_epi16(
- (1 << (bd + WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS -
- 1)) +
- ref[iy * stride + (width - 1)] *
- (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS)));
- }
- } else {
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- int sx = sx4 + beta * (k + 4);
-
- // Load source pixels
- const __m128i src =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
- const __m128i src2 =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1));
-
- // Filter even-index pixels
- const __m128i tmp_0 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 0 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_2 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 2 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_4 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 4 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_6 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 6 * alpha) >> WARPEDDIFF_PREC_BITS)));
-
- // coeffs 0 1 0 1 2 3 2 3 for pixels 0, 2
- const __m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2);
- // coeffs 0 1 0 1 2 3 2 3 for pixels 4, 6
- const __m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6);
- // coeffs 4 5 4 5 6 7 6 7 for pixels 0, 2
- const __m128i tmp_12 = _mm_unpackhi_epi32(tmp_0, tmp_2);
- // coeffs 4 5 4 5 6 7 6 7 for pixels 4, 6
- const __m128i tmp_14 = _mm_unpackhi_epi32(tmp_4, tmp_6);
-
- // coeffs 0 1 0 1 0 1 0 1 for pixels 0, 2, 4, 6
- const __m128i coeff_0 = _mm_unpacklo_epi64(tmp_8, tmp_10);
- // coeffs 2 3 2 3 2 3 2 3 for pixels 0, 2, 4, 6
- const __m128i coeff_2 = _mm_unpackhi_epi64(tmp_8, tmp_10);
- // coeffs 4 5 4 5 4 5 4 5 for pixels 0, 2, 4, 6
- const __m128i coeff_4 = _mm_unpacklo_epi64(tmp_12, tmp_14);
- // coeffs 6 7 6 7 6 7 6 7 for pixels 0, 2, 4, 6
- const __m128i coeff_6 = _mm_unpackhi_epi64(tmp_12, tmp_14);
-
- const __m128i round_const = _mm_set1_epi32(
- (1 << offset_bits_horiz) + ((1 << reduce_bits_horiz) >> 1));
-
- // Calculate filtered results
- const __m128i res_0 = _mm_madd_epi16(src, coeff_0);
- const __m128i res_2 =
- _mm_madd_epi16(_mm_alignr_epi8(src2, src, 4), coeff_2);
- const __m128i res_4 =
- _mm_madd_epi16(_mm_alignr_epi8(src2, src, 8), coeff_4);
- const __m128i res_6 =
- _mm_madd_epi16(_mm_alignr_epi8(src2, src, 12), coeff_6);
-
- __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
- _mm_add_epi32(res_2, res_6));
- res_even = _mm_sra_epi32(_mm_add_epi32(res_even, round_const),
- _mm_cvtsi32_si128(reduce_bits_horiz));
-
- // Filter odd-index pixels
- const __m128i tmp_1 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 1 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_3 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 3 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_5 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 5 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_7 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 7 * alpha) >> WARPEDDIFF_PREC_BITS)));
-
- const __m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3);
- const __m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7);
- const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_1, tmp_3);
- const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_5, tmp_7);
-
- const __m128i coeff_1 = _mm_unpacklo_epi64(tmp_9, tmp_11);
- const __m128i coeff_3 = _mm_unpackhi_epi64(tmp_9, tmp_11);
- const __m128i coeff_5 = _mm_unpacklo_epi64(tmp_13, tmp_15);
- const __m128i coeff_7 = _mm_unpackhi_epi64(tmp_13, tmp_15);
-
- const __m128i res_1 =
- _mm_madd_epi16(_mm_alignr_epi8(src2, src, 2), coeff_1);
- const __m128i res_3 =
- _mm_madd_epi16(_mm_alignr_epi8(src2, src, 6), coeff_3);
- const __m128i res_5 =
- _mm_madd_epi16(_mm_alignr_epi8(src2, src, 10), coeff_5);
- const __m128i res_7 =
- _mm_madd_epi16(_mm_alignr_epi8(src2, src, 14), coeff_7);
-
- __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
- _mm_add_epi32(res_3, res_7));
- res_odd = _mm_sra_epi32(_mm_add_epi32(res_odd, round_const),
- _mm_cvtsi32_si128(reduce_bits_horiz));
-
- // Combine results into one register.
- // We store the columns in the order 0, 2, 4, 6, 1, 3, 5, 7
- // as this order helps with the vertical filter.
- tmp[k + 7] = _mm_packs_epi32(res_even, res_odd);
- }
- }
-
- // Vertical filter
- for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) {
- int sy = sy4 + delta * (k + 4);
-
- // Load from tmp and rearrange pairs of consecutive rows into the
- // column order 0 0 2 2 4 4 6 6; 1 1 3 3 5 5 7 7
- const __m128i *src = tmp + (k + 4);
- const __m128i src_0 = _mm_unpacklo_epi16(src[0], src[1]);
- const __m128i src_2 = _mm_unpacklo_epi16(src[2], src[3]);
- const __m128i src_4 = _mm_unpacklo_epi16(src[4], src[5]);
- const __m128i src_6 = _mm_unpacklo_epi16(src[6], src[7]);
-
- // Filter even-index pixels
- const __m128i tmp_0 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 0 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_2 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 2 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_4 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 4 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_6 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 6 * gamma) >> WARPEDDIFF_PREC_BITS)));
-
- const __m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2);
- const __m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6);
- const __m128i tmp_12 = _mm_unpackhi_epi32(tmp_0, tmp_2);
- const __m128i tmp_14 = _mm_unpackhi_epi32(tmp_4, tmp_6);
-
- const __m128i coeff_0 = _mm_unpacklo_epi64(tmp_8, tmp_10);
- const __m128i coeff_2 = _mm_unpackhi_epi64(tmp_8, tmp_10);
- const __m128i coeff_4 = _mm_unpacklo_epi64(tmp_12, tmp_14);
- const __m128i coeff_6 = _mm_unpackhi_epi64(tmp_12, tmp_14);
-
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_0);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_2);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_4);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_6);
-
- const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
- _mm_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m128i src_1 = _mm_unpackhi_epi16(src[0], src[1]);
- const __m128i src_3 = _mm_unpackhi_epi16(src[2], src[3]);
- const __m128i src_5 = _mm_unpackhi_epi16(src[4], src[5]);
- const __m128i src_7 = _mm_unpackhi_epi16(src[6], src[7]);
-
- const __m128i tmp_1 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 1 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_3 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 3 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_5 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 5 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_7 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 7 * gamma) >> WARPEDDIFF_PREC_BITS)));
-
- const __m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3);
- const __m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7);
- const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_1, tmp_3);
- const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_5, tmp_7);
-
- const __m128i coeff_1 = _mm_unpacklo_epi64(tmp_9, tmp_11);
- const __m128i coeff_3 = _mm_unpackhi_epi64(tmp_9, tmp_11);
- const __m128i coeff_5 = _mm_unpacklo_epi64(tmp_13, tmp_15);
- const __m128i coeff_7 = _mm_unpackhi_epi64(tmp_13, tmp_15);
-
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_1);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_3);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_5);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_7);
-
- const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
- _mm_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
-#if CONFIG_CONVOLVE_ROUND
- if (use_conv_params) {
- __m128i *const p =
- (__m128i *)&conv_params
- ->dst[(i + k + 4) * conv_params->dst_stride + j];
- const __m128i round_const = _mm_set1_epi32(
- -(1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)) +
- ((1 << (conv_params->round_1)) >> 1));
- res_lo = _mm_add_epi32(res_lo, round_const);
- res_lo =
- _mm_srl_epi16(res_lo, _mm_cvtsi32_si128(conv_params->round_1));
- if (comp_avg) res_lo = _mm_add_epi32(_mm_loadu_si128(p), res_lo);
- _mm_storeu_si128(p, res_lo);
- if (p_width > 4) {
- res_hi = _mm_add_epi32(res_hi, round_const);
- res_hi =
- _mm_srl_epi16(res_hi, _mm_cvtsi32_si128(conv_params->round_1));
- if (comp_avg)
- res_hi = _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi);
- _mm_storeu_si128(p + 1, res_hi);
- }
- } else {
-#else
- {
-#endif
- // Round and pack into 8 bits
- const __m128i round_const =
- _mm_set1_epi32(-(1 << (bd + VERSHEAR_REDUCE_PREC_BITS - 1)) +
- ((1 << VERSHEAR_REDUCE_PREC_BITS) >> 1));
-
- const __m128i res_lo_round = _mm_srai_epi32(
- _mm_add_epi32(res_lo, round_const), VERSHEAR_REDUCE_PREC_BITS);
- const __m128i res_hi_round = _mm_srai_epi32(
- _mm_add_epi32(res_hi, round_const), VERSHEAR_REDUCE_PREC_BITS);
-
- __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
- // Clamp res_16bit to the range [0, 2^bd - 1]
- const __m128i max_val = _mm_set1_epi16((1 << bd) - 1);
- const __m128i zero = _mm_setzero_si128();
- res_16bit = _mm_max_epi16(_mm_min_epi16(res_16bit, max_val), zero);
-
- // Store, blending with 'pred' if needed
- __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
-
- // Note: If we're outputting a 4x4 block, we need to be very careful
- // to only output 4 pixels at this point, to avoid encode/decode
- // mismatches when encoding with multiple threads.
- if (p_width == 4) {
- if (comp_avg)
- res_16bit = _mm_avg_epu16(res_16bit, _mm_loadl_epi64(p));
- _mm_storel_epi64(p, res_16bit);
- } else {
- if (comp_avg)
- res_16bit = _mm_avg_epu16(res_16bit, _mm_loadu_si128(p));
- _mm_storeu_si128(p, res_16bit);
- }
- }
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/highbd_wiener_convolve_avx2.c b/third_party/aom/av1/common/x86/highbd_wiener_convolve_avx2.c
new file mode 100644
index 000000000..0c8a8505b
--- /dev/null
+++ b/third_party/aom/av1/common/x86/highbd_wiener_convolve_avx2.c
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <immintrin.h>
+#include <assert.h>
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "av1/common/convolve.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/aom_filter.h"
+#include "aom_dsp/x86/synonyms.h"
+#include "aom_dsp/x86/synonyms_avx2.h"
+
+// 128-bit xmmwords are written as [ ... ] with the MSB on the left.
+// 256-bit ymmwords are written as two xmmwords, [ ... ][ ... ] with the MSB
+// on the left.
+// A row of, say, 16-bit pixels with values p0, p1, p2, ..., p14, p15 will be
+// loaded and stored as [ p15 ... p9 p8 ][ p7 ... p1 p0 ].
+void av1_highbd_wiener_convolve_add_src_avx2(
+ const uint8_t *src8, ptrdiff_t src_stride, uint8_t *dst8,
+ ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4, int w, int h,
+ const ConvolveParams *conv_params, int bd) {
+ assert(x_step_q4 == 16 && y_step_q4 == 16);
+ assert(!(w & 7));
+ assert(bd + FILTER_BITS - conv_params->round_0 + 2 <= 16);
+ (void)x_step_q4;
+ (void)y_step_q4;
+
+ const uint16_t *const src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *const dst = CONVERT_TO_SHORTPTR(dst8);
+
+ DECLARE_ALIGNED(32, uint16_t,
+ temp[(MAX_SB_SIZE + SUBPEL_TAPS - 1) * MAX_SB_SIZE]);
+ int intermediate_height = h + SUBPEL_TAPS - 1;
+ const int center_tap = ((SUBPEL_TAPS - 1) / 2);
+ const uint16_t *const src_ptr = src - center_tap * src_stride - center_tap;
+
+ const __m128i zero_128 = _mm_setzero_si128();
+ const __m256i zero_256 = _mm256_setzero_si256();
+
+ // Add an offset to account for the "add_src" part of the convolve function.
+ const __m128i offset = _mm_insert_epi16(zero_128, 1 << FILTER_BITS, 3);
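+  // The value 1 << FILTER_BITS is placed at tap index 3 (the source-pixel
+  // position, SUBPEL_TAPS / 2 - 1), so the filter output includes the
+  // unfiltered source sample as in the C "add_src" reference.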
+
+ const __m256i clamp_low = zero_256;
+
+ /* Horizontal filter */
+ {
+ const __m256i clamp_high_ep =
+ _mm256_set1_epi16(WIENER_CLAMP_LIMIT(conv_params->round_0, bd) - 1);
+
+ // coeffs [ f7 f6 f5 f4 f3 f2 f1 f0 ]
+ const __m128i coeffs_x = _mm_add_epi16(xx_loadu_128(filter_x), offset);
+
+ // coeffs [ f3 f2 f3 f2 f1 f0 f1 f0 ]
+ const __m128i coeffs_0123 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
+ // coeffs [ f7 f6 f7 f6 f5 f4 f5 f4 ]
+ const __m128i coeffs_4567 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
+
+ // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ]
+ const __m128i coeffs_01_128 = _mm_unpacklo_epi64(coeffs_0123, coeffs_0123);
+ // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ]
+ const __m128i coeffs_23_128 = _mm_unpackhi_epi64(coeffs_0123, coeffs_0123);
+ // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ]
+ const __m128i coeffs_45_128 = _mm_unpacklo_epi64(coeffs_4567, coeffs_4567);
+ // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ]
+ const __m128i coeffs_67_128 = _mm_unpackhi_epi64(coeffs_4567, coeffs_4567);
+
+ // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ][ f1 f0 f1 f0 f1 f0 f1 f0 ]
+ const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128);
+ // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ][ f3 f2 f3 f2 f3 f2 f3 f2 ]
+ const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128);
+ // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ][ f5 f4 f5 f4 f5 f4 f5 f4 ]
+ const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128);
+ // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ][ f7 f6 f7 f6 f7 f6 f7 f6 ]
+ const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128);
+
+ const __m256i round_const = _mm256_set1_epi32(
+ (1 << (conv_params->round_0 - 1)) + (1 << (bd + FILTER_BITS - 1)));
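+    // round_const combines the usual rounding term for round_0 with a
+    // bd-dependent offset that keeps the clamped intermediate values
+    // non-negative.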
+
+ for (int i = 0; i < intermediate_height; ++i) {
+ for (int j = 0; j < w; j += 16) {
+ const uint16_t *src_ij = src_ptr + i * src_stride + j;
+
+ // Load 16-bit src data
+ const __m256i src_0 = yy_loadu_256(src_ij + 0);
+ const __m256i src_1 = yy_loadu_256(src_ij + 1);
+ const __m256i src_2 = yy_loadu_256(src_ij + 2);
+ const __m256i src_3 = yy_loadu_256(src_ij + 3);
+ const __m256i src_4 = yy_loadu_256(src_ij + 4);
+ const __m256i src_5 = yy_loadu_256(src_ij + 5);
+ const __m256i src_6 = yy_loadu_256(src_ij + 6);
+ const __m256i src_7 = yy_loadu_256(src_ij + 7);
+
+ // Multiply src data by filter coeffs and sum pairs
+ const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01);
+ const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01);
+ const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23);
+ const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23);
+ const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45);
+ const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45);
+ const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67);
+ const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67);
+
+ // Calculate scalar product for even- and odd-indices separately,
+ // increasing to 32-bit precision
+ const __m256i res_even_sum = _mm256_add_epi32(
+ _mm256_add_epi32(res_0, res_4), _mm256_add_epi32(res_2, res_6));
+ const __m256i res_even = _mm256_srai_epi32(
+ _mm256_add_epi32(res_even_sum, round_const), conv_params->round_0);
+
+ const __m256i res_odd_sum = _mm256_add_epi32(
+ _mm256_add_epi32(res_1, res_5), _mm256_add_epi32(res_3, res_7));
+ const __m256i res_odd = _mm256_srai_epi32(
+ _mm256_add_epi32(res_odd_sum, round_const), conv_params->round_0);
+
+ // Reduce to 16-bit precision and pack even- and odd-index results
+ // back into one register. The _mm256_packs_epi32 intrinsic returns
+ // a register with the pixels ordered as follows:
+ // [ 15 13 11 9 14 12 10 8 ] [ 7 5 3 1 6 4 2 0 ]
+ const __m256i res = _mm256_packs_epi32(res_even, res_odd);
+ const __m256i res_clamped =
+ _mm256_min_epi16(_mm256_max_epi16(res, clamp_low), clamp_high_ep);
+
+ // Store in a temporary array
+ yy_storeu_256(temp + i * MAX_SB_SIZE + j, res_clamped);
+ }
+ }
+ }
+
+ /* Vertical filter */
+ {
+ const __m256i clamp_high = _mm256_set1_epi16((1 << bd) - 1);
+
+ // coeffs [ f7 f6 f5 f4 f3 f2 f1 f0 ]
+ const __m128i coeffs_y = _mm_add_epi16(xx_loadu_128(filter_y), offset);
+
+ // coeffs [ f3 f2 f3 f2 f1 f0 f1 f0 ]
+ const __m128i coeffs_0123 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
+ // coeffs [ f7 f6 f7 f6 f5 f4 f5 f4 ]
+ const __m128i coeffs_4567 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
+
+ // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ]
+ const __m128i coeffs_01_128 = _mm_unpacklo_epi64(coeffs_0123, coeffs_0123);
+ // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ]
+ const __m128i coeffs_23_128 = _mm_unpackhi_epi64(coeffs_0123, coeffs_0123);
+ // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ]
+ const __m128i coeffs_45_128 = _mm_unpacklo_epi64(coeffs_4567, coeffs_4567);
+ // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ]
+ const __m128i coeffs_67_128 = _mm_unpackhi_epi64(coeffs_4567, coeffs_4567);
+
+ // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ][ f1 f0 f1 f0 f1 f0 f1 f0 ]
+ const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128);
+ // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ][ f3 f2 f3 f2 f3 f2 f3 f2 ]
+ const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128);
+ // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ][ f5 f4 f5 f4 f5 f4 f5 f4 ]
+ const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128);
+ // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ][ f7 f6 f7 f6 f7 f6 f7 f6 ]
+ const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128);
+
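+ // The negative (1 << (bd + conv_params->round_1 - 1)) term in the round
+ // constant below removes the bias that the horizontal pass folded into the
+ // intermediate values.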
+ const __m256i round_const =
+ _mm256_set1_epi32((1 << (conv_params->round_1 - 1)) -
+ (1 << (bd + conv_params->round_1 - 1)));
+
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; j += 16) {
+ const uint16_t *temp_ij = temp + i * MAX_SB_SIZE + j;
+
+ // Load 16-bit data from the output of the horizontal filter in
+ // which the pixels are ordered as follows:
+ // [ 15 13 11 9 14 12 10 8 ] [ 7 5 3 1 6 4 2 0 ]
+ const __m256i data_0 = yy_loadu_256(temp_ij + 0 * MAX_SB_SIZE);
+ const __m256i data_1 = yy_loadu_256(temp_ij + 1 * MAX_SB_SIZE);
+ const __m256i data_2 = yy_loadu_256(temp_ij + 2 * MAX_SB_SIZE);
+ const __m256i data_3 = yy_loadu_256(temp_ij + 3 * MAX_SB_SIZE);
+ const __m256i data_4 = yy_loadu_256(temp_ij + 4 * MAX_SB_SIZE);
+ const __m256i data_5 = yy_loadu_256(temp_ij + 5 * MAX_SB_SIZE);
+ const __m256i data_6 = yy_loadu_256(temp_ij + 6 * MAX_SB_SIZE);
+ const __m256i data_7 = yy_loadu_256(temp_ij + 7 * MAX_SB_SIZE);
+
+ // Filter the even indices, increasing to 32-bit precision
+ const __m256i src_0 = _mm256_unpacklo_epi16(data_0, data_1);
+ const __m256i src_2 = _mm256_unpacklo_epi16(data_2, data_3);
+ const __m256i src_4 = _mm256_unpacklo_epi16(data_4, data_5);
+ const __m256i src_6 = _mm256_unpacklo_epi16(data_6, data_7);
+
+ const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01);
+ const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23);
+ const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45);
+ const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67);
+
+ const __m256i res_even = _mm256_add_epi32(
+ _mm256_add_epi32(res_0, res_2), _mm256_add_epi32(res_4, res_6));
+
+ // Filter the odd indices, increasing to 32-bit precision
+ const __m256i src_1 = _mm256_unpackhi_epi16(data_0, data_1);
+ const __m256i src_3 = _mm256_unpackhi_epi16(data_2, data_3);
+ const __m256i src_5 = _mm256_unpackhi_epi16(data_4, data_5);
+ const __m256i src_7 = _mm256_unpackhi_epi16(data_6, data_7);
+
+ const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01);
+ const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23);
+ const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45);
+ const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67);
+
+ const __m256i res_odd = _mm256_add_epi32(
+ _mm256_add_epi32(res_1, res_3), _mm256_add_epi32(res_5, res_7));
+
+ // Pixels are currently in the following order:
+ // res_even order: [ 14 12 10 8 ] [ 6 4 2 0 ]
+ // res_odd order: [ 15 13 11 9 ] [ 7 5 3 1 ]
+ //
+ // Rearrange the pixels into the following order:
+ // res_lo order: [ 11 10 9 8 ] [ 3 2 1 0 ]
+ // res_hi order: [ 15 14 13 12 ] [ 7 6 5 4 ]
+ const __m256i res_lo = _mm256_unpacklo_epi32(res_even, res_odd);
+ const __m256i res_hi = _mm256_unpackhi_epi32(res_even, res_odd);
+
+ const __m256i res_lo_round = _mm256_srai_epi32(
+ _mm256_add_epi32(res_lo, round_const), conv_params->round_1);
+ const __m256i res_hi_round = _mm256_srai_epi32(
+ _mm256_add_epi32(res_hi, round_const), conv_params->round_1);
+
+ // Reduce to 16-bit precision and pack into the correct order:
+ // [ 15 14 13 12 11 10 9 8 ][ 7 6 5 4 3 2 1 0 ]
+ const __m256i res_16bit =
+ _mm256_packs_epi32(res_lo_round, res_hi_round);
+ const __m256i res_16bit_clamped = _mm256_min_epi16(
+ _mm256_max_epi16(res_16bit, clamp_low), clamp_high);
+
+ // Store in the dst array
+ yy_storeu_256(dst + i * dst_stride + j, res_16bit_clamped);
+ }
+ }
+ }
+}
diff --git a/third_party/aom/av1/common/x86/highbd_wiener_convolve_ssse3.c b/third_party/aom/av1/common/x86/highbd_wiener_convolve_ssse3.c
new file mode 100644
index 000000000..818b1099c
--- /dev/null
+++ b/third_party/aom/av1/common/x86/highbd_wiener_convolve_ssse3.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tmmintrin.h>
+#include <assert.h>
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "av1/common/convolve.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/aom_filter.h"
+
+void av1_highbd_wiener_convolve_add_src_ssse3(
+ const uint8_t *src8, ptrdiff_t src_stride, uint8_t *dst8,
+ ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4, int w, int h,
+ const ConvolveParams *conv_params, int bd) {
+ assert(x_step_q4 == 16 && y_step_q4 == 16);
+ assert(!(w & 7));
+ assert(bd + FILTER_BITS - conv_params->round_0 + 2 <= 16);
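+ // i.e. the clamped outputs of the horizontal pass must fit in the 16-bit
+ // intermediate buffer used between the two passes.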
+ (void)x_step_q4;
+ (void)y_step_q4;
+
+ const uint16_t *const src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *const dst = CONVERT_TO_SHORTPTR(dst8);
+
+ DECLARE_ALIGNED(16, uint16_t,
+ temp[(MAX_SB_SIZE + SUBPEL_TAPS - 1) * MAX_SB_SIZE]);
+ int intermediate_height = h + SUBPEL_TAPS - 1;
+ int i, j;
+ const int center_tap = ((SUBPEL_TAPS - 1) / 2);
+ const uint16_t *const src_ptr = src - center_tap * src_stride - center_tap;
+
+ const __m128i zero = _mm_setzero_si128();
+ // Add an offset to account for the "add_src" part of the convolve function.
+ const __m128i offset = _mm_insert_epi16(zero, 1 << FILTER_BITS, 3);
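+ // Lane 3 is the center tap, so the filter picks up an extra
+ // 1 << FILTER_BITS there and the source pixel is added back in.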
+
+ /* Horizontal filter */
+ {
+ const __m128i coeffs_x =
+ _mm_add_epi16(_mm_loadu_si128((__m128i *)filter_x), offset);
+
+ // coeffs 0 1 0 1 2 3 2 3
+ const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
+ // coeffs 4 5 4 5 6 7 6 7
+ const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
+
+ // coeffs 0 1 0 1 0 1 0 1
+ const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
+ // coeffs 2 3 2 3 2 3 2 3
+ const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
+ // coeffs 4 5 4 5 4 5 4 5
+ const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
+ // coeffs 6 7 6 7 6 7 6 7
+ const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
+
+ const __m128i round_const = _mm_set1_epi32(
+ (1 << (conv_params->round_0 - 1)) + (1 << (bd + FILTER_BITS - 1)));
+
+ for (i = 0; i < intermediate_height; ++i) {
+ for (j = 0; j < w; j += 8) {
+ const __m128i data =
+ _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
+ const __m128i data2 =
+ _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j + 8]);
+
+ // Filter even-index pixels
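+ // _mm_alignr_epi8(data2, data, 2 * k) is the pixel window shifted right by
+ // k pixels, so each madd below accumulates the (k, k + 1) tap pair for the
+ // even-index outputs.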
+ const __m128i res_0 = _mm_madd_epi16(data, coeff_01);
+ const __m128i res_2 =
+ _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23);
+ const __m128i res_4 =
+ _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45);
+ const __m128i res_6 =
+ _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67);
+
+ __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
+ _mm_add_epi32(res_2, res_6));
+ res_even = _mm_srai_epi32(_mm_add_epi32(res_even, round_const),
+ conv_params->round_0);
+
+ // Filter odd-index pixels
+ const __m128i res_1 =
+ _mm_madd_epi16(_mm_alignr_epi8(data2, data, 2), coeff_01);
+ const __m128i res_3 =
+ _mm_madd_epi16(_mm_alignr_epi8(data2, data, 6), coeff_23);
+ const __m128i res_5 =
+ _mm_madd_epi16(_mm_alignr_epi8(data2, data, 10), coeff_45);
+ const __m128i res_7 =
+ _mm_madd_epi16(_mm_alignr_epi8(data2, data, 14), coeff_67);
+
+ __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
+ _mm_add_epi32(res_3, res_7));
+ res_odd = _mm_srai_epi32(_mm_add_epi32(res_odd, round_const),
+ conv_params->round_0);
+
+ // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
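+ // (the vertical pass works on temp in this interleaved column order and
+ // only restores the natural 0 ... 7 order after its final unpack)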
+ const __m128i maxval =
+ _mm_set1_epi16((WIENER_CLAMP_LIMIT(conv_params->round_0, bd)) - 1);
+ __m128i res = _mm_packs_epi32(res_even, res_odd);
+ res = _mm_min_epi16(_mm_max_epi16(res, zero), maxval);
+ _mm_storeu_si128((__m128i *)&temp[i * MAX_SB_SIZE + j], res);
+ }
+ }
+ }
+
+ /* Vertical filter */
+ {
+ const __m128i coeffs_y =
+ _mm_add_epi16(_mm_loadu_si128((__m128i *)filter_y), offset);
+
+ // coeffs 0 1 0 1 2 3 2 3
+ const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
+ // coeffs 4 5 4 5 6 7 6 7
+ const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
+
+ // coeffs 0 1 0 1 0 1 0 1
+ const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
+ // coeffs 2 3 2 3 2 3 2 3
+ const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
+ // coeffs 4 5 4 5 4 5 4 5
+ const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
+ // coeffs 6 7 6 7 6 7 6 7
+ const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
+
+ const __m128i round_const =
+ _mm_set1_epi32((1 << (conv_params->round_1 - 1)) -
+ (1 << (bd + conv_params->round_1 - 1)));
+
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; j += 8) {
+ // Filter even-index pixels
+ const uint16_t *data = &temp[i * MAX_SB_SIZE + j];
+ const __m128i src_0 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 0 * MAX_SB_SIZE),
+ *(__m128i *)(data + 1 * MAX_SB_SIZE));
+ const __m128i src_2 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 2 * MAX_SB_SIZE),
+ *(__m128i *)(data + 3 * MAX_SB_SIZE));
+ const __m128i src_4 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 4 * MAX_SB_SIZE),
+ *(__m128i *)(data + 5 * MAX_SB_SIZE));
+ const __m128i src_6 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 6 * MAX_SB_SIZE),
+ *(__m128i *)(data + 7 * MAX_SB_SIZE));
+
+ const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
+ const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
+ const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
+ const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
+
+ const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
+ _mm_add_epi32(res_4, res_6));
+
+ // Filter odd-index pixels
+ const __m128i src_1 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 0 * MAX_SB_SIZE),
+ *(__m128i *)(data + 1 * MAX_SB_SIZE));
+ const __m128i src_3 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 2 * MAX_SB_SIZE),
+ *(__m128i *)(data + 3 * MAX_SB_SIZE));
+ const __m128i src_5 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 4 * MAX_SB_SIZE),
+ *(__m128i *)(data + 5 * MAX_SB_SIZE));
+ const __m128i src_7 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 6 * MAX_SB_SIZE),
+ *(__m128i *)(data + 7 * MAX_SB_SIZE));
+
+ const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
+ const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
+ const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
+ const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
+
+ const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
+ _mm_add_epi32(res_5, res_7));
+
+ // Rearrange pixels back into the order 0 ... 7
+ const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
+ const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
+
+ const __m128i res_lo_round = _mm_srai_epi32(
+ _mm_add_epi32(res_lo, round_const), conv_params->round_1);
+ const __m128i res_hi_round = _mm_srai_epi32(
+ _mm_add_epi32(res_hi, round_const), conv_params->round_1);
+
+ const __m128i maxval = _mm_set1_epi16((1 << bd) - 1);
+ __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
+ res_16bit = _mm_min_epi16(_mm_max_epi16(res_16bit, zero), maxval);
+
+ __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
+ _mm_storeu_si128(p, res_16bit);
+ }
+ }
+ }
+}
diff --git a/third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c b/third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c
deleted file mode 100644
index c440d0f88..000000000
--- a/third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c
+++ /dev/null
@@ -1,450 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h> // avx2
-
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
-
-#include "aom_dsp/x86/inv_txfm_common_avx2.h"
-
-void av1_idct16_avx2(__m256i *in) {
- const __m256i cospi_p30_m02 = pair256_set_epi16(cospi_30_64, -cospi_2_64);
- const __m256i cospi_p02_p30 = pair256_set_epi16(cospi_2_64, cospi_30_64);
- const __m256i cospi_p14_m18 = pair256_set_epi16(cospi_14_64, -cospi_18_64);
- const __m256i cospi_p18_p14 = pair256_set_epi16(cospi_18_64, cospi_14_64);
- const __m256i cospi_p22_m10 = pair256_set_epi16(cospi_22_64, -cospi_10_64);
- const __m256i cospi_p10_p22 = pair256_set_epi16(cospi_10_64, cospi_22_64);
- const __m256i cospi_p06_m26 = pair256_set_epi16(cospi_6_64, -cospi_26_64);
- const __m256i cospi_p26_p06 = pair256_set_epi16(cospi_26_64, cospi_6_64);
- const __m256i cospi_p28_m04 = pair256_set_epi16(cospi_28_64, -cospi_4_64);
- const __m256i cospi_p04_p28 = pair256_set_epi16(cospi_4_64, cospi_28_64);
- const __m256i cospi_p12_m20 = pair256_set_epi16(cospi_12_64, -cospi_20_64);
- const __m256i cospi_p20_p12 = pair256_set_epi16(cospi_20_64, cospi_12_64);
- const __m256i cospi_p16_p16 = _mm256_set1_epi16((int16_t)cospi_16_64);
- const __m256i cospi_p16_m16 = pair256_set_epi16(cospi_16_64, -cospi_16_64);
- const __m256i cospi_p24_m08 = pair256_set_epi16(cospi_24_64, -cospi_8_64);
- const __m256i cospi_p08_p24 = pair256_set_epi16(cospi_8_64, cospi_24_64);
- const __m256i cospi_m08_p24 = pair256_set_epi16(-cospi_8_64, cospi_24_64);
- const __m256i cospi_p24_p08 = pair256_set_epi16(cospi_24_64, cospi_8_64);
- const __m256i cospi_m24_m08 = pair256_set_epi16(-cospi_24_64, -cospi_8_64);
- __m256i u0, u1, u2, u3, u4, u5, u6, u7;
- __m256i v0, v1, v2, v3, v4, v5, v6, v7;
- __m256i t0, t1, t2, t3, t4, t5, t6, t7;
-
- // stage 1, (0-7)
- u0 = in[0];
- u1 = in[8];
- u2 = in[4];
- u3 = in[12];
- u4 = in[2];
- u5 = in[10];
- u6 = in[6];
- u7 = in[14];
-
- // stage 2, (0-7)
- // stage 3, (0-7)
- t0 = u0;
- t1 = u1;
- t2 = u2;
- t3 = u3;
- unpack_butter_fly(&u4, &u7, &cospi_p28_m04, &cospi_p04_p28, &t4, &t7);
- unpack_butter_fly(&u5, &u6, &cospi_p12_m20, &cospi_p20_p12, &t5, &t6);
-
- // stage 4, (0-7)
- unpack_butter_fly(&t0, &t1, &cospi_p16_p16, &cospi_p16_m16, &u0, &u1);
- unpack_butter_fly(&t2, &t3, &cospi_p24_m08, &cospi_p08_p24, &u2, &u3);
- u4 = _mm256_add_epi16(t4, t5);
- u5 = _mm256_sub_epi16(t4, t5);
- u6 = _mm256_sub_epi16(t7, t6);
- u7 = _mm256_add_epi16(t7, t6);
-
- // stage 5, (0-7)
- t0 = _mm256_add_epi16(u0, u3);
- t1 = _mm256_add_epi16(u1, u2);
- t2 = _mm256_sub_epi16(u1, u2);
- t3 = _mm256_sub_epi16(u0, u3);
- t4 = u4;
- t7 = u7;
- unpack_butter_fly(&u6, &u5, &cospi_p16_m16, &cospi_p16_p16, &t5, &t6);
-
- // stage 6, (0-7)
- u0 = _mm256_add_epi16(t0, t7);
- u1 = _mm256_add_epi16(t1, t6);
- u2 = _mm256_add_epi16(t2, t5);
- u3 = _mm256_add_epi16(t3, t4);
- u4 = _mm256_sub_epi16(t3, t4);
- u5 = _mm256_sub_epi16(t2, t5);
- u6 = _mm256_sub_epi16(t1, t6);
- u7 = _mm256_sub_epi16(t0, t7);
-
- // stage 1, (8-15)
- v0 = in[1];
- v1 = in[9];
- v2 = in[5];
- v3 = in[13];
- v4 = in[3];
- v5 = in[11];
- v6 = in[7];
- v7 = in[15];
-
- // stage 2, (8-15)
- unpack_butter_fly(&v0, &v7, &cospi_p30_m02, &cospi_p02_p30, &t0, &t7);
- unpack_butter_fly(&v1, &v6, &cospi_p14_m18, &cospi_p18_p14, &t1, &t6);
- unpack_butter_fly(&v2, &v5, &cospi_p22_m10, &cospi_p10_p22, &t2, &t5);
- unpack_butter_fly(&v3, &v4, &cospi_p06_m26, &cospi_p26_p06, &t3, &t4);
-
- // stage 3, (8-15)
- v0 = _mm256_add_epi16(t0, t1);
- v1 = _mm256_sub_epi16(t0, t1);
- v2 = _mm256_sub_epi16(t3, t2);
- v3 = _mm256_add_epi16(t2, t3);
- v4 = _mm256_add_epi16(t4, t5);
- v5 = _mm256_sub_epi16(t4, t5);
- v6 = _mm256_sub_epi16(t7, t6);
- v7 = _mm256_add_epi16(t6, t7);
-
- // stage 4, (8-15)
- t0 = v0;
- t7 = v7;
- t3 = v3;
- t4 = v4;
- unpack_butter_fly(&v1, &v6, &cospi_m08_p24, &cospi_p24_p08, &t1, &t6);
- unpack_butter_fly(&v2, &v5, &cospi_m24_m08, &cospi_m08_p24, &t2, &t5);
-
- // stage 5, (8-15)
- v0 = _mm256_add_epi16(t0, t3);
- v1 = _mm256_add_epi16(t1, t2);
- v2 = _mm256_sub_epi16(t1, t2);
- v3 = _mm256_sub_epi16(t0, t3);
- v4 = _mm256_sub_epi16(t7, t4);
- v5 = _mm256_sub_epi16(t6, t5);
- v6 = _mm256_add_epi16(t6, t5);
- v7 = _mm256_add_epi16(t7, t4);
-
- // stage 6, (8-15)
- t0 = v0;
- t1 = v1;
- t6 = v6;
- t7 = v7;
- unpack_butter_fly(&v5, &v2, &cospi_p16_m16, &cospi_p16_p16, &t2, &t5);
- unpack_butter_fly(&v4, &v3, &cospi_p16_m16, &cospi_p16_p16, &t3, &t4);
-
- // stage 7
- in[0] = _mm256_add_epi16(u0, t7);
- in[1] = _mm256_add_epi16(u1, t6);
- in[2] = _mm256_add_epi16(u2, t5);
- in[3] = _mm256_add_epi16(u3, t4);
- in[4] = _mm256_add_epi16(u4, t3);
- in[5] = _mm256_add_epi16(u5, t2);
- in[6] = _mm256_add_epi16(u6, t1);
- in[7] = _mm256_add_epi16(u7, t0);
- in[8] = _mm256_sub_epi16(u7, t0);
- in[9] = _mm256_sub_epi16(u6, t1);
- in[10] = _mm256_sub_epi16(u5, t2);
- in[11] = _mm256_sub_epi16(u4, t3);
- in[12] = _mm256_sub_epi16(u3, t4);
- in[13] = _mm256_sub_epi16(u2, t5);
- in[14] = _mm256_sub_epi16(u1, t6);
- in[15] = _mm256_sub_epi16(u0, t7);
-}
-
-static void idct16(__m256i *in) {
- mm256_transpose_16x16(in, in);
- av1_idct16_avx2(in);
-}
-
-static INLINE void butterfly_32b(const __m256i *a0, const __m256i *a1,
- const __m256i *c0, const __m256i *c1,
- __m256i *b) {
- __m256i x0, x1;
- x0 = _mm256_unpacklo_epi16(*a0, *a1);
- x1 = _mm256_unpackhi_epi16(*a0, *a1);
- b[0] = _mm256_madd_epi16(x0, *c0);
- b[1] = _mm256_madd_epi16(x1, *c0);
- b[2] = _mm256_madd_epi16(x0, *c1);
- b[3] = _mm256_madd_epi16(x1, *c1);
-}
-
-static INLINE void group_rounding(__m256i *a, int num) {
- const __m256i dct_rounding = _mm256_set1_epi32(DCT_CONST_ROUNDING);
- int i;
- for (i = 0; i < num; ++i) {
- a[i] = _mm256_add_epi32(a[i], dct_rounding);
- a[i] = _mm256_srai_epi32(a[i], DCT_CONST_BITS);
- }
-}
-
-static INLINE void add_rnd(const __m256i *a, const __m256i *b, __m256i *out) {
- __m256i x[4];
- x[0] = _mm256_add_epi32(a[0], b[0]);
- x[1] = _mm256_add_epi32(a[1], b[1]);
- x[2] = _mm256_add_epi32(a[2], b[2]);
- x[3] = _mm256_add_epi32(a[3], b[3]);
-
- group_rounding(x, 4);
-
- out[0] = _mm256_packs_epi32(x[0], x[1]);
- out[1] = _mm256_packs_epi32(x[2], x[3]);
-}
-
-static INLINE void sub_rnd(const __m256i *a, const __m256i *b, __m256i *out) {
- __m256i x[4];
- x[0] = _mm256_sub_epi32(a[0], b[0]);
- x[1] = _mm256_sub_epi32(a[1], b[1]);
- x[2] = _mm256_sub_epi32(a[2], b[2]);
- x[3] = _mm256_sub_epi32(a[3], b[3]);
-
- group_rounding(x, 4);
-
- out[0] = _mm256_packs_epi32(x[0], x[1]);
- out[1] = _mm256_packs_epi32(x[2], x[3]);
-}
-
-static INLINE void butterfly_rnd(__m256i *a, __m256i *out) {
- group_rounding(a, 4);
- out[0] = _mm256_packs_epi32(a[0], a[1]);
- out[1] = _mm256_packs_epi32(a[2], a[3]);
-}
-
-static void iadst16_avx2(__m256i *in) {
- const __m256i cospi_p01_p31 = pair256_set_epi16(cospi_1_64, cospi_31_64);
- const __m256i cospi_p31_m01 = pair256_set_epi16(cospi_31_64, -cospi_1_64);
- const __m256i cospi_p05_p27 = pair256_set_epi16(cospi_5_64, cospi_27_64);
- const __m256i cospi_p27_m05 = pair256_set_epi16(cospi_27_64, -cospi_5_64);
- const __m256i cospi_p09_p23 = pair256_set_epi16(cospi_9_64, cospi_23_64);
- const __m256i cospi_p23_m09 = pair256_set_epi16(cospi_23_64, -cospi_9_64);
- const __m256i cospi_p13_p19 = pair256_set_epi16(cospi_13_64, cospi_19_64);
- const __m256i cospi_p19_m13 = pair256_set_epi16(cospi_19_64, -cospi_13_64);
- const __m256i cospi_p17_p15 = pair256_set_epi16(cospi_17_64, cospi_15_64);
- const __m256i cospi_p15_m17 = pair256_set_epi16(cospi_15_64, -cospi_17_64);
- const __m256i cospi_p21_p11 = pair256_set_epi16(cospi_21_64, cospi_11_64);
- const __m256i cospi_p11_m21 = pair256_set_epi16(cospi_11_64, -cospi_21_64);
- const __m256i cospi_p25_p07 = pair256_set_epi16(cospi_25_64, cospi_7_64);
- const __m256i cospi_p07_m25 = pair256_set_epi16(cospi_7_64, -cospi_25_64);
- const __m256i cospi_p29_p03 = pair256_set_epi16(cospi_29_64, cospi_3_64);
- const __m256i cospi_p03_m29 = pair256_set_epi16(cospi_3_64, -cospi_29_64);
- const __m256i cospi_p04_p28 = pair256_set_epi16(cospi_4_64, cospi_28_64);
- const __m256i cospi_p28_m04 = pair256_set_epi16(cospi_28_64, -cospi_4_64);
- const __m256i cospi_p20_p12 = pair256_set_epi16(cospi_20_64, cospi_12_64);
- const __m256i cospi_p12_m20 = pair256_set_epi16(cospi_12_64, -cospi_20_64);
- const __m256i cospi_m28_p04 = pair256_set_epi16(-cospi_28_64, cospi_4_64);
- const __m256i cospi_m12_p20 = pair256_set_epi16(-cospi_12_64, cospi_20_64);
- const __m256i cospi_p08_p24 = pair256_set_epi16(cospi_8_64, cospi_24_64);
- const __m256i cospi_p24_m08 = pair256_set_epi16(cospi_24_64, -cospi_8_64);
- const __m256i cospi_m24_p08 = pair256_set_epi16(-cospi_24_64, cospi_8_64);
- const __m256i cospi_m16_m16 = _mm256_set1_epi16((int16_t)-cospi_16_64);
- const __m256i cospi_p16_p16 = _mm256_set1_epi16((int16_t)cospi_16_64);
- const __m256i cospi_p16_m16 = pair256_set_epi16(cospi_16_64, -cospi_16_64);
- const __m256i cospi_m16_p16 = pair256_set_epi16(-cospi_16_64, cospi_16_64);
- const __m256i zero = _mm256_setzero_si256();
- __m256i x[16], s[16];
- __m256i u[4], v[4];
-
- // stage 1
- butterfly_32b(&in[15], &in[0], &cospi_p01_p31, &cospi_p31_m01, u);
- butterfly_32b(&in[7], &in[8], &cospi_p17_p15, &cospi_p15_m17, v);
- add_rnd(u, v, &x[0]);
- sub_rnd(u, v, &x[8]);
-
- butterfly_32b(&in[13], &in[2], &cospi_p05_p27, &cospi_p27_m05, u);
- butterfly_32b(&in[5], &in[10], &cospi_p21_p11, &cospi_p11_m21, v);
- add_rnd(u, v, &x[2]);
- sub_rnd(u, v, &x[10]);
-
- butterfly_32b(&in[11], &in[4], &cospi_p09_p23, &cospi_p23_m09, u);
- butterfly_32b(&in[3], &in[12], &cospi_p25_p07, &cospi_p07_m25, v);
- add_rnd(u, v, &x[4]);
- sub_rnd(u, v, &x[12]);
-
- butterfly_32b(&in[9], &in[6], &cospi_p13_p19, &cospi_p19_m13, u);
- butterfly_32b(&in[1], &in[14], &cospi_p29_p03, &cospi_p03_m29, v);
- add_rnd(u, v, &x[6]);
- sub_rnd(u, v, &x[14]);
-
- // stage 2
- s[0] = _mm256_add_epi16(x[0], x[4]);
- s[1] = _mm256_add_epi16(x[1], x[5]);
- s[2] = _mm256_add_epi16(x[2], x[6]);
- s[3] = _mm256_add_epi16(x[3], x[7]);
- s[4] = _mm256_sub_epi16(x[0], x[4]);
- s[5] = _mm256_sub_epi16(x[1], x[5]);
- s[6] = _mm256_sub_epi16(x[2], x[6]);
- s[7] = _mm256_sub_epi16(x[3], x[7]);
- butterfly_32b(&x[8], &x[9], &cospi_p04_p28, &cospi_p28_m04, u);
- butterfly_32b(&x[12], &x[13], &cospi_m28_p04, &cospi_p04_p28, v);
- add_rnd(u, v, &s[8]);
- sub_rnd(u, v, &s[12]);
-
- butterfly_32b(&x[10], &x[11], &cospi_p20_p12, &cospi_p12_m20, u);
- butterfly_32b(&x[14], &x[15], &cospi_m12_p20, &cospi_p20_p12, v);
- add_rnd(u, v, &s[10]);
- sub_rnd(u, v, &s[14]);
-
- // stage 3
- x[0] = _mm256_add_epi16(s[0], s[2]);
- x[1] = _mm256_add_epi16(s[1], s[3]);
- x[2] = _mm256_sub_epi16(s[0], s[2]);
- x[3] = _mm256_sub_epi16(s[1], s[3]);
-
- x[8] = _mm256_add_epi16(s[8], s[10]);
- x[9] = _mm256_add_epi16(s[9], s[11]);
- x[10] = _mm256_sub_epi16(s[8], s[10]);
- x[11] = _mm256_sub_epi16(s[9], s[11]);
-
- butterfly_32b(&s[4], &s[5], &cospi_p08_p24, &cospi_p24_m08, u);
- butterfly_32b(&s[6], &s[7], &cospi_m24_p08, &cospi_p08_p24, v);
- add_rnd(u, v, &x[4]);
- sub_rnd(u, v, &x[6]);
-
- butterfly_32b(&s[12], &s[13], &cospi_p08_p24, &cospi_p24_m08, u);
- butterfly_32b(&s[14], &s[15], &cospi_m24_p08, &cospi_p08_p24, v);
- add_rnd(u, v, &x[12]);
- sub_rnd(u, v, &x[14]);
-
- // stage 4
- butterfly_32b(&x[2], &x[3], &cospi_m16_m16, &cospi_p16_m16, u);
- butterfly_32b(&x[6], &x[7], &cospi_p16_p16, &cospi_m16_p16, v);
- butterfly_rnd(u, &x[2]);
- butterfly_rnd(v, &x[6]);
-
- butterfly_32b(&x[10], &x[11], &cospi_p16_p16, &cospi_m16_p16, u);
- butterfly_32b(&x[14], &x[15], &cospi_m16_m16, &cospi_p16_m16, v);
- butterfly_rnd(u, &x[10]);
- butterfly_rnd(v, &x[14]);
-
- in[0] = x[0];
- in[1] = _mm256_sub_epi16(zero, x[8]);
- in[2] = x[12];
- in[3] = _mm256_sub_epi16(zero, x[4]);
- in[4] = x[6];
- in[5] = x[14];
- in[6] = x[10];
- in[7] = x[2];
- in[8] = x[3];
- in[9] = x[11];
- in[10] = x[15];
- in[11] = x[7];
- in[12] = x[5];
- in[13] = _mm256_sub_epi16(zero, x[13]);
- in[14] = x[9];
- in[15] = _mm256_sub_epi16(zero, x[1]);
-}
-
-static void iadst16(__m256i *in) {
- mm256_transpose_16x16(in, in);
- iadst16_avx2(in);
-}
-
-#if CONFIG_EXT_TX
-static void flip_row(__m256i *in, int rows) {
- int i;
- for (i = 0; i < rows; ++i) {
- mm256_reverse_epi16(&in[i]);
- }
-}
-
-static void flip_col(uint8_t **dest, int *stride, int rows) {
- *dest = *dest + (rows - 1) * (*stride);
- *stride = -*stride;
-}
-
-static void iidtx16(__m256i *in) {
- mm256_transpose_16x16(in, in);
- txfm_scaling16_avx2((int16_t)Sqrt2, in);
-}
-#endif
-
-void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- __m256i in[16];
- const TX_TYPE tx_type = txfm_param->tx_type;
-
- load_buffer_16x16(input, in);
- switch (tx_type) {
- case DCT_DCT:
- idct16(in);
- idct16(in);
- break;
- case ADST_DCT:
- idct16(in);
- iadst16(in);
- break;
- case DCT_ADST:
- iadst16(in);
- idct16(in);
- break;
- case ADST_ADST:
- iadst16(in);
- iadst16(in);
- break;
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- idct16(in);
- iadst16(in);
- flip_col(&dest, &stride, 16);
- break;
- case DCT_FLIPADST:
- iadst16(in);
- idct16(in);
- flip_row(in, 16);
- break;
- case FLIPADST_FLIPADST:
- iadst16(in);
- iadst16(in);
- flip_row(in, 16);
- flip_col(&dest, &stride, 16);
- break;
- case ADST_FLIPADST:
- iadst16(in);
- iadst16(in);
- flip_row(in, 16);
- break;
- case FLIPADST_ADST:
- iadst16(in);
- iadst16(in);
- flip_col(&dest, &stride, 16);
- break;
- case IDTX:
- iidtx16(in);
- iidtx16(in);
- break;
- case V_DCT:
- iidtx16(in);
- idct16(in);
- break;
- case H_DCT:
- idct16(in);
- iidtx16(in);
- break;
- case V_ADST:
- iidtx16(in);
- iadst16(in);
- break;
- case H_ADST:
- iadst16(in);
- iidtx16(in);
- break;
- case V_FLIPADST:
- iidtx16(in);
- iadst16(in);
- flip_col(&dest, &stride, 16);
- break;
- case H_FLIPADST:
- iadst16(in);
- iidtx16(in);
- flip_row(in, 16);
- break;
-#endif // CONFIG_EXT_TX
- default: assert(0); break;
- }
- store_buffer_16xN(in, stride, dest, 16);
-}
diff --git a/third_party/aom/av1/common/x86/idct_intrin_sse2.c b/third_party/aom/av1/common/x86/idct_intrin_sse2.c
deleted file mode 100644
index 541165c8d..000000000
--- a/third_party/aom/av1/common/x86/idct_intrin_sse2.c
+++ /dev/null
@@ -1,1411 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "./av1_rtcd.h"
-#include "aom_dsp/x86/inv_txfm_sse2.h"
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/txfm_common_sse2.h"
-#include "aom_ports/mem.h"
-#include "av1/common/enums.h"
-
-#if CONFIG_EXT_TX
-static INLINE void fliplr_4x4(__m128i *in /*in[2]*/) {
- in[0] = _mm_shufflelo_epi16(in[0], 0x1b);
- in[0] = _mm_shufflehi_epi16(in[0], 0x1b);
- in[1] = _mm_shufflelo_epi16(in[1], 0x1b);
- in[1] = _mm_shufflehi_epi16(in[1], 0x1b);
-}
-
-static INLINE void fliplr_8x8(__m128i *in /*in[8]*/) {
- in[0] = mm_reverse_epi16(in[0]);
- in[1] = mm_reverse_epi16(in[1]);
- in[2] = mm_reverse_epi16(in[2]);
- in[3] = mm_reverse_epi16(in[3]);
-
- in[4] = mm_reverse_epi16(in[4]);
- in[5] = mm_reverse_epi16(in[5]);
- in[6] = mm_reverse_epi16(in[6]);
- in[7] = mm_reverse_epi16(in[7]);
-}
-
-static INLINE void fliplr_16x8(__m128i *in /*in[16]*/) {
- fliplr_8x8(&in[0]);
- fliplr_8x8(&in[8]);
-}
-
-#define FLIPLR_16x16(in0, in1) \
- do { \
- __m128i *tmp; \
- fliplr_16x8(in0); \
- fliplr_16x8(in1); \
- tmp = (in0); \
- (in0) = (in1); \
- (in1) = tmp; \
- } while (0)
-
-#define FLIPUD_PTR(dest, stride, size) \
- do { \
- (dest) = (dest) + ((size)-1) * (stride); \
- (stride) = -(stride); \
- } while (0)
-#endif
-
-void av1_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- __m128i in[2];
- const __m128i zero = _mm_setzero_si128();
- const __m128i eight = _mm_set1_epi16(8);
- const TX_TYPE tx_type = txfm_param->tx_type;
-
- in[0] = load_input_data(input);
- in[1] = load_input_data(input + 8);
-
- switch (tx_type) {
- case DCT_DCT:
- aom_idct4_sse2(in);
- aom_idct4_sse2(in);
- break;
- case ADST_DCT:
- aom_idct4_sse2(in);
- aom_iadst4_sse2(in);
- break;
- case DCT_ADST:
- aom_iadst4_sse2(in);
- aom_idct4_sse2(in);
- break;
- case ADST_ADST:
- aom_iadst4_sse2(in);
- aom_iadst4_sse2(in);
- break;
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- aom_idct4_sse2(in);
- aom_iadst4_sse2(in);
- FLIPUD_PTR(dest, stride, 4);
- break;
- case DCT_FLIPADST:
- aom_iadst4_sse2(in);
- aom_idct4_sse2(in);
- fliplr_4x4(in);
- break;
- case FLIPADST_FLIPADST:
- aom_iadst4_sse2(in);
- aom_iadst4_sse2(in);
- FLIPUD_PTR(dest, stride, 4);
- fliplr_4x4(in);
- break;
- case ADST_FLIPADST:
- aom_iadst4_sse2(in);
- aom_iadst4_sse2(in);
- fliplr_4x4(in);
- break;
- case FLIPADST_ADST:
- aom_iadst4_sse2(in);
- aom_iadst4_sse2(in);
- FLIPUD_PTR(dest, stride, 4);
- break;
-#endif // CONFIG_EXT_TX
- default: assert(0); break;
- }
-
- // Final round and shift
- in[0] = _mm_add_epi16(in[0], eight);
- in[1] = _mm_add_epi16(in[1], eight);
-
- in[0] = _mm_srai_epi16(in[0], 4);
- in[1] = _mm_srai_epi16(in[1], 4);
-
- // Reconstruction and Store
- {
- __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 0));
- __m128i d1 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 1));
- __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2));
- __m128i d3 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 3));
- d0 = _mm_unpacklo_epi32(d0, d1);
- d2 = _mm_unpacklo_epi32(d2, d3);
- d0 = _mm_unpacklo_epi8(d0, zero);
- d2 = _mm_unpacklo_epi8(d2, zero);
- d0 = _mm_add_epi16(d0, in[0]);
- d2 = _mm_add_epi16(d2, in[1]);
- d0 = _mm_packus_epi16(d0, d2);
- // store result[0]
- *(int *)dest = _mm_cvtsi128_si32(d0);
- // store result[1]
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride) = _mm_cvtsi128_si32(d0);
- // store result[2]
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0);
- // store result[3]
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0);
- }
-}
-
-void av1_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- __m128i in[8];
- const __m128i zero = _mm_setzero_si128();
- const __m128i final_rounding = _mm_set1_epi16(1 << 4);
- const TX_TYPE tx_type = txfm_param->tx_type;
-
- // load input data
- in[0] = load_input_data(input);
- in[1] = load_input_data(input + 8 * 1);
- in[2] = load_input_data(input + 8 * 2);
- in[3] = load_input_data(input + 8 * 3);
- in[4] = load_input_data(input + 8 * 4);
- in[5] = load_input_data(input + 8 * 5);
- in[6] = load_input_data(input + 8 * 6);
- in[7] = load_input_data(input + 8 * 7);
-
- switch (tx_type) {
- case DCT_DCT:
- aom_idct8_sse2(in);
- aom_idct8_sse2(in);
- break;
- case ADST_DCT:
- aom_idct8_sse2(in);
- aom_iadst8_sse2(in);
- break;
- case DCT_ADST:
- aom_iadst8_sse2(in);
- aom_idct8_sse2(in);
- break;
- case ADST_ADST:
- aom_iadst8_sse2(in);
- aom_iadst8_sse2(in);
- break;
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- aom_idct8_sse2(in);
- aom_iadst8_sse2(in);
- FLIPUD_PTR(dest, stride, 8);
- break;
- case DCT_FLIPADST:
- aom_iadst8_sse2(in);
- aom_idct8_sse2(in);
- fliplr_8x8(in);
- break;
- case FLIPADST_FLIPADST:
- aom_iadst8_sse2(in);
- aom_iadst8_sse2(in);
- FLIPUD_PTR(dest, stride, 8);
- fliplr_8x8(in);
- break;
- case ADST_FLIPADST:
- aom_iadst8_sse2(in);
- aom_iadst8_sse2(in);
- fliplr_8x8(in);
- break;
- case FLIPADST_ADST:
- aom_iadst8_sse2(in);
- aom_iadst8_sse2(in);
- FLIPUD_PTR(dest, stride, 8);
- break;
-#endif // CONFIG_EXT_TX
- default: assert(0); break;
- }
-
- // Final rounding and shift
- in[0] = _mm_adds_epi16(in[0], final_rounding);
- in[1] = _mm_adds_epi16(in[1], final_rounding);
- in[2] = _mm_adds_epi16(in[2], final_rounding);
- in[3] = _mm_adds_epi16(in[3], final_rounding);
- in[4] = _mm_adds_epi16(in[4], final_rounding);
- in[5] = _mm_adds_epi16(in[5], final_rounding);
- in[6] = _mm_adds_epi16(in[6], final_rounding);
- in[7] = _mm_adds_epi16(in[7], final_rounding);
-
- in[0] = _mm_srai_epi16(in[0], 5);
- in[1] = _mm_srai_epi16(in[1], 5);
- in[2] = _mm_srai_epi16(in[2], 5);
- in[3] = _mm_srai_epi16(in[3], 5);
- in[4] = _mm_srai_epi16(in[4], 5);
- in[5] = _mm_srai_epi16(in[5], 5);
- in[6] = _mm_srai_epi16(in[6], 5);
- in[7] = _mm_srai_epi16(in[7], 5);
-
- RECON_AND_STORE(dest + 0 * stride, in[0]);
- RECON_AND_STORE(dest + 1 * stride, in[1]);
- RECON_AND_STORE(dest + 2 * stride, in[2]);
- RECON_AND_STORE(dest + 3 * stride, in[3]);
- RECON_AND_STORE(dest + 4 * stride, in[4]);
- RECON_AND_STORE(dest + 5 * stride, in[5]);
- RECON_AND_STORE(dest + 6 * stride, in[6]);
- RECON_AND_STORE(dest + 7 * stride, in[7]);
-}
-
-#if CONFIG_EXT_TX
-static void iidtx16_sse2(__m128i *in0, __m128i *in1) {
- array_transpose_16x16(in0, in1);
- idtx16_8col(in0);
- idtx16_8col(in1);
-}
-#endif // CONFIG_EXT_TX
-
-void av1_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- __m128i in[32];
- __m128i *in0 = &in[0];
- __m128i *in1 = &in[16];
- const TX_TYPE tx_type = txfm_param->tx_type;
-
- load_buffer_8x16(input, in0);
- input += 8;
- load_buffer_8x16(input, in1);
-
- switch (tx_type) {
- case DCT_DCT:
- aom_idct16_sse2(in0, in1);
- aom_idct16_sse2(in0, in1);
- break;
- case ADST_DCT:
- aom_idct16_sse2(in0, in1);
- aom_iadst16_sse2(in0, in1);
- break;
- case DCT_ADST:
- aom_iadst16_sse2(in0, in1);
- aom_idct16_sse2(in0, in1);
- break;
- case ADST_ADST:
- aom_iadst16_sse2(in0, in1);
- aom_iadst16_sse2(in0, in1);
- break;
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- aom_idct16_sse2(in0, in1);
- aom_iadst16_sse2(in0, in1);
- FLIPUD_PTR(dest, stride, 16);
- break;
- case DCT_FLIPADST:
- aom_iadst16_sse2(in0, in1);
- aom_idct16_sse2(in0, in1);
- FLIPLR_16x16(in0, in1);
- break;
- case FLIPADST_FLIPADST:
- aom_iadst16_sse2(in0, in1);
- aom_iadst16_sse2(in0, in1);
- FLIPUD_PTR(dest, stride, 16);
- FLIPLR_16x16(in0, in1);
- break;
- case ADST_FLIPADST:
- aom_iadst16_sse2(in0, in1);
- aom_iadst16_sse2(in0, in1);
- FLIPLR_16x16(in0, in1);
- break;
- case FLIPADST_ADST:
- aom_iadst16_sse2(in0, in1);
- aom_iadst16_sse2(in0, in1);
- FLIPUD_PTR(dest, stride, 16);
- break;
- case IDTX:
- iidtx16_sse2(in0, in1);
- iidtx16_sse2(in0, in1);
- break;
- case V_DCT:
- iidtx16_sse2(in0, in1);
- aom_idct16_sse2(in0, in1);
- break;
- case H_DCT:
- aom_idct16_sse2(in0, in1);
- iidtx16_sse2(in0, in1);
- break;
- case V_ADST:
- iidtx16_sse2(in0, in1);
- aom_iadst16_sse2(in0, in1);
- break;
- case H_ADST:
- aom_iadst16_sse2(in0, in1);
- iidtx16_sse2(in0, in1);
- break;
- case V_FLIPADST:
- iidtx16_sse2(in0, in1);
- aom_iadst16_sse2(in0, in1);
- FLIPUD_PTR(dest, stride, 16);
- break;
- case H_FLIPADST:
- aom_iadst16_sse2(in0, in1);
- iidtx16_sse2(in0, in1);
- FLIPLR_16x16(in0, in1);
- break;
-#endif // CONFIG_EXT_TX
- default: assert(0); break;
- }
-
- write_buffer_8x16(dest, in0, stride);
- dest += 8;
- write_buffer_8x16(dest, in1, stride);
-}
-
-#if CONFIG_EXT_TX
-static void iidtx8_sse2(__m128i *in) {
- in[0] = _mm_slli_epi16(in[0], 1);
- in[1] = _mm_slli_epi16(in[1], 1);
- in[2] = _mm_slli_epi16(in[2], 1);
- in[3] = _mm_slli_epi16(in[3], 1);
- in[4] = _mm_slli_epi16(in[4], 1);
- in[5] = _mm_slli_epi16(in[5], 1);
- in[6] = _mm_slli_epi16(in[6], 1);
- in[7] = _mm_slli_epi16(in[7], 1);
-}
-
-static INLINE void iidtx4_sse2(__m128i *in) {
- const __m128i v_scale_w = _mm_set1_epi16((int16_t)Sqrt2);
-
- const __m128i v_p0l_w = _mm_mullo_epi16(in[0], v_scale_w);
- const __m128i v_p0h_w = _mm_mulhi_epi16(in[0], v_scale_w);
- const __m128i v_p1l_w = _mm_mullo_epi16(in[1], v_scale_w);
- const __m128i v_p1h_w = _mm_mulhi_epi16(in[1], v_scale_w);
-
- const __m128i v_p0a_d = _mm_unpacklo_epi16(v_p0l_w, v_p0h_w);
- const __m128i v_p0b_d = _mm_unpackhi_epi16(v_p0l_w, v_p0h_w);
- const __m128i v_p1a_d = _mm_unpacklo_epi16(v_p1l_w, v_p1h_w);
- const __m128i v_p1b_d = _mm_unpackhi_epi16(v_p1l_w, v_p1h_w);
-
- in[0] = _mm_packs_epi32(xx_roundn_epi32_unsigned(v_p0a_d, DCT_CONST_BITS),
- xx_roundn_epi32_unsigned(v_p0b_d, DCT_CONST_BITS));
- in[1] = _mm_packs_epi32(xx_roundn_epi32_unsigned(v_p1a_d, DCT_CONST_BITS),
- xx_roundn_epi32_unsigned(v_p1b_d, DCT_CONST_BITS));
-}
-
-// load 8x8 array
-static INLINE void flip_buffer_lr_8x8(__m128i *in) {
- in[0] = mm_reverse_epi16(in[0]);
- in[1] = mm_reverse_epi16(in[1]);
- in[2] = mm_reverse_epi16(in[2]);
- in[3] = mm_reverse_epi16(in[3]);
- in[4] = mm_reverse_epi16(in[4]);
- in[5] = mm_reverse_epi16(in[5]);
- in[6] = mm_reverse_epi16(in[6]);
- in[7] = mm_reverse_epi16(in[7]);
-}
-#endif // CONFIG_EXT_TX
-
-void av1_iht8x16_128_add_sse2(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- __m128i in[16];
- const TX_TYPE tx_type = txfm_param->tx_type;
-
- in[0] = load_input_data(input + 0 * 8);
- in[1] = load_input_data(input + 1 * 8);
- in[2] = load_input_data(input + 2 * 8);
- in[3] = load_input_data(input + 3 * 8);
- in[4] = load_input_data(input + 4 * 8);
- in[5] = load_input_data(input + 5 * 8);
- in[6] = load_input_data(input + 6 * 8);
- in[7] = load_input_data(input + 7 * 8);
-
- in[8] = load_input_data(input + 8 * 8);
- in[9] = load_input_data(input + 9 * 8);
- in[10] = load_input_data(input + 10 * 8);
- in[11] = load_input_data(input + 11 * 8);
- in[12] = load_input_data(input + 12 * 8);
- in[13] = load_input_data(input + 13 * 8);
- in[14] = load_input_data(input + 14 * 8);
- in[15] = load_input_data(input + 15 * 8);
-
- // Row transform
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case H_DCT:
-#endif
- aom_idct8_sse2(in);
- array_transpose_8x8(in, in);
- aom_idct8_sse2(in + 8);
- array_transpose_8x8(in + 8, in + 8);
- break;
- case DCT_ADST:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- case H_ADST:
- case H_FLIPADST:
-#endif
- aom_iadst8_sse2(in);
- array_transpose_8x8(in, in);
- aom_iadst8_sse2(in + 8);
- array_transpose_8x8(in + 8, in + 8);
- break;
-#if CONFIG_EXT_TX
- case V_FLIPADST:
- case V_ADST:
- case V_DCT:
- case IDTX:
- iidtx8_sse2(in);
- iidtx8_sse2(in + 8);
- break;
-#endif
- default: assert(0); break;
- }
- scale_sqrt2_8x8(in);
- scale_sqrt2_8x8(in + 8);
-
- // Column transform
- switch (tx_type) {
- case DCT_DCT:
- case DCT_ADST:
-#if CONFIG_EXT_TX
- case DCT_FLIPADST:
- case V_DCT:
-#endif
- idct16_8col(in);
- break;
- case ADST_DCT:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case FLIPADST_ADST:
- case ADST_FLIPADST:
- case FLIPADST_FLIPADST:
- case FLIPADST_DCT:
- case V_ADST:
- case V_FLIPADST:
-#endif
- iadst16_8col(in);
- break;
-#if CONFIG_EXT_TX
- case H_DCT:
- case H_ADST:
- case H_FLIPADST:
- case IDTX: idtx16_8col(in); break;
-#endif
- default: assert(0); break;
- }
-
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
-#if CONFIG_EXT_TX
- case H_DCT:
-#endif
- case DCT_ADST:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case H_ADST:
- case V_ADST:
- case V_DCT:
- case IDTX:
-#endif
- write_buffer_8x16(dest, in, stride);
- break;
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case FLIPADST_ADST:
- case V_FLIPADST: write_buffer_8x16(dest + stride * 15, in, -stride); break;
- case DCT_FLIPADST:
- case ADST_FLIPADST:
- case H_FLIPADST:
- flip_buffer_lr_8x8(in);
- flip_buffer_lr_8x8(in + 8);
- write_buffer_8x16(dest, in, stride);
- break;
- case FLIPADST_FLIPADST:
- flip_buffer_lr_8x8(in);
- flip_buffer_lr_8x8(in + 8);
- write_buffer_8x16(dest + stride * 15, in, -stride);
- break;
-#endif
- default: assert(0); break;
- }
-}
-
-static INLINE void write_buffer_8x8_round6(uint8_t *dest, __m128i *in,
- int stride) {
- const __m128i final_rounding = _mm_set1_epi16(1 << 5);
- const __m128i zero = _mm_setzero_si128();
- // Final rounding and shift
- in[0] = _mm_adds_epi16(in[0], final_rounding);
- in[1] = _mm_adds_epi16(in[1], final_rounding);
- in[2] = _mm_adds_epi16(in[2], final_rounding);
- in[3] = _mm_adds_epi16(in[3], final_rounding);
- in[4] = _mm_adds_epi16(in[4], final_rounding);
- in[5] = _mm_adds_epi16(in[5], final_rounding);
- in[6] = _mm_adds_epi16(in[6], final_rounding);
- in[7] = _mm_adds_epi16(in[7], final_rounding);
-
- in[0] = _mm_srai_epi16(in[0], 6);
- in[1] = _mm_srai_epi16(in[1], 6);
- in[2] = _mm_srai_epi16(in[2], 6);
- in[3] = _mm_srai_epi16(in[3], 6);
- in[4] = _mm_srai_epi16(in[4], 6);
- in[5] = _mm_srai_epi16(in[5], 6);
- in[6] = _mm_srai_epi16(in[6], 6);
- in[7] = _mm_srai_epi16(in[7], 6);
-
- RECON_AND_STORE(dest + 0 * stride, in[0]);
- RECON_AND_STORE(dest + 1 * stride, in[1]);
- RECON_AND_STORE(dest + 2 * stride, in[2]);
- RECON_AND_STORE(dest + 3 * stride, in[3]);
- RECON_AND_STORE(dest + 4 * stride, in[4]);
- RECON_AND_STORE(dest + 5 * stride, in[5]);
- RECON_AND_STORE(dest + 6 * stride, in[6]);
- RECON_AND_STORE(dest + 7 * stride, in[7]);
-}
-
-void av1_iht16x8_128_add_sse2(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- __m128i in[16];
- const TX_TYPE tx_type = txfm_param->tx_type;
-
- // Transpose 16x8 input into in[]
- in[0] = load_input_data(input + 0 * 16);
- in[1] = load_input_data(input + 1 * 16);
- in[2] = load_input_data(input + 2 * 16);
- in[3] = load_input_data(input + 3 * 16);
- in[4] = load_input_data(input + 4 * 16);
- in[5] = load_input_data(input + 5 * 16);
- in[6] = load_input_data(input + 6 * 16);
- in[7] = load_input_data(input + 7 * 16);
- array_transpose_8x8(in, in);
-
- in[8] = load_input_data(input + 8 + 0 * 16);
- in[9] = load_input_data(input + 8 + 1 * 16);
- in[10] = load_input_data(input + 8 + 2 * 16);
- in[11] = load_input_data(input + 8 + 3 * 16);
- in[12] = load_input_data(input + 8 + 4 * 16);
- in[13] = load_input_data(input + 8 + 5 * 16);
- in[14] = load_input_data(input + 8 + 6 * 16);
- in[15] = load_input_data(input + 8 + 7 * 16);
- array_transpose_8x8(in + 8, in + 8);
-
- // Row transform
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case H_DCT:
-#endif
- idct16_8col(in);
- break;
- case DCT_ADST:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- case H_ADST:
- case H_FLIPADST:
-#endif
- iadst16_8col(in);
- break;
-#if CONFIG_EXT_TX
- case V_FLIPADST:
- case V_ADST:
- case V_DCT:
- case IDTX: idtx16_8col(in); break;
-#endif
- default: assert(0); break;
- }
-
- // Scale
- scale_sqrt2_8x8(in);
- scale_sqrt2_8x8(in + 8);
-
- // Column transform
- switch (tx_type) {
- case DCT_DCT:
- case DCT_ADST:
-#if CONFIG_EXT_TX
- case DCT_FLIPADST:
- case V_DCT:
-#endif
- aom_idct8_sse2(in);
- aom_idct8_sse2(in + 8);
- break;
- case ADST_DCT:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case FLIPADST_ADST:
- case ADST_FLIPADST:
- case FLIPADST_FLIPADST:
- case FLIPADST_DCT:
- case V_ADST:
- case V_FLIPADST:
-#endif
- aom_iadst8_sse2(in);
- aom_iadst8_sse2(in + 8);
- break;
-#if CONFIG_EXT_TX
- case H_DCT:
- case H_ADST:
- case H_FLIPADST:
- case IDTX:
- array_transpose_8x8(in, in);
- array_transpose_8x8(in + 8, in + 8);
- iidtx8_sse2(in);
- iidtx8_sse2(in + 8);
- break;
-#endif
- default: assert(0); break;
- }
-
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case H_DCT:
- case H_ADST:
- case V_ADST:
- case V_DCT:
- case IDTX:
-#endif
- write_buffer_8x8_round6(dest, in, stride);
- write_buffer_8x8_round6(dest + 8, in + 8, stride);
- break;
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case FLIPADST_ADST:
- case V_FLIPADST:
- write_buffer_8x8_round6(dest + stride * 7, in, -stride);
- write_buffer_8x8_round6(dest + stride * 7 + 8, in + 8, -stride);
- break;
- case DCT_FLIPADST:
- case ADST_FLIPADST:
- case H_FLIPADST:
- flip_buffer_lr_8x8(in);
- flip_buffer_lr_8x8(in + 8);
- write_buffer_8x8_round6(dest, in + 8, stride);
- write_buffer_8x8_round6(dest + 8, in, stride);
- break;
- case FLIPADST_FLIPADST:
- flip_buffer_lr_8x8(in);
- flip_buffer_lr_8x8(in + 8);
- write_buffer_8x8_round6(dest + stride * 7, in + 8, -stride);
- write_buffer_8x8_round6(dest + stride * 7 + 8, in, -stride);
- break;
-#endif
- default: assert(0); break;
- }
-}
-
-static INLINE void write_buffer_8x4_round5(uint8_t *dest, __m128i *in,
- int stride) {
- const __m128i final_rounding = _mm_set1_epi16(1 << 4);
- const __m128i zero = _mm_setzero_si128();
- // Final rounding and shift
- in[0] = _mm_adds_epi16(in[0], final_rounding);
- in[1] = _mm_adds_epi16(in[1], final_rounding);
- in[2] = _mm_adds_epi16(in[2], final_rounding);
- in[3] = _mm_adds_epi16(in[3], final_rounding);
-
- in[0] = _mm_srai_epi16(in[0], 5);
- in[1] = _mm_srai_epi16(in[1], 5);
- in[2] = _mm_srai_epi16(in[2], 5);
- in[3] = _mm_srai_epi16(in[3], 5);
-
- RECON_AND_STORE(dest + 0 * stride, in[0]);
- RECON_AND_STORE(dest + 1 * stride, in[1]);
- RECON_AND_STORE(dest + 2 * stride, in[2]);
- RECON_AND_STORE(dest + 3 * stride, in[3]);
-}
-
-void av1_iht8x4_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- __m128i in[8];
- const TX_TYPE tx_type = txfm_param->tx_type;
-
- in[0] = load_input_data(input + 0 * 8);
- in[1] = load_input_data(input + 1 * 8);
- in[2] = load_input_data(input + 2 * 8);
- in[3] = load_input_data(input + 3 * 8);
-
- // Row transform
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case H_DCT:
-#endif
- aom_idct8_sse2(in);
- break;
- case DCT_ADST:
- case ADST_ADST: aom_iadst8_sse2(in); break;
-#if CONFIG_EXT_TX
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- case H_ADST:
- case H_FLIPADST: aom_iadst8_sse2(in); break;
- case V_FLIPADST:
- case V_ADST:
- case V_DCT:
- case IDTX: iidtx8_sse2(in); array_transpose_8x8(in, in);
-#endif
- break;
- default: assert(0); break;
- }
-
- scale_sqrt2_8x8(in);
-
- // Repack data. We pack into the bottom half of 'in'
- // so that the next repacking stage can pack into the
- // top half without overwriting anything
- in[7] = _mm_unpacklo_epi64(in[6], in[7]);
- in[6] = _mm_unpacklo_epi64(in[4], in[5]);
- in[5] = _mm_unpacklo_epi64(in[2], in[3]);
- in[4] = _mm_unpacklo_epi64(in[0], in[1]);
-
- // Column transform
- switch (tx_type) {
- case DCT_DCT:
- case DCT_ADST:
-#if CONFIG_EXT_TX
- case DCT_FLIPADST:
- case V_DCT:
-#endif
- aom_idct4_sse2(in + 4);
- aom_idct4_sse2(in + 6);
- break;
- case ADST_DCT:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case FLIPADST_ADST:
- case ADST_FLIPADST:
- case FLIPADST_FLIPADST:
- case FLIPADST_DCT:
- case V_ADST:
- case V_FLIPADST:
-#endif
- aom_iadst4_sse2(in + 4);
- aom_iadst4_sse2(in + 6);
- break;
-#if CONFIG_EXT_TX
- case H_DCT:
- case H_ADST:
- case H_FLIPADST:
- case IDTX:
- iidtx4_sse2(in + 4);
- array_transpose_4x4(in + 4);
- iidtx4_sse2(in + 6);
- array_transpose_4x4(in + 6);
- break;
-#endif
- default: assert(0); break;
- }
-
- // Repack data
- in[0] = _mm_unpacklo_epi64(in[4], in[6]);
- in[1] = _mm_unpackhi_epi64(in[4], in[6]);
- in[2] = _mm_unpacklo_epi64(in[5], in[7]);
- in[3] = _mm_unpackhi_epi64(in[5], in[7]);
-
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case H_DCT:
- case H_ADST:
- case V_ADST:
- case V_DCT:
- case IDTX: break;
- case FLIPADST_DCT:
- case FLIPADST_ADST:
- case V_FLIPADST: FLIPUD_PTR(dest, stride, 4); break;
- case DCT_FLIPADST:
- case ADST_FLIPADST:
- case H_FLIPADST:
- in[0] = mm_reverse_epi16(in[0]);
- in[1] = mm_reverse_epi16(in[1]);
- in[2] = mm_reverse_epi16(in[2]);
- in[3] = mm_reverse_epi16(in[3]);
- break;
- case FLIPADST_FLIPADST:
- in[0] = mm_reverse_epi16(in[0]);
- in[1] = mm_reverse_epi16(in[1]);
- in[2] = mm_reverse_epi16(in[2]);
- in[3] = mm_reverse_epi16(in[3]);
- FLIPUD_PTR(dest, stride, 4);
-#endif
- break;
- default: assert(0); break;
- }
- write_buffer_8x4_round5(dest, in, stride);
-}
-
-static INLINE void write_buffer_4x8_round5(uint8_t *dest, __m128i *in,
- int stride) {
- const __m128i final_rounding = _mm_set1_epi16(1 << 4);
- const __m128i zero = _mm_setzero_si128();
- // Final rounding and shift
- in[0] = _mm_adds_epi16(in[0], final_rounding);
- in[1] = _mm_adds_epi16(in[1], final_rounding);
- in[2] = _mm_adds_epi16(in[2], final_rounding);
- in[3] = _mm_adds_epi16(in[3], final_rounding);
-
- in[0] = _mm_srai_epi16(in[0], 5);
- in[1] = _mm_srai_epi16(in[1], 5);
- in[2] = _mm_srai_epi16(in[2], 5);
- in[3] = _mm_srai_epi16(in[3], 5);
-
- // Reconstruction and Store
- {
- __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 0));
- __m128i d1 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 1));
- __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2));
- __m128i d3 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 3));
- __m128i d4 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 4));
- __m128i d5 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 5));
- __m128i d6 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 6));
- __m128i d7 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 7));
-
- d0 = _mm_unpacklo_epi32(d0, d1);
- d2 = _mm_unpacklo_epi32(d2, d3);
- d4 = _mm_unpacklo_epi32(d4, d5);
- d6 = _mm_unpacklo_epi32(d6, d7);
- d0 = _mm_unpacklo_epi8(d0, zero);
- d2 = _mm_unpacklo_epi8(d2, zero);
- d4 = _mm_unpacklo_epi8(d4, zero);
- d6 = _mm_unpacklo_epi8(d6, zero);
- d0 = _mm_add_epi16(d0, in[0]);
- d2 = _mm_add_epi16(d2, in[1]);
- d4 = _mm_add_epi16(d4, in[2]);
- d6 = _mm_add_epi16(d6, in[3]);
-
- d0 = _mm_packus_epi16(d0, d2);
- *(int *)dest = _mm_cvtsi128_si32(d0);
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride) = _mm_cvtsi128_si32(d0);
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0);
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0);
- d0 = _mm_packus_epi16(d4, d6);
- *(int *)(dest + stride * 4) = _mm_cvtsi128_si32(d0);
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride * 5) = _mm_cvtsi128_si32(d0);
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride * 6) = _mm_cvtsi128_si32(d0);
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride * 7) = _mm_cvtsi128_si32(d0);
- }
-}
-
-void av1_iht4x8_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- __m128i in[8];
- const TX_TYPE tx_type = txfm_param->tx_type;
-
- // Load rows, packed two per element of 'in'.
- // We pack into the bottom half of 'in' so that the
- // later repacking stage can pack into the
- // top half without overwriting anything
- in[4] = load_input_data(input + 0 * 8);
- in[5] = load_input_data(input + 1 * 8);
- in[6] = load_input_data(input + 2 * 8);
- in[7] = load_input_data(input + 3 * 8);
-
- // Row transform
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case H_DCT:
-#endif
- aom_idct4_sse2(in + 4);
- aom_idct4_sse2(in + 6);
- break;
- case DCT_ADST:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- case H_ADST:
- case H_FLIPADST:
-#endif
- aom_iadst4_sse2(in + 4);
- aom_iadst4_sse2(in + 6);
- break;
-#if CONFIG_EXT_TX
- case V_FLIPADST:
- case V_ADST:
- case V_DCT:
- case IDTX:
- iidtx4_sse2(in + 4);
- array_transpose_4x4(in + 4);
- iidtx4_sse2(in + 6);
- array_transpose_4x4(in + 6);
- break;
-#endif
- default: assert(0); break;
- }
-
- scale_sqrt2_8x4(in + 4);
-
- // Repack data
- in[0] = _mm_unpacklo_epi64(in[4], in[6]);
- in[1] = _mm_unpackhi_epi64(in[4], in[6]);
- in[2] = _mm_unpacklo_epi64(in[5], in[7]);
- in[3] = _mm_unpackhi_epi64(in[5], in[7]);
-
- // Column transform
- switch (tx_type) {
- case DCT_DCT:
- case DCT_ADST:
-#if CONFIG_EXT_TX
- case DCT_FLIPADST:
- case V_DCT:
-#endif
- aom_idct8_sse2(in);
- break;
- case ADST_DCT:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case FLIPADST_ADST:
- case ADST_FLIPADST:
- case FLIPADST_FLIPADST:
- case FLIPADST_DCT:
- case V_ADST:
- case V_FLIPADST:
-#endif
- aom_iadst8_sse2(in);
- break;
-#if CONFIG_EXT_TX
- case H_DCT:
- case H_ADST:
- case H_FLIPADST:
- case IDTX:
- iidtx8_sse2(in);
- array_transpose_8x8(in, in);
- break;
-#endif
- default: assert(0); break;
- }
-
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case H_DCT:
- case H_ADST:
- case V_ADST:
- case V_DCT:
- case IDTX:
-#endif
- break;
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case FLIPADST_ADST:
- case V_FLIPADST: FLIPUD_PTR(dest, stride, 8); break;
- case DCT_FLIPADST:
- case ADST_FLIPADST:
- case H_FLIPADST:
- in[0] = _mm_shufflelo_epi16(in[0], 0x1b);
- in[1] = _mm_shufflelo_epi16(in[1], 0x1b);
- in[2] = _mm_shufflelo_epi16(in[2], 0x1b);
- in[3] = _mm_shufflelo_epi16(in[3], 0x1b);
- in[4] = _mm_shufflelo_epi16(in[4], 0x1b);
- in[5] = _mm_shufflelo_epi16(in[5], 0x1b);
- in[6] = _mm_shufflelo_epi16(in[6], 0x1b);
- in[7] = _mm_shufflelo_epi16(in[7], 0x1b);
- break;
- case FLIPADST_FLIPADST:
- in[0] = _mm_shufflelo_epi16(in[0], 0x1b);
- in[1] = _mm_shufflelo_epi16(in[1], 0x1b);
- in[2] = _mm_shufflelo_epi16(in[2], 0x1b);
- in[3] = _mm_shufflelo_epi16(in[3], 0x1b);
- in[4] = _mm_shufflelo_epi16(in[4], 0x1b);
- in[5] = _mm_shufflelo_epi16(in[5], 0x1b);
- in[6] = _mm_shufflelo_epi16(in[6], 0x1b);
- in[7] = _mm_shufflelo_epi16(in[7], 0x1b);
- FLIPUD_PTR(dest, stride, 8);
- break;
-#endif
- default: assert(0); break;
- }
- in[0] = _mm_unpacklo_epi64(in[0], in[1]);
- in[1] = _mm_unpacklo_epi64(in[2], in[3]);
- in[2] = _mm_unpacklo_epi64(in[4], in[5]);
- in[3] = _mm_unpacklo_epi64(in[6], in[7]);
- write_buffer_4x8_round5(dest, in, stride);
-}
-
-// Note: The 16-column 32-element transforms take input in the form of four
-// 8x16 blocks (each stored as a __m128i[16]), which are the four quadrants
-// of the overall 16x32 input buffer.
-static INLINE void idct32_16col(__m128i *tl, __m128i *tr, __m128i *bl,
- __m128i *br) {
- array_transpose_16x16(tl, tr);
- array_transpose_16x16(bl, br);
- idct32_8col(tl, bl);
- idct32_8col(tr, br);
-}
-
-static INLINE void ihalfright32_16col(__m128i *tl, __m128i *tr, __m128i *bl,
- __m128i *br) {
- __m128i tmpl[16], tmpr[16];
- int i;
-
- // Copy the top half of the input to temporary storage
- for (i = 0; i < 16; ++i) {
- tmpl[i] = tl[i];
- tmpr[i] = tr[i];
- }
-
- // Generate the top half of the output
- for (i = 0; i < 16; ++i) {
- tl[i] = _mm_slli_epi16(bl[i], 2);
- tr[i] = _mm_slli_epi16(br[i], 2);
- }
- array_transpose_16x16(tl, tr);
-
- // Copy the temporary storage back to the bottom half of the input
- for (i = 0; i < 16; ++i) {
- bl[i] = tmpl[i];
- br[i] = tmpr[i];
- }
-
- // Generate the bottom half of the output
- scale_sqrt2_8x16(bl);
- scale_sqrt2_8x16(br);
- aom_idct16_sse2(bl, br); // Includes a transposition
-}
-
-#if CONFIG_EXT_TX
-static INLINE void iidtx32_16col(__m128i *tl, __m128i *tr, __m128i *bl,
- __m128i *br) {
- int i;
- array_transpose_16x16(tl, tr);
- array_transpose_16x16(bl, br);
- for (i = 0; i < 16; ++i) {
- tl[i] = _mm_slli_epi16(tl[i], 2);
- tr[i] = _mm_slli_epi16(tr[i], 2);
- bl[i] = _mm_slli_epi16(bl[i], 2);
- br[i] = _mm_slli_epi16(br[i], 2);
- }
-}
-#endif // CONFIG_EXT_TX
-
-static INLINE void write_buffer_16x32_round6(uint8_t *dest, __m128i *intl,
- __m128i *intr, __m128i *inbl,
- __m128i *inbr, int stride) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i final_rounding = _mm_set1_epi16(1 << 5);
- int i;
-
- for (i = 0; i < 16; ++i) {
- intl[i] = _mm_adds_epi16(intl[i], final_rounding);
- intr[i] = _mm_adds_epi16(intr[i], final_rounding);
- inbl[i] = _mm_adds_epi16(inbl[i], final_rounding);
- inbr[i] = _mm_adds_epi16(inbr[i], final_rounding);
- intl[i] = _mm_srai_epi16(intl[i], 6);
- intr[i] = _mm_srai_epi16(intr[i], 6);
- inbl[i] = _mm_srai_epi16(inbl[i], 6);
- inbr[i] = _mm_srai_epi16(inbr[i], 6);
- RECON_AND_STORE(dest + i * stride + 0, intl[i]);
- RECON_AND_STORE(dest + i * stride + 8, intr[i]);
- RECON_AND_STORE(dest + (i + 16) * stride + 0, inbl[i]);
- RECON_AND_STORE(dest + (i + 16) * stride + 8, inbr[i]);
- }
-}
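As a scalar reference (illustration only, not part of the patch), the per-pixel work done by write_buffer_16x32_round6() amounts to round-to-nearest with a shift of 6, followed by adding the predictor and clamping back to the 8-bit pixel range:

#include <stdint.h>

static uint8_t reconstruct_pixel_round6(int16_t residual, uint8_t pred) {
  const int rounded = (residual + (1 << 5)) >> 6;  /* add 2^5, arithmetic shift by 6 */
  int pixel = pred + rounded;                      /* what RECON_AND_STORE adds back */
  if (pixel < 0) pixel = 0;                        /* clamp to [0, 255] */
  if (pixel > 255) pixel = 255;
  return (uint8_t)pixel;
}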
-
-void av1_iht16x32_512_add_sse2(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- __m128i intl[16], intr[16], inbl[16], inbr[16];
- const TX_TYPE tx_type = txfm_param->tx_type;
-
- int i;
- for (i = 0; i < 16; ++i) {
- intl[i] = load_input_data(input + i * 16 + 0);
- intr[i] = load_input_data(input + i * 16 + 8);
- inbl[i] = load_input_data(input + (i + 16) * 16 + 0);
- inbr[i] = load_input_data(input + (i + 16) * 16 + 8);
- }
-
- // Row transform
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case H_DCT:
-#endif
- aom_idct16_sse2(intl, intr);
- aom_idct16_sse2(inbl, inbr);
- break;
- case DCT_ADST:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- case H_ADST:
- case H_FLIPADST:
-#endif
- aom_iadst16_sse2(intl, intr);
- aom_iadst16_sse2(inbl, inbr);
- break;
-#if CONFIG_EXT_TX
- case V_FLIPADST:
- case V_ADST:
- case V_DCT:
- case IDTX:
- iidtx16_sse2(intl, intr);
- iidtx16_sse2(inbl, inbr);
- break;
-#endif
- default: assert(0); break;
- }
-
- scale_sqrt2_8x16(intl);
- scale_sqrt2_8x16(intr);
- scale_sqrt2_8x16(inbl);
- scale_sqrt2_8x16(inbr);
-
- // Column transform
- switch (tx_type) {
- case DCT_DCT:
- case DCT_ADST:
-#if CONFIG_EXT_TX
- case DCT_FLIPADST:
- case V_DCT:
-#endif
- idct32_16col(intl, intr, inbl, inbr);
- break;
- case ADST_DCT:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case FLIPADST_ADST:
- case ADST_FLIPADST:
- case FLIPADST_FLIPADST:
- case FLIPADST_DCT:
- case V_ADST:
- case V_FLIPADST:
-#endif
- ihalfright32_16col(intl, intr, inbl, inbr);
- break;
-#if CONFIG_EXT_TX
- case H_DCT:
- case H_ADST:
- case H_FLIPADST:
- case IDTX: iidtx32_16col(intl, intr, inbl, inbr); break;
-#endif
- default: assert(0); break;
- }
-
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case H_DCT:
- case H_ADST:
- case V_ADST:
- case V_DCT:
- case IDTX:
-#endif
- break;
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case FLIPADST_ADST:
- case V_FLIPADST: FLIPUD_PTR(dest, stride, 32); break;
- case DCT_FLIPADST:
- case ADST_FLIPADST:
- case H_FLIPADST:
- for (i = 0; i < 16; ++i) {
- __m128i tmp = intl[i];
- intl[i] = mm_reverse_epi16(intr[i]);
- intr[i] = mm_reverse_epi16(tmp);
- tmp = inbl[i];
- inbl[i] = mm_reverse_epi16(inbr[i]);
- inbr[i] = mm_reverse_epi16(tmp);
- }
- break;
- case FLIPADST_FLIPADST:
- for (i = 0; i < 16; ++i) {
- __m128i tmp = intl[i];
- intl[i] = mm_reverse_epi16(intr[i]);
- intr[i] = mm_reverse_epi16(tmp);
- tmp = inbl[i];
- inbl[i] = mm_reverse_epi16(inbr[i]);
- inbr[i] = mm_reverse_epi16(tmp);
- }
- FLIPUD_PTR(dest, stride, 32);
- break;
-#endif
- default: assert(0); break;
- }
- write_buffer_16x32_round6(dest, intl, intr, inbl, inbr, stride);
-}
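For the flipped column variants, FLIPUD_PTR (not shown in this hunk) retargets the destination so rows are written bottom-up. A minimal sketch of that pointer adjustment, assuming the usual macro semantics:

/* Sketch only: assumed behaviour of the FLIPUD_PTR step. */
static void flip_dest_vertically(uint8_t **dest, int *stride, int height) {
  *dest += (height - 1) * (*stride);  /* point at the last row */
  *stride = -(*stride);               /* then walk upwards */
}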
-
-static INLINE void write_buffer_32x16_round6(uint8_t *dest, __m128i *in0,
- __m128i *in1, __m128i *in2,
- __m128i *in3, int stride) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i final_rounding = _mm_set1_epi16(1 << 5);
- int i;
-
- for (i = 0; i < 16; ++i) {
- in0[i] = _mm_adds_epi16(in0[i], final_rounding);
- in1[i] = _mm_adds_epi16(in1[i], final_rounding);
- in2[i] = _mm_adds_epi16(in2[i], final_rounding);
- in3[i] = _mm_adds_epi16(in3[i], final_rounding);
- in0[i] = _mm_srai_epi16(in0[i], 6);
- in1[i] = _mm_srai_epi16(in1[i], 6);
- in2[i] = _mm_srai_epi16(in2[i], 6);
- in3[i] = _mm_srai_epi16(in3[i], 6);
- RECON_AND_STORE(dest + i * stride + 0, in0[i]);
- RECON_AND_STORE(dest + i * stride + 8, in1[i]);
- RECON_AND_STORE(dest + i * stride + 16, in2[i]);
- RECON_AND_STORE(dest + i * stride + 24, in3[i]);
- }
-}
-
-void av1_iht32x16_512_add_sse2(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- __m128i in0[16], in1[16], in2[16], in3[16];
- const TX_TYPE tx_type = txfm_param->tx_type;
- int i;
-
- for (i = 0; i < 16; ++i) {
- in0[i] = load_input_data(input + i * 32 + 0);
- in1[i] = load_input_data(input + i * 32 + 8);
- in2[i] = load_input_data(input + i * 32 + 16);
- in3[i] = load_input_data(input + i * 32 + 24);
- }
-
- // Row transform
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case H_DCT:
-#endif
- idct32_16col(in0, in1, in2, in3);
- break;
- case DCT_ADST:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- case H_ADST:
- case H_FLIPADST:
-#endif
- ihalfright32_16col(in0, in1, in2, in3);
- break;
-#if CONFIG_EXT_TX
- case V_FLIPADST:
- case V_ADST:
- case V_DCT:
- case IDTX: iidtx32_16col(in0, in1, in2, in3); break;
-#endif
- default: assert(0); break;
- }
-
- scale_sqrt2_8x16(in0);
- scale_sqrt2_8x16(in1);
- scale_sqrt2_8x16(in2);
- scale_sqrt2_8x16(in3);
-
- // Column transform
- switch (tx_type) {
- case DCT_DCT:
- case DCT_ADST:
-#if CONFIG_EXT_TX
- case DCT_FLIPADST:
- case V_DCT:
-#endif
- aom_idct16_sse2(in0, in1);
- aom_idct16_sse2(in2, in3);
- break;
- case ADST_DCT:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case FLIPADST_ADST:
- case ADST_FLIPADST:
- case FLIPADST_FLIPADST:
- case FLIPADST_DCT:
- case V_ADST:
- case V_FLIPADST:
-#endif
- aom_iadst16_sse2(in0, in1);
- aom_iadst16_sse2(in2, in3);
- break;
-#if CONFIG_EXT_TX
- case H_DCT:
- case H_ADST:
- case H_FLIPADST:
- case IDTX:
- iidtx16_sse2(in0, in1);
- iidtx16_sse2(in2, in3);
- break;
-#endif
- default: assert(0); break;
- }
-
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
-#if CONFIG_EXT_TX
- case H_DCT:
- case H_ADST:
- case V_ADST:
- case V_DCT:
- case IDTX:
-#endif
- break;
-#if CONFIG_EXT_TX
- case FLIPADST_DCT:
- case FLIPADST_ADST:
- case V_FLIPADST: FLIPUD_PTR(dest, stride, 16); break;
- case DCT_FLIPADST:
- case ADST_FLIPADST:
- case H_FLIPADST:
- for (i = 0; i < 16; ++i) {
- __m128i tmp1 = in0[i];
- __m128i tmp2 = in1[i];
- in0[i] = mm_reverse_epi16(in3[i]);
- in1[i] = mm_reverse_epi16(in2[i]);
- in2[i] = mm_reverse_epi16(tmp2);
- in3[i] = mm_reverse_epi16(tmp1);
- }
- break;
- case FLIPADST_FLIPADST:
- for (i = 0; i < 16; ++i) {
- __m128i tmp1 = in0[i];
- __m128i tmp2 = in1[i];
- in0[i] = mm_reverse_epi16(in3[i]);
- in1[i] = mm_reverse_epi16(in2[i]);
- in2[i] = mm_reverse_epi16(tmp2);
- in3[i] = mm_reverse_epi16(tmp1);
- }
- FLIPUD_PTR(dest, stride, 16);
- break;
-#endif
- default: assert(0); break;
- }
- write_buffer_32x16_round6(dest, in0, in1, in2, in3, stride);
-}
diff --git a/third_party/aom/av1/common/x86/intra_edge_sse4.c b/third_party/aom/av1/common/x86/intra_edge_sse4.c
index ea4acff33..0c857b583 100644
--- a/third_party/aom/av1/common/x86/intra_edge_sse4.c
+++ b/third_party/aom/av1/common/x86/intra_edge_sse4.c
@@ -12,8 +12,8 @@
#include <assert.h>
#include <smmintrin.h>
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength) {
if (!strength) return;
@@ -39,9 +39,9 @@ void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength) {
// Adjust input pointer for filter support area
uint8_t *in = (strength == 3) ? p - 1 : p;
- // Avoid modifying first/last samples
+ // Avoid modifying first sample
uint8_t *out = p + 1;
- int len = sz - 2;
+ int len = sz - 1;
const int use_3tap_filter = (strength < 3);
@@ -133,9 +133,9 @@ void av1_filter_intra_edge_high_sse4_1(uint16_t *p, int sz, int strength) {
// Adjust input pointer for filter support area
uint16_t *in = (strength == 3) ? p - 1 : p;
- // Avoid modifying first/last samples
+ // Avoid modifying first sample
uint16_t *out = p + 1;
- int len = sz - 2;
+ int len = sz - 1;
const int use_3tap_filter = (strength < 3);
diff --git a/third_party/aom/av1/common/x86/jnt_convolve_avx2.c b/third_party/aom/av1/common/x86/jnt_convolve_avx2.c
new file mode 100644
index 000000000..ac1d2c9ca
--- /dev/null
+++ b/third_party/aom/av1/common/x86/jnt_convolve_avx2.c
@@ -0,0 +1,704 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <immintrin.h>
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_dsp/aom_convolve.h"
+#include "aom_dsp/x86/convolve_avx2.h"
+#include "aom_dsp/x86/convolve_common_intrin.h"
+#include "aom_dsp/x86/convolve_sse4_1.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/aom_filter.h"
+#include "av1/common/convolve.h"
+
+void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst0,
+ int dst_stride0, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ const int bd = 8;
+ int i, j;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const uint8_t *const src_ptr = src - fo_horiz;
+ const int bits = FILTER_BITS - conv_params->round_1;
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m256i wt0 = _mm256_set1_epi16(w0);
+ const __m256i wt1 = _mm256_set1_epi16(w1);
+ const __m256i wt = _mm256_unpacklo_epi16(wt0, wt1);
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m256i offset_const = _mm256_set1_epi16(offset);
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m256i rounding_const = _mm256_set1_epi16((1 << rounding_shift) >> 1);
+ __m256i filt[4], coeffs[4];
+
+ assert(bits >= 0);
+ assert(conv_params->round_0 > 0);
+
+ filt[0] = _mm256_load_si256((__m256i const *)filt1_global_avx2);
+ filt[1] = _mm256_load_si256((__m256i const *)filt2_global_avx2);
+ filt[2] = _mm256_load_si256((__m256i const *)filt3_global_avx2);
+ filt[3] = _mm256_load_si256((__m256i const *)filt4_global_avx2);
+
+ prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs);
+
+ const __m256i round_const =
+ _mm256_set1_epi16((1 << (conv_params->round_0 - 1)) >> 1);
+ const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0 - 1);
+
+ (void)filter_params_y;
+ (void)subpel_y_q4;
+
+ for (i = 0; i < h; i += 2) {
+ for (j = 0; j < w; j += 8) {
+ const __m256i data = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(&src_ptr[i * src_stride + j]))),
+ _mm256_castsi128_si256(_mm_loadu_si128(
+ (__m128i *)(&src_ptr[i * src_stride + j + src_stride]))),
+ 0x20);
+
+ __m256i res = convolve_lowbd_x(data, coeffs, filt);
+
+ res = _mm256_sra_epi16(_mm256_add_epi16(res, round_const), round_shift);
+
+ res = _mm256_slli_epi16(res, bits);
+
+ const __m256i res_unsigned = _mm256_add_epi16(res, offset_const);
+
+ // Accumulate values into the destination buffer
+ if (do_average) {
+ const __m256i data_ref_0 = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]))),
+ _mm256_castsi128_si256(_mm_loadu_si128(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride]))),
+ 0x20);
+
+ const __m256i comp_avg_res =
+ comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m256i round_result = convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m256i res_8 = _mm256_packus_epi16(round_result, round_result);
+ const __m128i res_0 = _mm256_castsi256_si128(res_8);
+ const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
+
+ if (w > 4) {
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
+ _mm_storel_epi64(
+ (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
+ } else {
+ *(uint32_t *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
+ *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+ _mm_cvtsi128_si32(res_1);
+ }
+ } else {
+ const __m128i res_0 = _mm256_castsi256_si128(res_unsigned);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
+
+ const __m128i res_1 = _mm256_extracti128_si256(res_unsigned, 1);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_1);
+ }
+ }
+ }
+}
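To make the bit budget above concrete, here is a worked example with assumed rounding parameters (round_0 = 3 and round_1 = 7 are illustrative values, not taken from this patch). The resulting offset biases the 16-bit intermediates so they stay non-negative, and the same offset_const is handed back to convolve_rounding() so the bias can be removed before the final pack to 8 bits.

#include <assert.h>

static void jnt_offset_example(void) {
  const int bd = 8, filter_bits = 7;  /* FILTER_BITS for the 8-tap filters */
  const int round_0 = 3, round_1 = 7; /* assumed example rounding values */
  const int offset_0 = bd + 2 * filter_bits - round_0 - round_1;  /* 12 */
  const int offset = (1 << offset_0) + (1 << (offset_0 - 1));     /* 6144 */
  const int rounding_shift = 2 * filter_bits - round_0 - round_1; /* 4 */
  assert(offset_0 == 12 && offset == 6144 && rounding_shift == 4);
}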
+
+void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst0,
+ int dst_stride0, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ const int bd = 8;
+ int i, j;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const uint8_t *const src_ptr = src - fo_vert * src_stride;
+ // +1 to compensate for dividing the filter coeffs by 2
+ const int left_shift = FILTER_BITS - conv_params->round_0 + 1;
+ const __m256i round_const =
+ _mm256_set1_epi32((1 << conv_params->round_1) >> 1);
+ const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m256i wt0 = _mm256_set1_epi16(w0);
+ const __m256i wt1 = _mm256_set1_epi16(w1);
+ const __m256i wt = _mm256_unpacklo_epi16(wt0, wt1);
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m256i offset_const = _mm256_set1_epi16(offset);
+ const int offset_1 = (1 << (bd + FILTER_BITS - 2));
+ const __m256i offset_const_1 = _mm256_set1_epi16(offset_1);
+ const __m256i offset_const_2 = _mm256_set1_epi16((1 << offset_0));
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m256i rounding_const = _mm256_set1_epi16((1 << rounding_shift) >> 1);
+ const __m256i zero = _mm256_setzero_si256();
+ __m256i coeffs[4], s[8];
+
+ assert((FILTER_BITS - conv_params->round_0) >= 0);
+
+ prepare_coeffs_lowbd(filter_params_y, subpel_y_q4, coeffs);
+
+ (void)conv_params;
+ (void)filter_params_x;
+ (void)subpel_x_q4;
+
+ for (j = 0; j < w; j += 16) {
+ const uint8_t *data = &src_ptr[j];
+ __m256i src6;
+
+ // Load lines a and b. Line a to lower 128, line b to upper 128
+ const __m256i src_01a = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 0 * src_stride))),
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
+ 0x20);
+
+ const __m256i src_12a = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
+ 0x20);
+
+ const __m256i src_23a = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
+ 0x20);
+
+ const __m256i src_34a = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
+ 0x20);
+
+ const __m256i src_45a = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
+ 0x20);
+
+ src6 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 6 * src_stride)));
+ const __m256i src_56a = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
+ src6, 0x20);
+
+ s[0] = _mm256_unpacklo_epi8(src_01a, src_12a);
+ s[1] = _mm256_unpacklo_epi8(src_23a, src_34a);
+ s[2] = _mm256_unpacklo_epi8(src_45a, src_56a);
+
+ s[4] = _mm256_unpackhi_epi8(src_01a, src_12a);
+ s[5] = _mm256_unpackhi_epi8(src_23a, src_34a);
+ s[6] = _mm256_unpackhi_epi8(src_45a, src_56a);
+
+ for (i = 0; i < h; i += 2) {
+ data = &src_ptr[i * src_stride + j];
+ const __m256i src_67a = _mm256_permute2x128_si256(
+ src6,
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
+ 0x20);
+
+ src6 = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 8 * src_stride)));
+ const __m256i src_78a = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
+ src6, 0x20);
+
+ s[3] = _mm256_unpacklo_epi8(src_67a, src_78a);
+ s[7] = _mm256_unpackhi_epi8(src_67a, src_78a);
+
+ __m256i res_lo = convolve_lowbd(s, coeffs);
+
+ res_lo = _mm256_add_epi16(res_lo, offset_const_1);
+
+ const __m256i res_lo_0_32b = _mm256_unpacklo_epi16(res_lo, zero);
+ const __m256i res_lo_0_shift =
+ _mm256_slli_epi32(res_lo_0_32b, left_shift);
+ const __m256i res_lo_0_round = _mm256_sra_epi32(
+ _mm256_add_epi32(res_lo_0_shift, round_const), round_shift);
+
+ const __m256i res_lo_1_32b = _mm256_unpackhi_epi16(res_lo, zero);
+ const __m256i res_lo_1_shift =
+ _mm256_slli_epi32(res_lo_1_32b, left_shift);
+ const __m256i res_lo_1_round = _mm256_sra_epi32(
+ _mm256_add_epi32(res_lo_1_shift, round_const), round_shift);
+
+ const __m256i res_lo_round =
+ _mm256_packs_epi32(res_lo_0_round, res_lo_1_round);
+
+ const __m256i res_lo_unsigned =
+ _mm256_add_epi16(res_lo_round, offset_const_2);
+
+ if (w - j < 16) {
+ if (do_average) {
+ const __m256i data_ref_0 = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]))),
+ _mm256_castsi128_si256(_mm_loadu_si128(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride]))),
+ 0x20);
+
+ const __m256i comp_avg_res =
+ comp_avg(&data_ref_0, &res_lo_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m256i round_result = convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m256i res_8 = _mm256_packus_epi16(round_result, round_result);
+ const __m128i res_0 = _mm256_castsi256_si128(res_8);
+ const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
+
+ if (w - j > 4) {
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
+ _mm_storel_epi64(
+ (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
+ } else {
+ *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
+ _mm_cvtsi128_si32(res_0);
+ *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+ _mm_cvtsi128_si32(res_1);
+ }
+ } else {
+ const __m128i res_0 = _mm256_castsi256_si128(res_lo_unsigned);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
+
+ const __m128i res_1 = _mm256_extracti128_si256(res_lo_unsigned, 1);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_1);
+ }
+ } else {
+ __m256i res_hi = convolve_lowbd(s + 4, coeffs);
+
+ res_hi = _mm256_add_epi16(res_hi, offset_const_1);
+
+ const __m256i res_hi_0_32b = _mm256_unpacklo_epi16(res_hi, zero);
+ const __m256i res_hi_0_shift =
+ _mm256_slli_epi32(res_hi_0_32b, left_shift);
+ const __m256i res_hi_0_round = _mm256_sra_epi32(
+ _mm256_add_epi32(res_hi_0_shift, round_const), round_shift);
+
+ const __m256i res_hi_1_32b = _mm256_unpackhi_epi16(res_hi, zero);
+ const __m256i res_hi_1_shift =
+ _mm256_slli_epi32(res_hi_1_32b, left_shift);
+ const __m256i res_hi_1_round = _mm256_sra_epi32(
+ _mm256_add_epi32(res_hi_1_shift, round_const), round_shift);
+
+ const __m256i res_hi_round =
+ _mm256_packs_epi32(res_hi_0_round, res_hi_1_round);
+
+ const __m256i res_hi_unsigned =
+ _mm256_add_epi16(res_hi_round, offset_const_2);
+
+ if (do_average) {
+ const __m256i data_ref_0_lo = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]))),
+ _mm256_castsi128_si256(_mm_loadu_si128(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride]))),
+ 0x20);
+
+ const __m256i data_ref_0_hi = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j + 8]))),
+ _mm256_castsi128_si256(_mm_loadu_si128(
+ (__m128i *)(&dst[i * dst_stride + j + 8 + dst_stride]))),
+ 0x20);
+
+ const __m256i comp_avg_res_lo =
+ comp_avg(&data_ref_0_lo, &res_lo_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m256i comp_avg_res_hi =
+ comp_avg(&data_ref_0_hi, &res_hi_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m256i round_result_lo = convolve_rounding(
+ &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift);
+
+ const __m256i round_result_hi = convolve_rounding(
+ &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift);
+
+ const __m256i res_8 =
+ _mm256_packus_epi16(round_result_lo, round_result_hi);
+ const __m128i res_0 = _mm256_castsi256_si128(res_8);
+ const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
+
+ _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
+ _mm_store_si128(
+ (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
+
+ } else {
+ const __m128i res_lo_0 = _mm256_castsi256_si128(res_lo_unsigned);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_lo_0);
+
+ const __m128i res_lo_1 = _mm256_extracti128_si256(res_lo_unsigned, 1);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_lo_1);
+
+ const __m128i res_hi_0 = _mm256_castsi256_si128(res_hi_unsigned);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + 8]), res_hi_0);
+
+ const __m128i res_hi_1 = _mm256_extracti128_si256(res_hi_unsigned, 1);
+ _mm_store_si128(
+ (__m128i *)(&dst[i * dst_stride + j + 8 + dst_stride]), res_hi_1);
+ }
+ }
+ s[0] = s[1];
+ s[1] = s[2];
+ s[2] = s[3];
+
+ s[4] = s[5];
+ s[5] = s[6];
+ s[6] = s[7];
+ }
+ }
+}
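The comp_avg() helper used throughout these kernels blends the intermediate already stored in the CONV_BUF_TYPE buffer with the newly computed one. A scalar sketch of the intended behaviour; the 4-bit weight precision is an assumption for illustration, not taken from this patch:

/* Illustrative scalar model of comp_avg(); w0/w1 correspond to
 * conv_params->fwd_offset / bck_offset, weight precision of 4 bits assumed. */
static int compound_average(int ref, int cur, int w0, int w1,
                            int use_jnt_comp_avg) {
  if (use_jnt_comp_avg) return (ref * w0 + cur * w1) >> 4;
  return (ref + cur) >> 1;  /* plain average when distance weighting is off */
}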
+
+void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst0,
+ int dst_stride0, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ const int bd = 8;
+
+ DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]);
+ int im_h = h + filter_params_y->taps - 1;
+ int im_stride = 8;
+ int i, j;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m256i wt0 = _mm256_set1_epi16(w0);
+ const __m256i wt1 = _mm256_set1_epi16(w1);
+ const __m256i wt = _mm256_unpacklo_epi16(wt0, wt1);
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m256i offset_const = _mm256_set1_epi16(offset);
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m256i rounding_const = _mm256_set1_epi16((1 << rounding_shift) >> 1);
+ __m256i filt[4], s[8], coeffs_x[4], coeffs_y[4];
+
+ assert(conv_params->round_0 > 0);
+
+ filt[0] = _mm256_load_si256((__m256i const *)filt1_global_avx2);
+ filt[1] = _mm256_load_si256((__m256i const *)filt2_global_avx2);
+ filt[2] = _mm256_load_si256((__m256i const *)filt3_global_avx2);
+ filt[3] = _mm256_load_si256((__m256i const *)filt4_global_avx2);
+
+ prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs_x);
+ prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
+
+ const __m256i round_const_h = _mm256_set1_epi16(
+ ((1 << (conv_params->round_0 - 1)) >> 1) + (1 << (bd + FILTER_BITS - 2)));
+ const __m128i round_shift_h = _mm_cvtsi32_si128(conv_params->round_0 - 1);
+
+ const __m256i round_const_v = _mm256_set1_epi32(
+ ((1 << conv_params->round_1) >> 1) -
+ (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
+ const __m128i round_shift_v = _mm_cvtsi32_si128(conv_params->round_1);
+
+ for (j = 0; j < w; j += 8) {
+ /* Horizontal filter */
+ {
+ for (i = 0; i < im_h; i += 2) {
+ __m256i data = _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)&src_ptr[(i * src_stride) + j]));
+ if (i + 1 < im_h)
+ data = _mm256_inserti128_si256(
+ data,
+ _mm_loadu_si128(
+ (__m128i *)&src_ptr[(i * src_stride) + j + src_stride]),
+ 1);
+ __m256i res = convolve_lowbd_x(data, coeffs_x, filt);
+
+ res = _mm256_sra_epi16(_mm256_add_epi16(res, round_const_h),
+ round_shift_h);
+
+ _mm256_store_si256((__m256i *)&im_block[i * im_stride], res);
+ }
+ }
+
+ /* Vertical filter */
+ {
+ __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride));
+ __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride));
+ __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride));
+ __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride));
+ __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride));
+ __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride));
+
+ s[0] = _mm256_unpacklo_epi16(s0, s1);
+ s[1] = _mm256_unpacklo_epi16(s2, s3);
+ s[2] = _mm256_unpacklo_epi16(s4, s5);
+
+ s[4] = _mm256_unpackhi_epi16(s0, s1);
+ s[5] = _mm256_unpackhi_epi16(s2, s3);
+ s[6] = _mm256_unpackhi_epi16(s4, s5);
+
+ for (i = 0; i < h; i += 2) {
+ const int16_t *data = &im_block[i * im_stride];
+
+ const __m256i s6 =
+ _mm256_loadu_si256((__m256i *)(data + 6 * im_stride));
+ const __m256i s7 =
+ _mm256_loadu_si256((__m256i *)(data + 7 * im_stride));
+
+ s[3] = _mm256_unpacklo_epi16(s6, s7);
+ s[7] = _mm256_unpackhi_epi16(s6, s7);
+
+ const __m256i res_a = convolve(s, coeffs_y);
+ const __m256i res_a_round = _mm256_sra_epi32(
+ _mm256_add_epi32(res_a, round_const_v), round_shift_v);
+
+ if (w - j > 4) {
+ const __m256i res_b = convolve(s + 4, coeffs_y);
+ const __m256i res_b_round = _mm256_sra_epi32(
+ _mm256_add_epi32(res_b, round_const_v), round_shift_v);
+ const __m256i res_16b = _mm256_packs_epi32(res_a_round, res_b_round);
+ const __m256i res_unsigned = _mm256_add_epi16(res_16b, offset_const);
+
+ if (do_average) {
+ const __m256i data_ref_0 = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]))),
+ _mm256_castsi128_si256(_mm_loadu_si128(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride]))),
+ 0x20);
+
+ const __m256i comp_avg_res =
+ comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m256i round_result = convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m256i res_8 =
+ _mm256_packus_epi16(round_result, round_result);
+ const __m128i res_0 = _mm256_castsi256_si128(res_8);
+ const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
+
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
+ _mm_storel_epi64(
+ (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
+ } else {
+ const __m128i res_0 = _mm256_castsi256_si128(res_unsigned);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
+
+ const __m128i res_1 = _mm256_extracti128_si256(res_unsigned, 1);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_1);
+ }
+ } else {
+ const __m256i res_16b = _mm256_packs_epi32(res_a_round, res_a_round);
+ const __m256i res_unsigned = _mm256_add_epi16(res_16b, offset_const);
+
+ if (do_average) {
+ const __m256i data_ref_0 = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]))),
+ _mm256_castsi128_si256(_mm_loadu_si128(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride]))),
+ 0x20);
+
+ const __m256i comp_avg_res =
+ comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m256i round_result = convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m256i res_8 =
+ _mm256_packus_epi16(round_result, round_result);
+ const __m128i res_0 = _mm256_castsi256_si128(res_8);
+ const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
+
+ *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
+ _mm_cvtsi128_si32(res_0);
+ *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+ _mm_cvtsi128_si32(res_1);
+
+ } else {
+ const __m128i res_0 = _mm256_castsi256_si128(res_unsigned);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
+
+ const __m128i res_1 = _mm256_extracti128_si256(res_unsigned, 1);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_1);
+ }
+ }
+
+ s[0] = s[1];
+ s[1] = s[2];
+ s[2] = s[3];
+
+ s[4] = s[5];
+ s[5] = s[6];
+ s[6] = s[7];
+ }
+ }
+ }
+}
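Structurally, the 2-D path above is a horizontal pass into a 16-bit intermediate block followed by a vertical pass over that block. A compact scalar sketch of that structure (illustration only: 8-tap filters and w, h <= 64 assumed, src taken to point at the top-left of the filter support area, and the compound offset and exact rounding constants omitted for brevity):

#include <stdint.h>

static void convolve_2d_sketch(const uint8_t *src, int src_stride,
                               int32_t *out, int out_stride, int w, int h,
                               const int16_t x_filter[8],
                               const int16_t y_filter[8],
                               int round_0, int round_1) {
  int16_t im_block[(64 + 7) * 64];  /* h + taps - 1 rows of width w */
  const int im_stride = w;
  const int im_h = h + 7;
  /* Horizontal pass: filter each row of the source support area. */
  for (int y = 0; y < im_h; ++y)
    for (int x = 0; x < w; ++x) {
      int32_t sum = 0;
      for (int k = 0; k < 8; ++k) sum += x_filter[k] * src[y * src_stride + x + k];
      im_block[y * im_stride + x] = (int16_t)(sum >> round_0);
    }
  /* Vertical pass: filter each column of the intermediate block. */
  for (int y = 0; y < h; ++y)
    for (int x = 0; x < w; ++x) {
      int32_t sum = 0;
      for (int k = 0; k < 8; ++k) sum += y_filter[k] * im_block[(y + k) * im_stride + x];
      out[y * out_stride + x] = sum >> round_1;
    }
}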
+
+void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride,
+ uint8_t *dst0, int dst_stride0, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ const int bd = 8;
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ (void)filter_params_x;
+ (void)filter_params_y;
+ (void)subpel_x_q4;
+ (void)subpel_y_q4;
+
+ const int bits =
+ FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
+ const __m128i left_shift = _mm_cvtsi32_si128(bits);
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m256i wt0 = _mm256_set1_epi16(w0);
+ const __m256i wt1 = _mm256_set1_epi16(w1);
+ const __m256i wt = _mm256_unpacklo_epi16(wt0, wt1);
+ const __m256i zero = _mm256_setzero_si256();
+
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m256i offset_const = _mm256_set1_epi16(offset);
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m256i rounding_const = _mm256_set1_epi16((1 << rounding_shift) >> 1);
+ int i, j;
+
+ if (!(w % 16)) {
+ for (i = 0; i < h; i += 1) {
+ for (j = 0; j < w; j += 16) {
+ const __m256i src_16bit = _mm256_cvtepu8_epi16(
+ _mm_loadu_si128((__m128i *)(&src[i * src_stride + j])));
+
+ const __m256i res = _mm256_sll_epi16(src_16bit, left_shift);
+ const __m256i res_unsigned = _mm256_add_epi16(res, offset_const);
+
+ if (do_average) {
+ const __m256i data_ref_0 =
+ _mm256_loadu_si256((__m256i *)(&dst[i * dst_stride + j]));
+
+ const __m256i comp_avg_res =
+ comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m256i round_result = convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m256i res_8 = _mm256_packus_epi16(round_result, round_result);
+ const __m256i res_0 = _mm256_permute4x64_epi64(res_8, 0xD8);
+
+ _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]),
+ _mm256_castsi256_si128(res_0));
+ } else {
+ _mm256_store_si256((__m256i *)(&dst[i * dst_stride + j]),
+ res_unsigned);
+ }
+ }
+ }
+ } else if (!(w % 4)) {
+ for (i = 0; i < h; i += 2) {
+ for (j = 0; j < w; j += 8) {
+ const __m128i src_row_0 =
+ _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j]));
+ const __m128i src_row_1 =
+ _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j + src_stride]));
+ // since not all compilers yet support _mm256_set_m128i()
+ const __m256i src_10 = _mm256_insertf128_si256(
+ _mm256_castsi128_si256(src_row_0), src_row_1, 1);
+
+ const __m256i src_16bit = _mm256_unpacklo_epi8(src_10, zero);
+
+ const __m256i res = _mm256_sll_epi16(src_16bit, left_shift);
+
+ const __m256i res_unsigned = _mm256_add_epi16(res, offset_const);
+
+ // Accumulate values into the destination buffer
+ if (do_average) {
+ const __m256i data_ref_0 = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]))),
+ _mm256_castsi128_si256(_mm_loadu_si128(
+ (__m128i *)(&dst[i * dst_stride + j + dst_stride]))),
+ 0x20);
+
+ const __m256i comp_avg_res =
+ comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m256i round_result = convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m256i res_8 = _mm256_packus_epi16(round_result, round_result);
+ const __m128i res_0 = _mm256_castsi256_si128(res_8);
+ const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
+
+ if (w > 4) {
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
+ _mm_storel_epi64(
+ (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
+ } else {
+ *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
+ _mm_cvtsi128_si32(res_0);
+ *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+ _mm_cvtsi128_si32(res_1);
+ }
+ } else {
+ const __m128i res_0 = _mm256_castsi256_si128(res_unsigned);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
+
+ const __m128i res_1 = _mm256_extracti128_si256(res_unsigned, 1);
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
+ res_1);
+ }
+ }
+ }
+ }
+}
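The copy path above performs no filtering at all; per pixel it only rescales the source into the compound intermediate domain and adds the unsigned-bias offset. A one-line scalar equivalent (illustration only):

static int16_t jnt_copy_pixel(uint8_t src_pixel, int bits, int offset) {
  return (int16_t)((src_pixel << bits) + offset);  /* matches the sll + add above */
}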
diff --git a/third_party/aom/av1/common/x86/jnt_convolve_sse2.c b/third_party/aom/av1/common/x86/jnt_convolve_sse2.c
new file mode 100644
index 000000000..4df7bd42e
--- /dev/null
+++ b/third_party/aom/av1/common/x86/jnt_convolve_sse2.c
@@ -0,0 +1,385 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <emmintrin.h>
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_dsp/aom_filter.h"
+#include "aom_dsp/x86/convolve_sse2.h"
+
+void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst0,
+ int dst_stride0, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ const int bd = 8;
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ const int dst_stride = conv_params->dst_stride;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const uint8_t *src_ptr = src - fo_horiz;
+ const int bits = FILTER_BITS - conv_params->round_1;
+ const __m128i left_shift = _mm_cvtsi32_si128(bits);
+ const __m128i round_const = _mm_set1_epi32((1 << conv_params->round_0) >> 1);
+ const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m128i wt0 = _mm_set1_epi16(w0);
+ const __m128i wt1 = _mm_set1_epi16(w1);
+ const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m128i offset_const = _mm_set1_epi16(offset);
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m128i rounding_const = _mm_set1_epi16((1 << rounding_shift) >> 1);
+ __m128i coeffs[4];
+
+ (void)filter_params_y;
+ (void)subpel_y_q4;
+
+ prepare_coeffs(filter_params_x, subpel_x_q4, coeffs);
+
+ if (w == 4) {
+ do {
+ const __m128i data = _mm_loadu_si128((__m128i *)src_ptr);
+ __m128i s[4];
+
+ s[0] = _mm_unpacklo_epi8(data, _mm_srli_si128(data, 1));
+ s[1] =
+ _mm_unpacklo_epi8(_mm_srli_si128(data, 2), _mm_srli_si128(data, 3));
+ s[2] =
+ _mm_unpacklo_epi8(_mm_srli_si128(data, 4), _mm_srli_si128(data, 5));
+ s[3] =
+ _mm_unpacklo_epi8(_mm_srli_si128(data, 6), _mm_srli_si128(data, 7));
+ const __m128i res_lo = convolve_lo_x(s, coeffs);
+ const __m128i res_lo_round =
+ _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
+ const __m128i res_lo_shift = _mm_sll_epi32(res_lo_round, left_shift);
+
+ const __m128i res_16b = _mm_packs_epi32(res_lo_shift, res_lo_shift);
+ const __m128i res_unsigned = _mm_add_epi16(res_16b, offset_const);
+
+ // Accumulate values into the destination buffer
+ if (do_average) {
+ const __m128i data_ref_0 = _mm_loadu_si128((__m128i *)dst);
+
+ const __m128i comp_avg_res =
+ comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m128i round_result = convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
+ *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
+ } else {
+ _mm_store_si128((__m128i *)(&dst[0]), res_unsigned);
+ }
+ src_ptr += src_stride;
+ dst += dst_stride;
+ dst0 += dst_stride0;
+ } while (--h);
+ } else {
+ assert(!(w % 8));
+ int i = 0;
+ do {
+ int j = 0;
+ do {
+ const __m128i data =
+ _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
+ __m128i s[4];
+
+ // Filter even-index pixels
+ s[0] = data;
+ s[1] = _mm_srli_si128(data, 2);
+ s[2] = _mm_srli_si128(data, 4);
+ s[3] = _mm_srli_si128(data, 6);
+ const __m128i res_even = convolve_lo_x(s, coeffs);
+
+ // Filter odd-index pixels
+ s[0] = _mm_srli_si128(data, 1);
+ s[1] = _mm_srli_si128(data, 3);
+ s[2] = _mm_srli_si128(data, 5);
+ s[3] = _mm_srli_si128(data, 7);
+ const __m128i res_odd = convolve_lo_x(s, coeffs);
+
+ // Rearrange pixels back into the order 0 ... 7
+ const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
+ const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
+ const __m128i res_lo_round =
+ _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
+ const __m128i res_hi_round =
+ _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
+ const __m128i res_lo_shift = _mm_sll_epi32(res_lo_round, left_shift);
+ const __m128i res_hi_shift = _mm_sll_epi32(res_hi_round, left_shift);
+
+ const __m128i res_16b = _mm_packs_epi32(res_lo_shift, res_hi_shift);
+ const __m128i res_unsigned = _mm_add_epi16(res_16b, offset_const);
+
+ // Accumulate values into the destination buffer
+ if (do_average) {
+ const __m128i data_ref_0 =
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]));
+
+ const __m128i comp_avg_res =
+ comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m128i round_result = convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8);
+ } else {
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned);
+ }
+ j += 8;
+ } while (j < w);
+ } while (++i < h);
+ }
+}
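The even/odd split in the 8-wide path exists because each SIMD multiply-accumulate step yields four 32-bit sums per register; the eight horizontal outputs of a row are therefore produced as four even-index and four odd-index results and interleaved back into pixel order, as this scalar sketch (illustration only) shows:

#include <stdint.h>

static void interleave_even_odd(const int32_t even[4], const int32_t odd[4],
                                int32_t out[8]) {
  for (int k = 0; k < 4; ++k) {
    out[2 * k + 0] = even[k];  /* results for pixels 0, 2, 4, 6 */
    out[2 * k + 1] = odd[k];   /* results for pixels 1, 3, 5, 7 */
  }
}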
+
+void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst0,
+ int dst_stride0, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ const int bd = 8;
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ const int dst_stride = conv_params->dst_stride;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const uint8_t *src_ptr = src - fo_vert * src_stride;
+ const int bits = FILTER_BITS - conv_params->round_0;
+ const __m128i left_shift = _mm_cvtsi32_si128(bits);
+ const __m128i wt0 = _mm_set1_epi16(conv_params->fwd_offset);
+ const __m128i wt1 = _mm_set1_epi16(conv_params->bck_offset);
+ const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m128i offset_const = _mm_set1_epi16(offset);
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m128i rounding_const = _mm_set1_epi16((1 << rounding_shift) >> 1);
+ const __m128i round_const = _mm_set1_epi32((1 << conv_params->round_1) >> 1);
+ const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
+ __m128i coeffs[4];
+
+ (void)filter_params_x;
+ (void)subpel_x_q4;
+
+ prepare_coeffs(filter_params_y, subpel_y_q4, coeffs);
+
+ if (w == 4) {
+ __m128i s[8], src6, res, res_shift;
+ src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 6 * src_stride));
+ s[0] = _mm_unpacklo_epi8(
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 0 * src_stride)),
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)));
+ s[1] = _mm_unpacklo_epi8(
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)),
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)));
+ s[2] = _mm_unpacklo_epi8(
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)),
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)));
+ s[3] = _mm_unpacklo_epi8(
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)),
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)));
+ s[4] = _mm_unpacklo_epi8(
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)),
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)));
+ s[5] = _mm_unpacklo_epi8(
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)), src6);
+
+ do {
+ s[6] = _mm_unpacklo_epi8(
+ src6, _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)));
+ src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 8 * src_stride));
+ s[7] = _mm_unpacklo_epi8(
+ _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)), src6);
+
+ res = convolve_lo_y(s + 0, coeffs);
+ res_shift = _mm_sll_epi32(res, left_shift);
+ res_shift =
+ _mm_sra_epi32(_mm_add_epi32(res_shift, round_const), round_shift);
+
+ __m128i res_16b = _mm_packs_epi32(res_shift, res_shift);
+ __m128i res_unsigned = _mm_add_epi16(res_16b, offset_const);
+
+ // Accumulate values into the destination buffer
+ if (do_average) {
+ const __m128i data_ref_0 = _mm_loadu_si128((__m128i *)dst);
+
+ const __m128i comp_avg_res =
+ comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m128i round_result = convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
+ *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
+
+ } else {
+ _mm_store_si128((__m128i *)dst, res_unsigned);
+ }
+
+ src_ptr += src_stride;
+ dst += dst_stride;
+ dst0 += dst_stride0;
+
+ res = convolve_lo_y(s + 1, coeffs);
+ res_shift = _mm_sll_epi32(res, left_shift);
+ res_shift =
+ _mm_sra_epi32(_mm_add_epi32(res_shift, round_const), round_shift);
+
+ res_16b = _mm_packs_epi32(res_shift, res_shift);
+ res_unsigned = _mm_add_epi16(res_16b, offset_const);
+
+ // Accumulate values into the destination buffer
+ if (do_average) {
+ const __m128i data_ref_0 = _mm_loadu_si128((__m128i *)dst);
+
+ const __m128i comp_avg_res =
+ comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m128i round_result = convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
+ *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
+
+ } else {
+ _mm_store_si128((__m128i *)dst, res_unsigned);
+ }
+
+ src_ptr += src_stride;
+ dst += dst_stride;
+ dst0 += dst_stride0;
+
+ s[0] = s[2];
+ s[1] = s[3];
+ s[2] = s[4];
+ s[3] = s[5];
+ s[4] = s[6];
+ s[5] = s[7];
+ h -= 2;
+ } while (h);
+ } else {
+ assert(!(w % 8));
+ int j = 0;
+ do {
+ __m128i s[8], src6, res_lo, res_hi, res_lo_shift, res_hi_shift;
+ const uint8_t *data = &src_ptr[j];
+
+ src6 = _mm_loadl_epi64((__m128i *)(data + 6 * src_stride));
+ s[0] = _mm_unpacklo_epi8(
+ _mm_loadl_epi64((__m128i *)(data + 0 * src_stride)),
+ _mm_loadl_epi64((__m128i *)(data + 1 * src_stride)));
+ s[1] = _mm_unpacklo_epi8(
+ _mm_loadl_epi64((__m128i *)(data + 1 * src_stride)),
+ _mm_loadl_epi64((__m128i *)(data + 2 * src_stride)));
+ s[2] = _mm_unpacklo_epi8(
+ _mm_loadl_epi64((__m128i *)(data + 2 * src_stride)),
+ _mm_loadl_epi64((__m128i *)(data + 3 * src_stride)));
+ s[3] = _mm_unpacklo_epi8(
+ _mm_loadl_epi64((__m128i *)(data + 3 * src_stride)),
+ _mm_loadl_epi64((__m128i *)(data + 4 * src_stride)));
+ s[4] = _mm_unpacklo_epi8(
+ _mm_loadl_epi64((__m128i *)(data + 4 * src_stride)),
+ _mm_loadl_epi64((__m128i *)(data + 5 * src_stride)));
+ s[5] = _mm_unpacklo_epi8(
+ _mm_loadl_epi64((__m128i *)(data + 5 * src_stride)), src6);
+
+ int i = 0;
+ do {
+ data = &src_ptr[i * src_stride + j];
+ s[6] = _mm_unpacklo_epi8(
+ src6, _mm_loadl_epi64((__m128i *)(data + 7 * src_stride)));
+ src6 = _mm_loadl_epi64((__m128i *)(data + 8 * src_stride));
+ s[7] = _mm_unpacklo_epi8(
+ _mm_loadl_epi64((__m128i *)(data + 7 * src_stride)), src6);
+
+ res_lo = convolve_lo_y(s, coeffs); // Filter low index pixels
+ res_hi = convolve_hi_y(s, coeffs); // Filter high index pixels
+ res_lo_shift = _mm_sll_epi32(res_lo, left_shift);
+ res_hi_shift = _mm_sll_epi32(res_hi, left_shift);
+ res_lo_shift = _mm_sra_epi32(_mm_add_epi32(res_lo_shift, round_const),
+ round_shift);
+ res_hi_shift = _mm_sra_epi32(_mm_add_epi32(res_hi_shift, round_const),
+ round_shift);
+
+ __m128i res_16b = _mm_packs_epi32(res_lo_shift, res_hi_shift);
+ __m128i res_unsigned = _mm_add_epi16(res_16b, offset_const);
+
+ // Accumulate values into the destination buffer
+ if (do_average) {
+ const __m128i data_ref_0 =
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]));
+
+ const __m128i comp_avg_res =
+ comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m128i round_result = convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8);
+ } else {
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned);
+ }
+ i++;
+
+ res_lo = convolve_lo_y(s + 1, coeffs); // Filter low index pixels
+ res_hi = convolve_hi_y(s + 1, coeffs); // Filter high index pixels
+ res_lo_shift = _mm_sll_epi32(res_lo, left_shift);
+ res_hi_shift = _mm_sll_epi32(res_hi, left_shift);
+ res_lo_shift = _mm_sra_epi32(_mm_add_epi32(res_lo_shift, round_const),
+ round_shift);
+ res_hi_shift = _mm_sra_epi32(_mm_add_epi32(res_hi_shift, round_const),
+ round_shift);
+ res_16b = _mm_packs_epi32(res_lo_shift, res_hi_shift);
+ res_unsigned = _mm_add_epi16(res_16b, offset_const);
+
+ // Accumulate values into the destination buffer
+ if (do_average) {
+ __m128i data_ref_0 =
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]));
+
+ const __m128i comp_avg_res =
+ comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m128i round_result = convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8);
+ } else {
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned);
+ }
+ i++;
+
+ s[0] = s[2];
+ s[1] = s[3];
+ s[2] = s[4];
+ s[3] = s[5];
+ s[4] = s[6];
+ s[5] = s[7];
+ } while (i < h);
+ j += 8;
+ } while (j < w);
+ }
+}
diff --git a/third_party/aom/av1/common/x86/jnt_convolve_ssse3.c b/third_party/aom/av1/common/x86/jnt_convolve_ssse3.c
new file mode 100644
index 000000000..e4d51ac8d
--- /dev/null
+++ b/third_party/aom/av1/common/x86/jnt_convolve_ssse3.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tmmintrin.h>
+
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom_dsp/aom_filter.h"
+#include "aom_dsp/x86/convolve_sse2.h"
+
+void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride,
+ uint8_t *dst0, int dst_stride0, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_q4, const int subpel_y_q4,
+ ConvolveParams *conv_params) {
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ int dst_stride = conv_params->dst_stride;
+ const int bd = 8;
+
+ DECLARE_ALIGNED(16, int16_t,
+ im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
+ int im_h = h + filter_params_y->taps - 1;
+ int im_stride = MAX_SB_SIZE;
+ int i, j;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const int do_average = conv_params->do_average;
+ const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
+ const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
+
+ const __m128i zero = _mm_setzero_si128();
+
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m128i wt0 = _mm_set1_epi16(w0);
+ const __m128i wt1 = _mm_set1_epi16(w1);
+ const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
+
+ const int offset_0 =
+ bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
+ const __m128i offset_const = _mm_set1_epi16(offset);
+ const int rounding_shift =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const __m128i rounding_const = _mm_set1_epi16((1 << rounding_shift) >> 1);
+
+ /* Horizontal filter */
+ {
+ const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
+ const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter);
+
+ // coeffs 0 1 0 1 2 3 2 3
+ const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
+ // coeffs 4 5 4 5 6 7 6 7
+ const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
+
+ // coeffs 0 1 0 1 0 1 0 1
+ const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
+ // coeffs 2 3 2 3 2 3 2 3
+ const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
+ // coeffs 4 5 4 5 4 5 4 5
+ const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
+ // coeffs 6 7 6 7 6 7 6 7
+ const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
+
+ const __m128i round_const = _mm_set1_epi32(
+ ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
+ const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
+
+ for (i = 0; i < im_h; ++i) {
+ for (j = 0; j < w; j += 8) {
+ const __m128i data =
+ _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
+
+ const __m128i src_lo = _mm_unpacklo_epi8(data, zero);
+ const __m128i src_hi = _mm_unpackhi_epi8(data, zero);
+
+ // Filter even-index pixels
+ const __m128i res_0 = _mm_madd_epi16(src_lo, coeff_01);
+ const __m128i src_2 = _mm_alignr_epi8(src_hi, src_lo, 4);
+ const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
+ const __m128i src_4 = _mm_alignr_epi8(src_hi, src_lo, 8);
+ const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
+ const __m128i src_6 = _mm_alignr_epi8(src_hi, src_lo, 12);
+ const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
+
+ __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
+ _mm_add_epi32(res_2, res_6));
+ res_even =
+ _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift);
+
+ // Filter odd-index pixels
+ const __m128i src_1 = _mm_alignr_epi8(src_hi, src_lo, 2);
+ const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
+ const __m128i src_3 = _mm_alignr_epi8(src_hi, src_lo, 6);
+ const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
+ const __m128i src_5 = _mm_alignr_epi8(src_hi, src_lo, 10);
+ const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
+ const __m128i src_7 = _mm_alignr_epi8(src_hi, src_lo, 14);
+ const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
+
+ __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
+ _mm_add_epi32(res_3, res_7));
+ res_odd =
+ _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift);
+
+ // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
+ __m128i res = _mm_packs_epi32(res_even, res_odd);
+ _mm_store_si128((__m128i *)&im_block[i * im_stride + j], res);
+ }
+ }
+ }
+
+ /* Vertical filter */
+ {
+ const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+ *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
+ const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
+
+ // coeffs 0 1 0 1 2 3 2 3
+ const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
+ // coeffs 4 5 4 5 6 7 6 7
+ const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
+
+ // coeffs 0 1 0 1 0 1 0 1
+ const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
+ // coeffs 2 3 2 3 2 3 2 3
+ const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
+ // coeffs 4 5 4 5 4 5 4 5
+ const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
+ // coeffs 6 7 6 7 6 7 6 7
+ const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
+
+ const __m128i round_const = _mm_set1_epi32(
+ ((1 << conv_params->round_1) >> 1) -
+ (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
+ const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
+
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; j += 8) {
+ // Filter even-index pixels
+ const int16_t *data = &im_block[i * im_stride + j];
+ const __m128i src_0 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride),
+ *(__m128i *)(data + 1 * im_stride));
+ const __m128i src_2 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride),
+ *(__m128i *)(data + 3 * im_stride));
+ const __m128i src_4 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride),
+ *(__m128i *)(data + 5 * im_stride));
+ const __m128i src_6 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride),
+ *(__m128i *)(data + 7 * im_stride));
+
+ const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
+ const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
+ const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
+ const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
+
+ const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
+ _mm_add_epi32(res_4, res_6));
+
+ // Filter odd-index pixels
+ const __m128i src_1 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 0 * im_stride),
+ *(__m128i *)(data + 1 * im_stride));
+ const __m128i src_3 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 2 * im_stride),
+ *(__m128i *)(data + 3 * im_stride));
+ const __m128i src_5 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 4 * im_stride),
+ *(__m128i *)(data + 5 * im_stride));
+ const __m128i src_7 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 6 * im_stride),
+ *(__m128i *)(data + 7 * im_stride));
+
+ const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
+ const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
+ const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
+ const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
+
+ const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
+ _mm_add_epi32(res_5, res_7));
+
+ // Rearrange pixels back into the order 0 ... 7
+ const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
+ const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
+
+ const __m128i res_lo_round =
+ _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
+ const __m128i res_hi_round =
+ _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
+
+ const __m128i res_16b = _mm_packs_epi32(res_lo_round, res_hi_round);
+ const __m128i res_unsigned = _mm_add_epi16(res_16b, offset_const);
+
+ // Accumulate values into the destination buffer
+ if (do_average) {
+ const __m128i data_ref_0 =
+ _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]));
+
+ const __m128i comp_avg_res =
+ comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
+
+ const __m128i round_result = convolve_rounding(
+ &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
+
+ const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
+
+ if (w > 4)
+ _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8);
+ else
+ *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
+ _mm_cvtsi128_si32(res_8);
+ } else {
+ _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned);
+ }
+ }
+ }
+ }
+}
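For reference, the per-pixel operation the even/odd SSE2 lanes above implement is an 8-tap multiply-accumulate followed by a rounding right shift; a minimal scalar sketch (the helper name and parameters are illustrative, not aom API):

#include <stdint.h>

/* 8-tap MAC plus rounding shift, matching the madd/add/sra sequence above.
   round_offset corresponds to the round_const vectors (half of 1 << bits,
   with any compound offset folded in). */
static int32_t sketch_filter8_round(const int16_t *src, int step,
                                    const int16_t *coeffs, int bits,
                                    int32_t round_offset) {
  int32_t acc = 0;
  for (int k = 0; k < 8; ++k) acc += (int32_t)src[k * step] * coeffs[k];
  return (acc + round_offset) >> bits;
}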
diff --git a/third_party/aom/av1/common/x86/pvq_sse4.c b/third_party/aom/av1/common/x86/pvq_sse4.c
deleted file mode 100644
index b3ed9efdf..000000000
--- a/third_party/aom/av1/common/x86/pvq_sse4.c
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <smmintrin.h>
-#include <emmintrin.h>
-#include <tmmintrin.h>
-#include <float.h>
-
-#include "./av1_rtcd.h"
-#include "av1/common/x86/pvq_sse4.h"
-#include "../odintrin.h"
-#include "av1/common/pvq.h"
-
-#define EPSILON 1e-15f
-
-static __m128 horizontal_sum_ps(__m128 x) {
- x = _mm_add_ps(x, _mm_shuffle_ps(x, x, _MM_SHUFFLE(1, 0, 3, 2)));
- x = _mm_add_ps(x, _mm_shuffle_ps(x, x, _MM_SHUFFLE(2, 3, 0, 1)));
- return x;
-}
-
-static __m128i horizontal_sum_epi32(__m128i x) {
- x = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
- x = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1)));
- return x;
-}
-
-static INLINE float rsqrtf(float x) {
- float y;
- _mm_store_ss(&y, _mm_rsqrt_ss(_mm_load_ss(&x)));
- return y;
-}
-
-/** Find the codepoint on the given PSphere closest to the desired
- * vector. This is a float-precision PVQ search just to make sure
- * our tests aren't limited by numerical accuracy. It's close to the
- * pvq_search_rdo_double_c implementation, but is not bit accurate and
- * it performs slightly worse on PSNR. One reason is that this code runs
- * more RDO iterations than the C code. It also uses single precision
- * floating point math, whereas the C version uses double precision.
- *
- * @param [in] xcoeff input vector to quantize (x in the math doc)
- * @param [in] n number of dimensions
- * @param [in] k number of pulses
- * @param [out] ypulse optimal codevector found (y in the math doc)
- * @param [in] g2 multiplier for the distortion (typically squared
- * gain units)
- * @param [in] pvq_norm_lambda enc->pvq_norm_lambda for quantized RDO
- * @param [in] prev_k number of pulses already in ypulse that we should
- * reuse for the search (or 0 for a new search)
- * @return cosine distance between x and y (between 0 and 1)
- */
-double pvq_search_rdo_double_sse4_1(const od_val16 *xcoeff, int n, int k,
- int *ypulse, double g2,
- double pvq_norm_lambda, int prev_k) {
- int i, j;
- int reuse_pulses = prev_k > 0 && prev_k <= k;
- /* TODO - This blows our 8kB stack space budget and should be fixed when
- converting PVQ to fixed point. */
- float xx = 0, xy = 0, yy = 0;
- float x[MAXN + 3];
- float y[MAXN + 3];
- float sign_y[MAXN + 3];
- for (i = 0; i < n; i++) {
- float tmp = (float)xcoeff[i];
- xx += tmp * tmp;
- x[i] = xcoeff[i];
- }
-
- x[n] = x[n + 1] = x[n + 2] = 0;
- ypulse[n] = ypulse[n + 1] = ypulse[n + 2] = 0;
-
- __m128 sums = _mm_setzero_ps();
- for (i = 0; i < n; i += 4) {
- __m128 x4 = _mm_loadu_ps(&x[i]);
- __m128 s4 = _mm_cmplt_ps(x4, _mm_setzero_ps());
- /* Save the sign, we'll put it back later. */
- _mm_storeu_ps(&sign_y[i], s4);
- /* Get rid of the sign. */
- x4 = _mm_andnot_ps(_mm_set_ps1(-0.f), x4);
- sums = _mm_add_ps(sums, x4);
- if (!reuse_pulses) {
- /* Clear y and ypulse in case we don't do the projection. */
- _mm_storeu_ps(&y[i], _mm_setzero_ps());
- _mm_storeu_si128((__m128i *)&ypulse[i], _mm_setzero_si128());
- }
- _mm_storeu_ps(&x[i], x4);
- }
- sums = horizontal_sum_ps(sums);
- int pulses_left = k;
- {
- __m128i pulses_sum;
- __m128 yy4, xy4;
- xy4 = yy4 = _mm_setzero_ps();
- pulses_sum = _mm_setzero_si128();
- if (reuse_pulses) {
- /* We reuse pulses from a previous search so we don't have to search them
- again. */
- for (j = 0; j < n; j += 4) {
- __m128 x4, y4;
- __m128i iy4;
- iy4 = _mm_abs_epi32(_mm_loadu_si128((__m128i *)&ypulse[j]));
- pulses_sum = _mm_add_epi32(pulses_sum, iy4);
- _mm_storeu_si128((__m128i *)&ypulse[j], iy4);
- y4 = _mm_cvtepi32_ps(iy4);
- x4 = _mm_loadu_ps(&x[j]);
- xy4 = _mm_add_ps(xy4, _mm_mul_ps(x4, y4));
- yy4 = _mm_add_ps(yy4, _mm_mul_ps(y4, y4));
- /* Double the y[] vector so we don't have to do it in the search loop.
- */
- _mm_storeu_ps(&y[j], _mm_add_ps(y4, y4));
- }
- pulses_left -= _mm_cvtsi128_si32(horizontal_sum_epi32(pulses_sum));
- xy4 = horizontal_sum_ps(xy4);
- xy = _mm_cvtss_f32(xy4);
- yy4 = horizontal_sum_ps(yy4);
- yy = _mm_cvtss_f32(yy4);
- } else if (k > (n >> 1)) {
- /* Do a pre-search by projecting on the pyramid. */
- __m128 rcp4;
- float sum = _mm_cvtss_f32(sums);
- /* If x is too small, just replace it with a pulse at 0. This prevents
- infinities and NaNs from causing too many pulses to be allocated. Here,
- 64 is an
- approximation of infinity. */
- if (sum <= EPSILON) {
- x[0] = 1.f;
- for (i = 1; i < n; i++) {
- x[i] = 0;
- }
- sums = _mm_set_ps1(1.f);
- }
- /* Using k + e with e < 1 guarantees we cannot get more than k pulses. */
- rcp4 = _mm_mul_ps(_mm_set_ps1((float)k + .8f), _mm_rcp_ps(sums));
- xy4 = yy4 = _mm_setzero_ps();
- pulses_sum = _mm_setzero_si128();
- for (j = 0; j < n; j += 4) {
- __m128 rx4, x4, y4;
- __m128i iy4;
- x4 = _mm_loadu_ps(&x[j]);
- rx4 = _mm_mul_ps(x4, rcp4);
- iy4 = _mm_cvttps_epi32(rx4);
- pulses_sum = _mm_add_epi32(pulses_sum, iy4);
- _mm_storeu_si128((__m128i *)&ypulse[j], iy4);
- y4 = _mm_cvtepi32_ps(iy4);
- xy4 = _mm_add_ps(xy4, _mm_mul_ps(x4, y4));
- yy4 = _mm_add_ps(yy4, _mm_mul_ps(y4, y4));
- /* Double the y[] vector so we don't have to do it in the search loop.
- */
- _mm_storeu_ps(&y[j], _mm_add_ps(y4, y4));
- }
- pulses_left -= _mm_cvtsi128_si32(horizontal_sum_epi32(pulses_sum));
- xy = _mm_cvtss_f32(horizontal_sum_ps(xy4));
- yy = _mm_cvtss_f32(horizontal_sum_ps(yy4));
- }
- x[n] = x[n + 1] = x[n + 2] = -100;
- y[n] = y[n + 1] = y[n + 2] = 100;
- }
-
- /* This should never happen. */
- OD_ASSERT(pulses_left <= n + 3);
-
- float lambda_delta_rate[MAXN + 3];
- if (pulses_left) {
- /* Hoist lambda to avoid the multiply in the loop. */
- float lambda =
- 0.5f * sqrtf(xx) * (float)pvq_norm_lambda / (FLT_MIN + (float)g2);
- float delta_rate = 3.f / n;
- __m128 count = _mm_set_ps(3, 2, 1, 0);
- for (i = 0; i < n; i += 4) {
- _mm_storeu_ps(&lambda_delta_rate[i],
- _mm_mul_ps(count, _mm_set_ps1(lambda * delta_rate)));
- count = _mm_add_ps(count, _mm_set_ps(4, 4, 4, 4));
- }
- }
- lambda_delta_rate[n] = lambda_delta_rate[n + 1] = lambda_delta_rate[n + 2] =
- 1e30f;
-
- for (i = 0; i < pulses_left; i++) {
- int best_id = 0;
- __m128 xy4, yy4;
- __m128 max, max2;
- __m128i count;
- __m128i pos;
-
- /* The squared magnitude term gets added anyway, so we might as well
- add it outside the loop. */
- yy = yy + 1;
- xy4 = _mm_load1_ps(&xy);
- yy4 = _mm_load1_ps(&yy);
- max = _mm_setzero_ps();
- pos = _mm_setzero_si128();
- count = _mm_set_epi32(3, 2, 1, 0);
- for (j = 0; j < n; j += 4) {
- __m128 x4, y4, r4;
- x4 = _mm_loadu_ps(&x[j]);
- y4 = _mm_loadu_ps(&y[j]);
- x4 = _mm_add_ps(x4, xy4);
- y4 = _mm_add_ps(y4, yy4);
- y4 = _mm_rsqrt_ps(y4);
- r4 = _mm_mul_ps(x4, y4);
- /* Subtract lambda. */
- r4 = _mm_sub_ps(r4, _mm_loadu_ps(&lambda_delta_rate[j]));
- /* Update the index of the max. */
- pos = _mm_max_epi16(
- pos, _mm_and_si128(count, _mm_castps_si128(_mm_cmpgt_ps(r4, max))));
- /* Update the max. */
- max = _mm_max_ps(max, r4);
- /* Update the indices (+4) */
- count = _mm_add_epi32(count, _mm_set_epi32(4, 4, 4, 4));
- }
- /* Horizontal max. */
- max2 = _mm_max_ps(max, _mm_shuffle_ps(max, max, _MM_SHUFFLE(1, 0, 3, 2)));
- max2 =
- _mm_max_ps(max2, _mm_shuffle_ps(max2, max2, _MM_SHUFFLE(2, 3, 0, 1)));
- /* Now that max2 contains the max at all positions, look at which value(s)
- of the
- partial max is equal to the global max. */
- pos = _mm_and_si128(pos, _mm_castps_si128(_mm_cmpeq_ps(max, max2)));
- pos = _mm_max_epi16(pos, _mm_unpackhi_epi64(pos, pos));
- pos = _mm_max_epi16(pos, _mm_shufflelo_epi16(pos, _MM_SHUFFLE(1, 0, 3, 2)));
- best_id = _mm_cvtsi128_si32(pos);
- OD_ASSERT(best_id < n);
- /* Updating the sums of the new pulse(s) */
- xy = xy + x[best_id];
- /* We're multiplying y[j] by two so we don't have to do it here. */
- yy = yy + y[best_id];
- /* Only now that we've made the final choice, update y/ypulse. */
- /* Multiplying y[j] by 2 so we don't have to do it everywhere else. */
- y[best_id] += 2;
- ypulse[best_id]++;
- }
-
- /* Put the original sign back. */
- for (i = 0; i < n; i += 4) {
- __m128i y4;
- __m128i s4;
- y4 = _mm_loadu_si128((__m128i *)&ypulse[i]);
- s4 = _mm_castps_si128(_mm_loadu_ps(&sign_y[i]));
- y4 = _mm_xor_si128(_mm_add_epi32(y4, s4), s4);
- _mm_storeu_si128((__m128i *)&ypulse[i], y4);
- }
- return xy * rsqrtf(xx * yy + FLT_MIN);
-}
diff --git a/third_party/aom/av1/common/x86/pvq_sse4.h b/third_party/aom/av1/common/x86/pvq_sse4.h
deleted file mode 100644
index 3c4ce8543..000000000
--- a/third_party/aom/av1/common/x86/pvq_sse4.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_COMMON_PVQ_X86_SSE4_H_
-#define AOM_COMMON_PVQ_X86_SSE4_H_
-#endif // AOM_COMMON_PVQ_X86_SSE4_H_
diff --git a/third_party/aom/av1/common/x86/reconinter_avx2.c b/third_party/aom/av1/common/x86/reconinter_avx2.c
new file mode 100644
index 000000000..ffbb31849
--- /dev/null
+++ b/third_party/aom/av1/common/x86/reconinter_avx2.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <immintrin.h>
+
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_integer.h"
+#include "aom_dsp/blend.h"
+#include "aom_dsp/x86/synonyms.h"
+#include "av1/common/blockd.h"
+
+void av1_build_compound_diffwtd_mask_highbd_avx2(
+ uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
+ int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
+ int bd) {
+ if (w < 16) {
+ av1_build_compound_diffwtd_mask_highbd_ssse3(
+ mask, mask_type, src0, src0_stride, src1, src1_stride, h, w, bd);
+ } else {
+ assert(mask_type == DIFFWTD_38 || mask_type == DIFFWTD_38_INV);
+ assert(bd >= 8);
+ assert((w % 16) == 0);
+ const __m256i y0 = _mm256_setzero_si256();
+ const __m256i yAOM_BLEND_A64_MAX_ALPHA =
+ _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
+ const int mask_base = 38;
+ const __m256i ymask_base = _mm256_set1_epi16(mask_base);
+ const uint16_t *ssrc0 = CONVERT_TO_SHORTPTR(src0);
+ const uint16_t *ssrc1 = CONVERT_TO_SHORTPTR(src1);
+ if (bd == 8) {
+ if (mask_type == DIFFWTD_38_INV) {
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; j += 16) {
+ __m256i s0 = _mm256_loadu_si256((const __m256i *)&ssrc0[j]);
+ __m256i s1 = _mm256_loadu_si256((const __m256i *)&ssrc1[j]);
+ __m256i diff = _mm256_srai_epi16(
+ _mm256_abs_epi16(_mm256_sub_epi16(s0, s1)), DIFF_FACTOR_LOG2);
+ __m256i m = _mm256_min_epi16(
+ _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)),
+ yAOM_BLEND_A64_MAX_ALPHA);
+ m = _mm256_sub_epi16(yAOM_BLEND_A64_MAX_ALPHA, m);
+ m = _mm256_packus_epi16(m, m);
+ m = _mm256_permute4x64_epi64(m, _MM_SHUFFLE(0, 0, 2, 0));
+ __m128i m0 = _mm256_castsi256_si128(m);
+ _mm_storeu_si128((__m128i *)&mask[j], m0);
+ }
+ ssrc0 += src0_stride;
+ ssrc1 += src1_stride;
+ mask += w;
+ }
+ } else {
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; j += 16) {
+ __m256i s0 = _mm256_loadu_si256((const __m256i *)&ssrc0[j]);
+ __m256i s1 = _mm256_loadu_si256((const __m256i *)&ssrc1[j]);
+ __m256i diff = _mm256_srai_epi16(
+ _mm256_abs_epi16(_mm256_sub_epi16(s0, s1)), DIFF_FACTOR_LOG2);
+ __m256i m = _mm256_min_epi16(
+ _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)),
+ yAOM_BLEND_A64_MAX_ALPHA);
+ m = _mm256_packus_epi16(m, m);
+ m = _mm256_permute4x64_epi64(m, _MM_SHUFFLE(0, 0, 2, 0));
+ __m128i m0 = _mm256_castsi256_si128(m);
+ _mm_storeu_si128((__m128i *)&mask[j], m0);
+ }
+ ssrc0 += src0_stride;
+ ssrc1 += src1_stride;
+ mask += w;
+ }
+ }
+ } else {
+ const __m128i xshift = xx_set1_64_from_32i(bd - 8 + DIFF_FACTOR_LOG2);
+ if (mask_type == DIFFWTD_38_INV) {
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; j += 16) {
+ __m256i s0 = _mm256_loadu_si256((const __m256i *)&ssrc0[j]);
+ __m256i s1 = _mm256_loadu_si256((const __m256i *)&ssrc1[j]);
+ __m256i diff = _mm256_sra_epi16(
+ _mm256_abs_epi16(_mm256_sub_epi16(s0, s1)), xshift);
+ __m256i m = _mm256_min_epi16(
+ _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)),
+ yAOM_BLEND_A64_MAX_ALPHA);
+ m = _mm256_sub_epi16(yAOM_BLEND_A64_MAX_ALPHA, m);
+ m = _mm256_packus_epi16(m, m);
+ m = _mm256_permute4x64_epi64(m, _MM_SHUFFLE(0, 0, 2, 0));
+ __m128i m0 = _mm256_castsi256_si128(m);
+ _mm_storeu_si128((__m128i *)&mask[j], m0);
+ }
+ ssrc0 += src0_stride;
+ ssrc1 += src1_stride;
+ mask += w;
+ }
+ } else {
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; j += 16) {
+ __m256i s0 = _mm256_loadu_si256((const __m256i *)&ssrc0[j]);
+ __m256i s1 = _mm256_loadu_si256((const __m256i *)&ssrc1[j]);
+ __m256i diff = _mm256_sra_epi16(
+ _mm256_abs_epi16(_mm256_sub_epi16(s0, s1)), xshift);
+ __m256i m = _mm256_min_epi16(
+ _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)),
+ yAOM_BLEND_A64_MAX_ALPHA);
+ m = _mm256_packus_epi16(m, m);
+ m = _mm256_permute4x64_epi64(m, _MM_SHUFFLE(0, 0, 2, 0));
+ __m128i m0 = _mm256_castsi256_si128(m);
+ _mm_storeu_si128((__m128i *)&mask[j], m0);
+ }
+ ssrc0 += src0_stride;
+ ssrc1 += src1_stride;
+ mask += w;
+ }
+ }
+ }
+ }
+}
diff --git a/third_party/aom/av1/common/x86/reconinter_sse4.c b/third_party/aom/av1/common/x86/reconinter_sse4.c
new file mode 100644
index 000000000..5171ca493
--- /dev/null
+++ b/third_party/aom/av1/common/x86/reconinter_sse4.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <emmintrin.h> // SSE2
+#include <smmintrin.h> /* SSE4.1 */
+
+#include "aom/aom_integer.h"
+#include "aom_dsp/blend.h"
+#include "av1/common/blockd.h"
+
+static INLINE __m128i calc_mask(const __m128i mask_base, const __m128i s0,
+ const __m128i s1) {
+ const __m128i diff = _mm_abs_epi16(_mm_sub_epi16(s0, s1));
+ return _mm_abs_epi16(_mm_add_epi16(mask_base, _mm_srli_epi16(diff, 4)));
+  // clamp(diff, 0, 64) can be skipped since diff is always in the range (38, 54)
+}
+
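For reference, calc_mask above vectorizes the same per-pixel mask formula that the highbd SSSE3/AVX2 versions elsewhere in this diff spell out; a minimal scalar sketch (helper name is illustrative):

#include <stdlib.h>

/* diffwtd mask for one pixel pair: scale the absolute difference, add the
   base of 38, clamp to AOM_BLEND_A64_MAX_ALPHA (64) and invert for
   DIFFWTD_38_INV. DIFF_FACTOR_LOG2 is 4 in aom. */
static int sketch_diffwtd_mask_pixel(int p0, int p1, int inverse) {
  int m = (abs(p0 - p1) >> 4) + 38;
  if (m > 64) m = 64; /* m >= 38, so no lower clamp is needed */
  return inverse ? 64 - m : m;
}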
+void av1_build_compound_diffwtd_mask_sse4_1(uint8_t *mask,
+ DIFFWTD_MASK_TYPE mask_type,
+ const uint8_t *src0, int stride0,
+ const uint8_t *src1, int stride1,
+ int h, int w) {
+ const int mb = (mask_type == DIFFWTD_38_INV) ? AOM_BLEND_A64_MAX_ALPHA : 0;
+ const __m128i mask_base = _mm_set1_epi16(38 - mb);
+ int i = 0;
+ if (4 == w) {
+ do {
+ const __m128i s0A = _mm_cvtsi32_si128(*(uint32_t *)src0);
+ const __m128i s0B = _mm_cvtsi32_si128(*(uint32_t *)(src0 + stride0));
+ const __m128i s0AB = _mm_unpacklo_epi32(s0A, s0B);
+ const __m128i s0 = _mm_cvtepu8_epi16(s0AB);
+
+ const __m128i s1A = _mm_cvtsi32_si128(*(uint32_t *)src1);
+ const __m128i s1B = _mm_cvtsi32_si128(*(uint32_t *)(src1 + stride1));
+ const __m128i s1AB = _mm_unpacklo_epi32(s1A, s1B);
+ const __m128i s1 = _mm_cvtepu8_epi16(s1AB);
+
+ const __m128i m16 = calc_mask(mask_base, s0, s1);
+ const __m128i m8 = _mm_packus_epi16(m16, m16);
+
+ *(uint32_t *)mask = _mm_cvtsi128_si32(m8);
+ *(uint32_t *)(mask + w) = _mm_extract_epi32(m8, 1);
+ src0 += (stride0 << 1);
+ src1 += (stride1 << 1);
+ mask += 8;
+ i += 2;
+ } while (i < h);
+ } else if (8 == w) {
+ do {
+ __m128i s0 = _mm_loadl_epi64((__m128i const *)src0);
+ __m128i s1 = _mm_loadl_epi64((__m128i const *)src1);
+ s0 = _mm_cvtepu8_epi16(s0);
+ s1 = _mm_cvtepu8_epi16(s1);
+ const __m128i m16 = calc_mask(mask_base, s0, s1);
+ const __m128i m8 = _mm_packus_epi16(m16, m16);
+ _mm_storel_epi64((__m128i *)mask, m8);
+ src0 += stride0;
+ src1 += stride1;
+ mask += 8;
+ i += 1;
+ } while (i < h);
+ } else {
+ const __m128i zero = _mm_setzero_si128();
+ do {
+ int j = 0;
+ do {
+ const __m128i s0 = _mm_load_si128((__m128i const *)(src0 + j));
+ const __m128i s1 = _mm_load_si128((__m128i const *)(src1 + j));
+ const __m128i s0L = _mm_cvtepu8_epi16(s0);
+ const __m128i s1L = _mm_cvtepu8_epi16(s1);
+ const __m128i s0H = _mm_unpackhi_epi8(s0, zero);
+ const __m128i s1H = _mm_unpackhi_epi8(s1, zero);
+
+ const __m128i m16L = calc_mask(mask_base, s0L, s1L);
+ const __m128i m16H = calc_mask(mask_base, s0H, s1H);
+
+ const __m128i m8 = _mm_packus_epi16(m16L, m16H);
+ _mm_store_si128((__m128i *)(mask + j), m8);
+ j += 16;
+ } while (j < w);
+ src0 += stride0;
+ src1 += stride1;
+ mask += w;
+ i += 1;
+ } while (i < h);
+ }
+}
+
+void av1_build_compound_diffwtd_mask_d16_sse4_1(
+ uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
+ int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
+ ConvolveParams *conv_params, int bd) {
+ const int which_inverse = (mask_type == DIFFWTD_38) ? 0 : 1;
+ const int mask_base = 38;
+ int round =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
+ const __m128i round_const = _mm_set1_epi16((1 << round) >> 1);
+ const __m128i mask_base_16 = _mm_set1_epi16(mask_base);
+ const __m128i clip_diff = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
+ const __m128i add_const =
+ _mm_set1_epi16((which_inverse ? AOM_BLEND_A64_MAX_ALPHA : 0));
+ const __m128i add_sign = _mm_set1_epi16((which_inverse ? -1 : 1));
+
+ int i, j;
+  // Adding the rounding constant can overflow, but that much precision is not
+  // required here. The code should also work for other values of
+  // DIFF_FACTOR_LOG2 and AOM_BLEND_A64_MAX_ALPHA, though corner-case bugs are
+  // possible there.
+ assert(DIFF_FACTOR_LOG2 == 4);
+ assert(AOM_BLEND_A64_MAX_ALPHA == 64);
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; j += 8) {
+ const __m128i data_src0 =
+ _mm_loadu_si128((__m128i *)&src0[(i * src0_stride) + j]);
+ const __m128i data_src1 =
+ _mm_loadu_si128((__m128i *)&src1[(i * src1_stride) + j]);
+
+ const __m128i diffa = _mm_subs_epu16(data_src0, data_src1);
+ const __m128i diffb = _mm_subs_epu16(data_src1, data_src0);
+ const __m128i diff = _mm_max_epu16(diffa, diffb);
+ const __m128i diff_round =
+ _mm_srli_epi16(_mm_adds_epu16(diff, round_const), round);
+ const __m128i diff_factor = _mm_srli_epi16(diff_round, DIFF_FACTOR_LOG2);
+ const __m128i diff_mask = _mm_adds_epi16(diff_factor, mask_base_16);
+ __m128i diff_clamp = _mm_min_epi16(diff_mask, clip_diff);
+ // clamp to 0 can be skipped since we are using add and saturate
+ // instruction
+
+ const __m128i diff_sign = _mm_sign_epi16(diff_clamp, add_sign);
+ const __m128i diff_const_16 = _mm_add_epi16(diff_sign, add_const);
+
+ // 8 bit conversion and saturation to uint8
+ const __m128i res_8 = _mm_packus_epi16(diff_const_16, diff_const_16);
+
+ // Store values into the destination buffer
+ __m128i *const dst = (__m128i *)&mask[i * w + j];
+
+ if ((w - j) > 4) {
+ _mm_storel_epi64(dst, res_8);
+ } else { // w==4
+ *(uint32_t *)dst = _mm_cvtsi128_si32(res_8);
+ }
+ }
+ }
+}
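A minimal scalar sketch of the per-pixel math in the d16 function above, under the same DIFF_FACTOR_LOG2 == 4 and AOM_BLEND_A64_MAX_ALPHA == 64 assumptions it asserts (helper name is illustrative):

#include <stdint.h>

/* round is 2 * FILTER_BITS - round_0 - round_1 + (bd - 8), as computed above;
   it undoes the intermediate precision of the compound convolution. */
static uint8_t sketch_diffwtd_mask_d16_pixel(int32_t s0, int32_t s1, int round,
                                             int inverse) {
  int32_t diff = s0 > s1 ? s0 - s1 : s1 - s0;
  diff = (diff + ((1 << round) >> 1)) >> round;
  int32_t m = (diff >> 4) + 38; /* DIFF_FACTOR_LOG2, mask_base */
  if (m > 64) m = 64;           /* AOM_BLEND_A64_MAX_ALPHA */
  return (uint8_t)(inverse ? 64 - m : m);
}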
diff --git a/third_party/aom/av1/common/x86/reconinter_ssse3.c b/third_party/aom/av1/common/x86/reconinter_ssse3.c
new file mode 100644
index 000000000..cf684447c
--- /dev/null
+++ b/third_party/aom/av1/common/x86/reconinter_ssse3.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tmmintrin.h>
+
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_integer.h"
+#include "aom_dsp/blend.h"
+#include "aom_dsp/x86/synonyms.h"
+#include "av1/common/blockd.h"
+
+void av1_build_compound_diffwtd_mask_highbd_ssse3(
+ uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
+ int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
+ int bd) {
+ if (w < 8) {
+ av1_build_compound_diffwtd_mask_highbd_c(mask, mask_type, src0, src0_stride,
+ src1, src1_stride, h, w, bd);
+ } else {
+ assert(bd >= 8);
+ assert((w % 8) == 0);
+ assert(mask_type == DIFFWTD_38 || mask_type == DIFFWTD_38_INV);
+ const __m128i x0 = _mm_setzero_si128();
+ const __m128i xAOM_BLEND_A64_MAX_ALPHA =
+ _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
+ const int mask_base = 38;
+ const __m128i xmask_base = _mm_set1_epi16(mask_base);
+ const uint16_t *ssrc0 = CONVERT_TO_SHORTPTR(src0);
+ const uint16_t *ssrc1 = CONVERT_TO_SHORTPTR(src1);
+ if (bd == 8) {
+ if (mask_type == DIFFWTD_38_INV) {
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; j += 8) {
+ __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]);
+ __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]);
+ __m128i diff = _mm_srai_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)),
+ DIFF_FACTOR_LOG2);
+ __m128i m = _mm_min_epi16(
+ _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)),
+ xAOM_BLEND_A64_MAX_ALPHA);
+ m = _mm_sub_epi16(xAOM_BLEND_A64_MAX_ALPHA, m);
+ m = _mm_packus_epi16(m, m);
+ _mm_storel_epi64((__m128i *)&mask[j], m);
+ }
+ ssrc0 += src0_stride;
+ ssrc1 += src1_stride;
+ mask += w;
+ }
+ } else {
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; j += 8) {
+ __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]);
+ __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]);
+ __m128i diff = _mm_srai_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)),
+ DIFF_FACTOR_LOG2);
+ __m128i m = _mm_min_epi16(
+ _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)),
+ xAOM_BLEND_A64_MAX_ALPHA);
+ m = _mm_packus_epi16(m, m);
+ _mm_storel_epi64((__m128i *)&mask[j], m);
+ }
+ ssrc0 += src0_stride;
+ ssrc1 += src1_stride;
+ mask += w;
+ }
+ }
+ } else {
+ const __m128i xshift = xx_set1_64_from_32i(bd - 8 + DIFF_FACTOR_LOG2);
+ if (mask_type == DIFFWTD_38_INV) {
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; j += 8) {
+ __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]);
+ __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]);
+ __m128i diff =
+ _mm_sra_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)), xshift);
+ __m128i m = _mm_min_epi16(
+ _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)),
+ xAOM_BLEND_A64_MAX_ALPHA);
+ m = _mm_sub_epi16(xAOM_BLEND_A64_MAX_ALPHA, m);
+ m = _mm_packus_epi16(m, m);
+ _mm_storel_epi64((__m128i *)&mask[j], m);
+ }
+ ssrc0 += src0_stride;
+ ssrc1 += src1_stride;
+ mask += w;
+ }
+ } else {
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; j += 8) {
+ __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]);
+ __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]);
+ __m128i diff =
+ _mm_sra_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)), xshift);
+ __m128i m = _mm_min_epi16(
+ _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)),
+ xAOM_BLEND_A64_MAX_ALPHA);
+ m = _mm_packus_epi16(m, m);
+ _mm_storel_epi64((__m128i *)&mask[j], m);
+ }
+ ssrc0 += src0_stride;
+ ssrc1 += src1_stride;
+ mask += w;
+ }
+ }
+ }
+ }
+}
diff --git a/third_party/aom/av1/common/x86/selfguided_avx2.c b/third_party/aom/av1/common/x86/selfguided_avx2.c
new file mode 100644
index 000000000..375def62e
--- /dev/null
+++ b/third_party/aom/av1/common/x86/selfguided_avx2.c
@@ -0,0 +1,719 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <immintrin.h>
+
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
+#include "av1/common/restoration.h"
+#include "aom_dsp/x86/synonyms.h"
+#include "aom_dsp/x86/synonyms_avx2.h"
+
+// Load 8 bytes from the possibly-misaligned pointer p, extend each byte to
+// 32-bit precision and return them in an AVX2 register.
+static __m256i yy256_load_extend_8_32(const void *p) {
+ return _mm256_cvtepu8_epi32(xx_loadl_64(p));
+}
+
+// Load 8 halfwords from the possibly-misaligned pointer p, extend each
+// halfword to 32-bit precision and return them in an AVX2 register.
+static __m256i yy256_load_extend_16_32(const void *p) {
+ return _mm256_cvtepu16_epi32(xx_loadu_128(p));
+}
+
+// Compute the scan of an AVX2 register holding 8 32-bit integers. If the
+// register holds x0..x7 then the scan will hold x0, x0+x1, x0+x1+x2, ...,
+// x0+x1+...+x7
+//
+// Let [...] represent a 128-bit block, and let a, ..., h be 32-bit integers
+// (assumed small enough to be able to add them without overflow).
+//
+// Use -> as shorthand for summing, i.e. h->a = h + g + f + e + d + c + b + a.
+//
+// x = [h g f e][d c b a]
+// x01 = [g f e 0][c b a 0]
+// x02 = [g+h f+g e+f e][c+d b+c a+b a]
+// x03 = [e+f e 0 0][a+b a 0 0]
+// x04 = [e->h e->g e->f e][a->d a->c a->b a]
+// s = a->d
+// s01 = [a->d a->d a->d a->d]
+// s02 = [a->d a->d a->d a->d][0 0 0 0]
+// ret = [a->h a->g a->f a->e][a->d a->c a->b a]
+static __m256i scan_32(__m256i x) {
+ const __m256i x01 = _mm256_slli_si256(x, 4);
+ const __m256i x02 = _mm256_add_epi32(x, x01);
+ const __m256i x03 = _mm256_slli_si256(x02, 8);
+ const __m256i x04 = _mm256_add_epi32(x02, x03);
+ const int32_t s = _mm256_extract_epi32(x04, 3);
+ const __m128i s01 = _mm_set1_epi32(s);
+ const __m256i s02 = _mm256_insertf128_si256(_mm256_setzero_si256(), s01, 1);
+ return _mm256_add_epi32(x04, s02);
+}
+
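The comment above derives the 8-lane inclusive prefix sum; its scalar equivalent is simply (a sketch, with an illustrative name):

#include <stdint.h>

/* out[k] = in[0] + in[1] + ... + in[k], the "scan" computed by scan_32. */
static void sketch_scan8(const int32_t in[8], int32_t out[8]) {
  int32_t acc = 0;
  for (int k = 0; k < 8; ++k) {
    acc += in[k];
    out[k] = acc;
  }
}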
+// Compute two integral images from src. B sums elements; A sums their
+// squares. The images are offset by one pixel, so will have width and height
+// equal to width + 1, height + 1 and the first row and column will be zero.
+//
+// A+1 and B+1 should be aligned to 32 bytes. buf_stride should be a multiple
+// of 8.
+
+static void *memset_zero_avx(int32_t *dest, const __m256i *zero, size_t count) {
+ unsigned int i = 0;
+ for (i = 0; i < (count & 0xffffffe0); i += 32) {
+ _mm256_storeu_si256((__m256i *)(dest + i), *zero);
+ _mm256_storeu_si256((__m256i *)(dest + i + 8), *zero);
+ _mm256_storeu_si256((__m256i *)(dest + i + 16), *zero);
+ _mm256_storeu_si256((__m256i *)(dest + i + 24), *zero);
+ }
+ for (; i < (count & 0xfffffff8); i += 8) {
+ _mm256_storeu_si256((__m256i *)(dest + i), *zero);
+ }
+ for (; i < count; i++) {
+ dest[i] = 0;
+ }
+ return dest;
+}
+
+static void integral_images(const uint8_t *src, int src_stride, int width,
+ int height, int32_t *A, int32_t *B,
+ int buf_stride) {
+ const __m256i zero = _mm256_setzero_si256();
+ // Write out the zero top row
+ memset_zero_avx(A, &zero, (width + 8));
+ memset_zero_avx(B, &zero, (width + 8));
+ for (int i = 0; i < height; ++i) {
+ // Zero the left column.
+ A[(i + 1) * buf_stride] = B[(i + 1) * buf_stride] = 0;
+
+ // ldiff is the difference H - D where H is the output sample immediately
+ // to the left and D is the output sample above it. These are scalars,
+ // replicated across the eight lanes.
+ __m256i ldiff1 = zero, ldiff2 = zero;
+ for (int j = 0; j < width; j += 8) {
+ const int ABj = 1 + j;
+
+ const __m256i above1 = yy_load_256(B + ABj + i * buf_stride);
+ const __m256i above2 = yy_load_256(A + ABj + i * buf_stride);
+
+ const __m256i x1 = yy256_load_extend_8_32(src + j + i * src_stride);
+ const __m256i x2 = _mm256_madd_epi16(x1, x1);
+
+ const __m256i sc1 = scan_32(x1);
+ const __m256i sc2 = scan_32(x2);
+
+ const __m256i row1 =
+ _mm256_add_epi32(_mm256_add_epi32(sc1, above1), ldiff1);
+ const __m256i row2 =
+ _mm256_add_epi32(_mm256_add_epi32(sc2, above2), ldiff2);
+
+ yy_store_256(B + ABj + (i + 1) * buf_stride, row1);
+ yy_store_256(A + ABj + (i + 1) * buf_stride, row2);
+
+ // Calculate the new H - D.
+ ldiff1 = _mm256_set1_epi32(
+ _mm256_extract_epi32(_mm256_sub_epi32(row1, above1), 7));
+ ldiff2 = _mm256_set1_epi32(
+ _mm256_extract_epi32(_mm256_sub_epi32(row2, above2), 7));
+ }
+ }
+}
+
+// Compute two integral images from src. B sums elements; A sums their squares
+//
+// A and B should be aligned to 32 bytes. buf_stride should be a multiple of 8.
+static void integral_images_highbd(const uint16_t *src, int src_stride,
+ int width, int height, int32_t *A,
+ int32_t *B, int buf_stride) {
+ const __m256i zero = _mm256_setzero_si256();
+ // Write out the zero top row
+ memset_zero_avx(A, &zero, (width + 8));
+ memset_zero_avx(B, &zero, (width + 8));
+
+ for (int i = 0; i < height; ++i) {
+ // Zero the left column.
+ A[(i + 1) * buf_stride] = B[(i + 1) * buf_stride] = 0;
+
+ // ldiff is the difference H - D where H is the output sample immediately
+ // to the left and D is the output sample above it. These are scalars,
+ // replicated across the eight lanes.
+ __m256i ldiff1 = zero, ldiff2 = zero;
+ for (int j = 0; j < width; j += 8) {
+ const int ABj = 1 + j;
+
+ const __m256i above1 = yy_load_256(B + ABj + i * buf_stride);
+ const __m256i above2 = yy_load_256(A + ABj + i * buf_stride);
+
+ const __m256i x1 = yy256_load_extend_16_32(src + j + i * src_stride);
+ const __m256i x2 = _mm256_madd_epi16(x1, x1);
+
+ const __m256i sc1 = scan_32(x1);
+ const __m256i sc2 = scan_32(x2);
+
+ const __m256i row1 =
+ _mm256_add_epi32(_mm256_add_epi32(sc1, above1), ldiff1);
+ const __m256i row2 =
+ _mm256_add_epi32(_mm256_add_epi32(sc2, above2), ldiff2);
+
+ yy_store_256(B + ABj + (i + 1) * buf_stride, row1);
+ yy_store_256(A + ABj + (i + 1) * buf_stride, row2);
+
+ // Calculate the new H - D.
+ ldiff1 = _mm256_set1_epi32(
+ _mm256_extract_epi32(_mm256_sub_epi32(row1, above1), 7));
+ ldiff2 = _mm256_set1_epi32(
+ _mm256_extract_epi32(_mm256_sub_epi32(row2, above2), 7));
+ }
+ }
+}
+
+// Compute 8 values of boxsum from the given integral image. ii should point
+// at the middle of the box (for the first value). r is the box radius.
+static INLINE __m256i boxsum_from_ii(const int32_t *ii, int stride, int r) {
+ const __m256i tl = yy_loadu_256(ii - (r + 1) - (r + 1) * stride);
+ const __m256i tr = yy_loadu_256(ii + (r + 0) - (r + 1) * stride);
+ const __m256i bl = yy_loadu_256(ii - (r + 1) + r * stride);
+ const __m256i br = yy_loadu_256(ii + (r + 0) + r * stride);
+ const __m256i u = _mm256_sub_epi32(tr, tl);
+ const __m256i v = _mm256_sub_epi32(br, bl);
+ return _mm256_sub_epi32(v, u);
+}
+
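A minimal scalar restatement of the integral-image lookup above, with ii pointing at the middle of the box and r the radius (sketch only):

#include <stdint.h>

/* Sum over the (2r+1)x(2r+1) window from four corner samples of the
   integral image, using exactly the offsets of the four loads above. */
static int32_t sketch_boxsum_from_ii(const int32_t *ii, int stride, int r) {
  const int32_t tl = ii[-(r + 1) - (r + 1) * stride];
  const int32_t tr = ii[(r + 0) - (r + 1) * stride];
  const int32_t bl = ii[-(r + 1) + r * stride];
  const int32_t br = ii[(r + 0) + r * stride];
  return (br - bl) - (tr - tl);
}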
+static __m256i round_for_shift(unsigned shift) {
+ return _mm256_set1_epi32((1 << shift) >> 1);
+}
+
+static __m256i compute_p(__m256i sum1, __m256i sum2, int bit_depth, int n) {
+ __m256i an, bb;
+ if (bit_depth > 8) {
+ const __m256i rounding_a = round_for_shift(2 * (bit_depth - 8));
+ const __m256i rounding_b = round_for_shift(bit_depth - 8);
+ const __m128i shift_a = _mm_cvtsi32_si128(2 * (bit_depth - 8));
+ const __m128i shift_b = _mm_cvtsi32_si128(bit_depth - 8);
+ const __m256i a =
+ _mm256_srl_epi32(_mm256_add_epi32(sum2, rounding_a), shift_a);
+ const __m256i b =
+ _mm256_srl_epi32(_mm256_add_epi32(sum1, rounding_b), shift_b);
+ // b < 2^14, so we can use a 16-bit madd rather than a 32-bit
+ // mullo to square it
+ bb = _mm256_madd_epi16(b, b);
+ an = _mm256_max_epi32(_mm256_mullo_epi32(a, _mm256_set1_epi32(n)), bb);
+ } else {
+ bb = _mm256_madd_epi16(sum1, sum1);
+ an = _mm256_mullo_epi32(sum2, _mm256_set1_epi32(n));
+ }
+ return _mm256_sub_epi32(an, bb);
+}
+
+// Assumes that C, D are integral images for the original buffer which has been
+// extended to have a padding of SGRPROJ_BORDER_VERT/SGRPROJ_BORDER_HORZ pixels
+// on the sides. A, B, C, D point at logical position (0, 0).
+static void calc_ab(int32_t *A, int32_t *B, const int32_t *C, const int32_t *D,
+ int width, int height, int buf_stride, int bit_depth,
+ int sgr_params_idx, int radius_idx) {
+ const sgr_params_type *const params = &sgr_params[sgr_params_idx];
+ const int r = params->r[radius_idx];
+ const int n = (2 * r + 1) * (2 * r + 1);
+ const __m256i s = _mm256_set1_epi32(params->s[radius_idx]);
+ // one_over_n[n-1] is 2^12/n, so easily fits in an int16
+ const __m256i one_over_n = _mm256_set1_epi32(one_by_x[n - 1]);
+
+ const __m256i rnd_z = round_for_shift(SGRPROJ_MTABLE_BITS);
+ const __m256i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
+
+ // Set up masks
+ const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+ __m256i mask[8];
+ for (int idx = 0; idx < 8; idx++) {
+ const __m128i shift = _mm_cvtsi32_si128(8 * (8 - idx));
+ mask[idx] = _mm256_cvtepi8_epi32(_mm_srl_epi64(ones32, shift));
+ }
+
+ for (int i = -1; i < height + 1; ++i) {
+ for (int j = -1; j < width + 1; j += 8) {
+ const int32_t *Cij = C + i * buf_stride + j;
+ const int32_t *Dij = D + i * buf_stride + j;
+
+ __m256i sum1 = boxsum_from_ii(Dij, buf_stride, r);
+ __m256i sum2 = boxsum_from_ii(Cij, buf_stride, r);
+
+ // When width + 2 isn't a multiple of 8, sum1 and sum2 will contain
+ // some uninitialised data in their upper words. We use a mask to
+ // ensure that these bits are set to 0.
+ int idx = AOMMIN(8, width + 1 - j);
+ assert(idx >= 1);
+
+ if (idx < 8) {
+ sum1 = _mm256_and_si256(mask[idx], sum1);
+ sum2 = _mm256_and_si256(mask[idx], sum2);
+ }
+
+ const __m256i p = compute_p(sum1, sum2, bit_depth, n);
+
+ const __m256i z = _mm256_min_epi32(
+ _mm256_srli_epi32(_mm256_add_epi32(_mm256_mullo_epi32(p, s), rnd_z),
+ SGRPROJ_MTABLE_BITS),
+ _mm256_set1_epi32(255));
+
+ const __m256i a_res = _mm256_i32gather_epi32(x_by_xplus1, z, 4);
+
+ yy_storeu_256(A + i * buf_stride + j, a_res);
+
+ const __m256i a_complement =
+ _mm256_sub_epi32(_mm256_set1_epi32(SGRPROJ_SGR), a_res);
+
+ // sum1 might have lanes greater than 2^15, so we can't use madd to do
+ // multiplication involving sum1. However, a_complement and one_over_n
+ // are both less than 256, so we can multiply them first.
+ const __m256i a_comp_over_n = _mm256_madd_epi16(a_complement, one_over_n);
+ const __m256i b_int = _mm256_mullo_epi32(a_comp_over_n, sum1);
+ const __m256i b_res = _mm256_srli_epi32(_mm256_add_epi32(b_int, rnd_res),
+ SGRPROJ_RECIP_BITS);
+
+ yy_storeu_256(B + i * buf_stride + j, b_res);
+ }
+ }
+}
+
+// Calculate 8 values of the "cross sum" starting at buf. This is a 3x3 filter
+// where the outer four corners have weight 3 and all other pixels have weight
+// 4.
+//
+// Pixels are indexed as follows:
+// xtl xt xtr
+// xl x xr
+// xbl xb xbr
+//
+// buf points to x
+//
+// fours = xl + xt + xr + xb + x
+// threes = xtl + xtr + xbr + xbl
+// cross_sum = 4 * fours + 3 * threes
+// = 4 * (fours + threes) - threes
+// = (fours + threes) << 2 - threes
+static INLINE __m256i cross_sum(const int32_t *buf, int stride) {
+ const __m256i xtl = yy_loadu_256(buf - 1 - stride);
+ const __m256i xt = yy_loadu_256(buf - stride);
+ const __m256i xtr = yy_loadu_256(buf + 1 - stride);
+ const __m256i xl = yy_loadu_256(buf - 1);
+ const __m256i x = yy_loadu_256(buf);
+ const __m256i xr = yy_loadu_256(buf + 1);
+ const __m256i xbl = yy_loadu_256(buf - 1 + stride);
+ const __m256i xb = yy_loadu_256(buf + stride);
+ const __m256i xbr = yy_loadu_256(buf + 1 + stride);
+
+ const __m256i fours = _mm256_add_epi32(
+ xl, _mm256_add_epi32(xt, _mm256_add_epi32(xr, _mm256_add_epi32(xb, x))));
+ const __m256i threes =
+ _mm256_add_epi32(xtl, _mm256_add_epi32(xtr, _mm256_add_epi32(xbr, xbl)));
+
+ return _mm256_sub_epi32(_mm256_slli_epi32(_mm256_add_epi32(fours, threes), 2),
+ threes);
+}
+
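The weighting derived in the comment above reduces to the following scalar form (a sketch with an illustrative name):

#include <stdint.h>

/* Weight 4 on the centre and its four edge neighbours, weight 3 on the
   corners: 4 * fours + 3 * threes == ((fours + threes) << 2) - threes. */
static int32_t sketch_cross_sum(const int32_t *buf, int stride) {
  const int32_t fours =
      buf[-1] + buf[-stride] + buf[1] + buf[stride] + buf[0];
  const int32_t threes = buf[-1 - stride] + buf[1 - stride] +
                         buf[1 + stride] + buf[-1 + stride];
  return ((fours + threes) << 2) - threes;
}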
+// The final filter for self-guided restoration. Computes a weighted average
+// across A, B with "cross sums" (see cross_sum implementation above).
+static void final_filter(int32_t *dst, int dst_stride, const int32_t *A,
+ const int32_t *B, int buf_stride, const void *dgd8,
+ int dgd_stride, int width, int height, int highbd) {
+ const int nb = 5;
+ const __m256i rounding =
+ round_for_shift(SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+ const uint8_t *dgd_real =
+ highbd ? (const uint8_t *)CONVERT_TO_SHORTPTR(dgd8) : dgd8;
+
+ for (int i = 0; i < height; ++i) {
+ for (int j = 0; j < width; j += 8) {
+ const __m256i a = cross_sum(A + i * buf_stride + j, buf_stride);
+ const __m256i b = cross_sum(B + i * buf_stride + j, buf_stride);
+
+ const __m128i raw =
+ xx_loadu_128(dgd_real + ((i * dgd_stride + j) << highbd));
+ const __m256i src =
+ highbd ? _mm256_cvtepu16_epi32(raw) : _mm256_cvtepu8_epi32(raw);
+
+ __m256i v = _mm256_add_epi32(_mm256_madd_epi16(a, src), b);
+ __m256i w = _mm256_srai_epi32(_mm256_add_epi32(v, rounding),
+ SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+
+ yy_storeu_256(dst + i * dst_stride + j, w);
+ }
+ }
+}
+
+// Assumes that C, D are integral images for the original buffer which has been
+// extended to have a padding of SGRPROJ_BORDER_VERT/SGRPROJ_BORDER_HORZ pixels
+// on the sides. A, B, C, D point at logical position (0, 0).
+static void calc_ab_fast(int32_t *A, int32_t *B, const int32_t *C,
+ const int32_t *D, int width, int height,
+ int buf_stride, int bit_depth, int sgr_params_idx,
+ int radius_idx) {
+ const sgr_params_type *const params = &sgr_params[sgr_params_idx];
+ const int r = params->r[radius_idx];
+ const int n = (2 * r + 1) * (2 * r + 1);
+ const __m256i s = _mm256_set1_epi32(params->s[radius_idx]);
+ // one_over_n[n-1] is 2^12/n, so easily fits in an int16
+ const __m256i one_over_n = _mm256_set1_epi32(one_by_x[n - 1]);
+
+ const __m256i rnd_z = round_for_shift(SGRPROJ_MTABLE_BITS);
+ const __m256i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
+
+ // Set up masks
+ const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+ __m256i mask[8];
+ for (int idx = 0; idx < 8; idx++) {
+ const __m128i shift = _mm_cvtsi32_si128(8 * (8 - idx));
+ mask[idx] = _mm256_cvtepi8_epi32(_mm_srl_epi64(ones32, shift));
+ }
+
+ for (int i = -1; i < height + 1; i += 2) {
+ for (int j = -1; j < width + 1; j += 8) {
+ const int32_t *Cij = C + i * buf_stride + j;
+ const int32_t *Dij = D + i * buf_stride + j;
+
+ __m256i sum1 = boxsum_from_ii(Dij, buf_stride, r);
+ __m256i sum2 = boxsum_from_ii(Cij, buf_stride, r);
+
+ // When width + 2 isn't a multiple of 8, sum1 and sum2 will contain
+ // some uninitialised data in their upper words. We use a mask to
+ // ensure that these bits are set to 0.
+ int idx = AOMMIN(8, width + 1 - j);
+ assert(idx >= 1);
+
+ if (idx < 8) {
+ sum1 = _mm256_and_si256(mask[idx], sum1);
+ sum2 = _mm256_and_si256(mask[idx], sum2);
+ }
+
+ const __m256i p = compute_p(sum1, sum2, bit_depth, n);
+
+ const __m256i z = _mm256_min_epi32(
+ _mm256_srli_epi32(_mm256_add_epi32(_mm256_mullo_epi32(p, s), rnd_z),
+ SGRPROJ_MTABLE_BITS),
+ _mm256_set1_epi32(255));
+
+ const __m256i a_res = _mm256_i32gather_epi32(x_by_xplus1, z, 4);
+
+ yy_storeu_256(A + i * buf_stride + j, a_res);
+
+ const __m256i a_complement =
+ _mm256_sub_epi32(_mm256_set1_epi32(SGRPROJ_SGR), a_res);
+
+ // sum1 might have lanes greater than 2^15, so we can't use madd to do
+ // multiplication involving sum1. However, a_complement and one_over_n
+ // are both less than 256, so we can multiply them first.
+ const __m256i a_comp_over_n = _mm256_madd_epi16(a_complement, one_over_n);
+ const __m256i b_int = _mm256_mullo_epi32(a_comp_over_n, sum1);
+ const __m256i b_res = _mm256_srli_epi32(_mm256_add_epi32(b_int, rnd_res),
+ SGRPROJ_RECIP_BITS);
+
+ yy_storeu_256(B + i * buf_stride + j, b_res);
+ }
+ }
+}
+
+// Calculate 8 values of the "cross sum" starting at buf.
+//
+// Pixels are indexed like this:
+// xtl xt xtr
+// - buf -
+// xbl xb xbr
+//
+// Pixels are weighted like this:
+// 5 6 5
+// 0 0 0
+// 5 6 5
+//
+// fives = xtl + xtr + xbl + xbr
+// sixes = xt + xb
+// cross_sum = 6 * sixes + 5 * fives
+//           = 5 * (fives + sixes) + sixes
+// = (fives + sixes) << 2 + (fives + sixes) + sixes
+static INLINE __m256i cross_sum_fast_even_row(const int32_t *buf, int stride) {
+ const __m256i xtl = yy_loadu_256(buf - 1 - stride);
+ const __m256i xt = yy_loadu_256(buf - stride);
+ const __m256i xtr = yy_loadu_256(buf + 1 - stride);
+ const __m256i xbl = yy_loadu_256(buf - 1 + stride);
+ const __m256i xb = yy_loadu_256(buf + stride);
+ const __m256i xbr = yy_loadu_256(buf + 1 + stride);
+
+ const __m256i fives =
+ _mm256_add_epi32(xtl, _mm256_add_epi32(xtr, _mm256_add_epi32(xbr, xbl)));
+ const __m256i sixes = _mm256_add_epi32(xt, xb);
+ const __m256i fives_plus_sixes = _mm256_add_epi32(fives, sixes);
+
+ return _mm256_add_epi32(
+ _mm256_add_epi32(_mm256_slli_epi32(fives_plus_sixes, 2),
+ fives_plus_sixes),
+ sixes);
+}
+
+// Calculate 8 values of the "cross sum" starting at buf.
+//
+// Pixels are indexed like this:
+// xl x xr
+//
+// Pixels are weighted like this:
+// 5 6 5
+//
+// buf points to x
+//
+// fives = xl + xr
+// sixes = x
+// cross_sum = 5 * fives + 6 * sixes
+// = 4 * (fives + sixes) + (fives + sixes) + sixes
+// = (fives + sixes) << 2 + (fives + sixes) + sixes
+static INLINE __m256i cross_sum_fast_odd_row(const int32_t *buf) {
+ const __m256i xl = yy_loadu_256(buf - 1);
+ const __m256i x = yy_loadu_256(buf);
+ const __m256i xr = yy_loadu_256(buf + 1);
+
+ const __m256i fives = _mm256_add_epi32(xl, xr);
+ const __m256i sixes = x;
+
+ const __m256i fives_plus_sixes = _mm256_add_epi32(fives, sixes);
+
+ return _mm256_add_epi32(
+ _mm256_add_epi32(_mm256_slli_epi32(fives_plus_sixes, 2),
+ fives_plus_sixes),
+ sixes);
+}
+
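Likewise, the two fast-variant cross sums above reduce to a 5/6 weighting; a scalar sketch for the even-row case (name is illustrative):

#include <stdint.h>

/* Even rows: weight 5 on the four diagonal neighbours, 6 on the pixels
   directly above and below, 0 on the centre row itself. */
static int32_t sketch_cross_sum_fast_even(const int32_t *buf, int stride) {
  const int32_t fives = buf[-1 - stride] + buf[1 - stride] +
                        buf[-1 + stride] + buf[1 + stride];
  const int32_t sixes = buf[-stride] + buf[stride];
  /* == ((fives + sixes) << 2) + (fives + sixes) + sixes */
  return 5 * fives + 6 * sixes;
}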
+// The final filter for the self-guided restoration. Computes a
+// weighted average across A, B with "cross sums" (see cross_sum_...
+// implementations above).
+static void final_filter_fast(int32_t *dst, int dst_stride, const int32_t *A,
+ const int32_t *B, int buf_stride,
+ const void *dgd8, int dgd_stride, int width,
+ int height, int highbd) {
+ const int nb0 = 5;
+ const int nb1 = 4;
+
+ const __m256i rounding0 =
+ round_for_shift(SGRPROJ_SGR_BITS + nb0 - SGRPROJ_RST_BITS);
+ const __m256i rounding1 =
+ round_for_shift(SGRPROJ_SGR_BITS + nb1 - SGRPROJ_RST_BITS);
+
+ const uint8_t *dgd_real =
+ highbd ? (const uint8_t *)CONVERT_TO_SHORTPTR(dgd8) : dgd8;
+
+ for (int i = 0; i < height; ++i) {
+ if (!(i & 1)) { // even row
+ for (int j = 0; j < width; j += 8) {
+ const __m256i a =
+ cross_sum_fast_even_row(A + i * buf_stride + j, buf_stride);
+ const __m256i b =
+ cross_sum_fast_even_row(B + i * buf_stride + j, buf_stride);
+
+ const __m128i raw =
+ xx_loadu_128(dgd_real + ((i * dgd_stride + j) << highbd));
+ const __m256i src =
+ highbd ? _mm256_cvtepu16_epi32(raw) : _mm256_cvtepu8_epi32(raw);
+
+ __m256i v = _mm256_add_epi32(_mm256_madd_epi16(a, src), b);
+ __m256i w =
+ _mm256_srai_epi32(_mm256_add_epi32(v, rounding0),
+ SGRPROJ_SGR_BITS + nb0 - SGRPROJ_RST_BITS);
+
+ yy_storeu_256(dst + i * dst_stride + j, w);
+ }
+ } else { // odd row
+ for (int j = 0; j < width; j += 8) {
+ const __m256i a = cross_sum_fast_odd_row(A + i * buf_stride + j);
+ const __m256i b = cross_sum_fast_odd_row(B + i * buf_stride + j);
+
+ const __m128i raw =
+ xx_loadu_128(dgd_real + ((i * dgd_stride + j) << highbd));
+ const __m256i src =
+ highbd ? _mm256_cvtepu16_epi32(raw) : _mm256_cvtepu8_epi32(raw);
+
+ __m256i v = _mm256_add_epi32(_mm256_madd_epi16(a, src), b);
+ __m256i w =
+ _mm256_srai_epi32(_mm256_add_epi32(v, rounding1),
+ SGRPROJ_SGR_BITS + nb1 - SGRPROJ_RST_BITS);
+
+ yy_storeu_256(dst + i * dst_stride + j, w);
+ }
+ }
+ }
+}
+
+void av1_selfguided_restoration_avx2(const uint8_t *dgd8, int width, int height,
+ int dgd_stride, int32_t *flt0,
+ int32_t *flt1, int flt_stride,
+ int sgr_params_idx, int bit_depth,
+ int highbd) {
+ // The ALIGN_POWER_OF_TWO macro here ensures that column 1 of Atl, Btl,
+ // Ctl and Dtl is 32-byte aligned.
+ const int buf_elts = ALIGN_POWER_OF_TWO(RESTORATION_PROC_UNIT_PELS, 3);
+
+ DECLARE_ALIGNED(32, int32_t,
+ buf[4 * ALIGN_POWER_OF_TWO(RESTORATION_PROC_UNIT_PELS, 3)]);
+
+ const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
+ const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
+
+ // Adjusting the stride of A and B here appears to avoid bad cache effects,
+ // leading to a significant speed improvement.
+ // We also align the stride to a multiple of 32 bytes for efficiency.
+ int buf_stride = ALIGN_POWER_OF_TWO(width_ext + 16, 3);
+
+ // The "tl" pointers point at the top-left of the initialised data for the
+ // array.
+ int32_t *Atl = buf + 0 * buf_elts + 7;
+ int32_t *Btl = buf + 1 * buf_elts + 7;
+ int32_t *Ctl = buf + 2 * buf_elts + 7;
+ int32_t *Dtl = buf + 3 * buf_elts + 7;
+
+ // The "0" pointers are (- SGRPROJ_BORDER_VERT, -SGRPROJ_BORDER_HORZ). Note
+ // there's a zero row and column in A, B (integral images), so we move down
+ // and right one for them.
+ const int buf_diag_border =
+ SGRPROJ_BORDER_HORZ + buf_stride * SGRPROJ_BORDER_VERT;
+
+ int32_t *A0 = Atl + 1 + buf_stride;
+ int32_t *B0 = Btl + 1 + buf_stride;
+ int32_t *C0 = Ctl + 1 + buf_stride;
+ int32_t *D0 = Dtl + 1 + buf_stride;
+
+ // Finally, A, B, C, D point at position (0, 0).
+ int32_t *A = A0 + buf_diag_border;
+ int32_t *B = B0 + buf_diag_border;
+ int32_t *C = C0 + buf_diag_border;
+ int32_t *D = D0 + buf_diag_border;
+
+ const int dgd_diag_border =
+ SGRPROJ_BORDER_HORZ + dgd_stride * SGRPROJ_BORDER_VERT;
+ const uint8_t *dgd0 = dgd8 - dgd_diag_border;
+
+ // Generate integral images from the input. C will contain sums of squares; D
+ // will contain just sums
+ if (highbd)
+ integral_images_highbd(CONVERT_TO_SHORTPTR(dgd0), dgd_stride, width_ext,
+ height_ext, Ctl, Dtl, buf_stride);
+ else
+ integral_images(dgd0, dgd_stride, width_ext, height_ext, Ctl, Dtl,
+ buf_stride);
+
+ const sgr_params_type *const params = &sgr_params[sgr_params_idx];
+ // Write to flt0 and flt1
+ // If params->r == 0 we skip the corresponding filter. We only allow one of
+ // the radii to be 0, as having both equal to 0 would be equivalent to
+ // skipping SGR entirely.
+ assert(!(params->r[0] == 0 && params->r[1] == 0));
+ assert(params->r[0] < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
+ assert(params->r[1] < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
+
+ if (params->r[0] > 0) {
+ calc_ab_fast(A, B, C, D, width, height, buf_stride, bit_depth,
+ sgr_params_idx, 0);
+ final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride,
+ width, height, highbd);
+ }
+
+ if (params->r[1] > 0) {
+ calc_ab(A, B, C, D, width, height, buf_stride, bit_depth, sgr_params_idx,
+ 1);
+ final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
+ height, highbd);
+ }
+}
+
+void apply_selfguided_restoration_avx2(const uint8_t *dat8, int width,
+ int height, int stride, int eps,
+ const int *xqd, uint8_t *dst8,
+ int dst_stride, int32_t *tmpbuf,
+ int bit_depth, int highbd) {
+ int32_t *flt0 = tmpbuf;
+ int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX;
+ assert(width * height <= RESTORATION_UNITPELS_MAX);
+ av1_selfguided_restoration_avx2(dat8, width, height, stride, flt0, flt1,
+ width, eps, bit_depth, highbd);
+ const sgr_params_type *const params = &sgr_params[eps];
+ int xq[2];
+ decode_xq(xqd, xq, params);
+
+ __m256i xq0 = _mm256_set1_epi32(xq[0]);
+ __m256i xq1 = _mm256_set1_epi32(xq[1]);
+
+ for (int i = 0; i < height; ++i) {
+ // Calculate output in batches of 16 pixels
+ for (int j = 0; j < width; j += 16) {
+ const int k = i * width + j;
+ const int m = i * dst_stride + j;
+
+ const uint8_t *dat8ij = dat8 + i * stride + j;
+ __m256i ep_0, ep_1;
+ __m128i src_0, src_1;
+ if (highbd) {
+ src_0 = xx_loadu_128(CONVERT_TO_SHORTPTR(dat8ij));
+ src_1 = xx_loadu_128(CONVERT_TO_SHORTPTR(dat8ij + 8));
+ ep_0 = _mm256_cvtepu16_epi32(src_0);
+ ep_1 = _mm256_cvtepu16_epi32(src_1);
+ } else {
+ src_0 = xx_loadu_128(dat8ij);
+ ep_0 = _mm256_cvtepu8_epi32(src_0);
+ ep_1 = _mm256_cvtepu8_epi32(_mm_srli_si128(src_0, 8));
+ }
+
+ const __m256i u_0 = _mm256_slli_epi32(ep_0, SGRPROJ_RST_BITS);
+ const __m256i u_1 = _mm256_slli_epi32(ep_1, SGRPROJ_RST_BITS);
+
+ __m256i v_0 = _mm256_slli_epi32(u_0, SGRPROJ_PRJ_BITS);
+ __m256i v_1 = _mm256_slli_epi32(u_1, SGRPROJ_PRJ_BITS);
+
+ if (params->r[0] > 0) {
+ const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt0[k]), u_0);
+ v_0 = _mm256_add_epi32(v_0, _mm256_mullo_epi32(xq0, f1_0));
+
+ const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt0[k + 8]), u_1);
+ v_1 = _mm256_add_epi32(v_1, _mm256_mullo_epi32(xq0, f1_1));
+ }
+
+ if (params->r[1] > 0) {
+ const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0);
+ v_0 = _mm256_add_epi32(v_0, _mm256_mullo_epi32(xq1, f2_0));
+
+ const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1);
+ v_1 = _mm256_add_epi32(v_1, _mm256_mullo_epi32(xq1, f2_1));
+ }
+
+ const __m256i rounding =
+ round_for_shift(SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
+ const __m256i w_0 = _mm256_srai_epi32(
+ _mm256_add_epi32(v_0, rounding), SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
+ const __m256i w_1 = _mm256_srai_epi32(
+ _mm256_add_epi32(v_1, rounding), SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
+
+ if (highbd) {
+ // Pack into 16 bits and clamp to [0, 2^bit_depth)
+        // Note that packing into 16 bits messes up the order of the output words,
+ // so we use a permute function to correct this
+ const __m256i tmp = _mm256_packus_epi32(w_0, w_1);
+ const __m256i tmp2 = _mm256_permute4x64_epi64(tmp, 0xd8);
+ const __m256i max = _mm256_set1_epi16((1 << bit_depth) - 1);
+ const __m256i res = _mm256_min_epi16(tmp2, max);
+ yy_storeu_256(CONVERT_TO_SHORTPTR(dst8 + m), res);
+ } else {
+ // Pack into 8 bits and clamp to [0, 256)
+        // Note that each pack messes up the order of the output words,
+ // so we use a permute function to correct this
+ const __m256i tmp = _mm256_packs_epi32(w_0, w_1);
+ const __m256i tmp2 = _mm256_permute4x64_epi64(tmp, 0xd8);
+ const __m256i res =
+ _mm256_packus_epi16(tmp2, tmp2 /* "don't care" value */);
+ const __m128i res2 =
+ _mm256_castsi256_si128(_mm256_permute4x64_epi64(res, 0xd8));
+ xx_storeu_128(dst8 + m, res2);
+ }
+ }
+ }
+}
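For reference, the per-pixel projection performed above amounts to the following scalar computation (a sketch; the helper name and 64-bit accumulator are illustrative, while prj_bits/rst_bits stand for the SGRPROJ_PRJ_BITS/SGRPROJ_RST_BITS constants used above):

#include <stdint.h>

/* src is the unrestored pixel, f0/f1 the two self-guided filter outputs,
   xq0/xq1 the decoded projection weights; use_f0/use_f1 mirror the
   params->r[0] > 0 and params->r[1] > 0 checks above. */
static int32_t sketch_sgr_project_pixel(int32_t src, int32_t f0, int32_t f1,
                                        int32_t xq0, int32_t xq1, int use_f0,
                                        int use_f1, int prj_bits, int rst_bits,
                                        int32_t max_val) {
  const int32_t u = src << rst_bits;
  int64_t v = (int64_t)u << prj_bits;
  if (use_f0) v += (int64_t)xq0 * (f0 - u);
  if (use_f1) v += (int64_t)xq1 * (f1 - u);
  int32_t w = (int32_t)((v + ((1 << (prj_bits + rst_bits)) >> 1)) >>
                        (prj_bits + rst_bits));
  if (w < 0) w = 0;
  if (w > max_val) w = max_val; /* 255, or (1 << bit_depth) - 1 for highbd */
  return w;
}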
diff --git a/third_party/aom/av1/common/x86/selfguided_sse4.c b/third_party/aom/av1/common/x86/selfguided_sse4.c
index 9de9177c1..a42c94028 100644
--- a/third_party/aom/av1/common/x86/selfguided_sse4.c
+++ b/third_party/aom/av1/common/x86/selfguided_sse4.c
@@ -1,1821 +1,643 @@
#include <smmintrin.h>
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
+#include "config/aom_config.h"
+#include "config/av1_rtcd.h"
+
#include "av1/common/restoration.h"
#include "aom_dsp/x86/synonyms.h"
-/* Calculate four consecutive entries of the intermediate A and B arrays
- (corresponding to the first loop in the C version of
- av1_selfguided_restoration)
-*/
-static void calc_block(__m128i sum, __m128i sum_sq, __m128i n,
- __m128i *one_over_n_, __m128i *s_, int bit_depth,
- int idx, int32_t *A, int32_t *B) {
- __m128i a, b, p;
- __m128i one_over_n = *one_over_n_;
- __m128i s = *s_;
-#if CONFIG_HIGHBITDEPTH
- if (bit_depth > 8) {
- __m128i rounding_a = _mm_set1_epi32((1 << (2 * (bit_depth - 8))) >> 1);
- __m128i rounding_b = _mm_set1_epi32((1 << (bit_depth - 8)) >> 1);
- __m128i shift_a = _mm_cvtsi32_si128(2 * (bit_depth - 8));
- __m128i shift_b = _mm_cvtsi32_si128(bit_depth - 8);
- a = _mm_srl_epi32(_mm_add_epi32(sum_sq, rounding_a), shift_a);
- b = _mm_srl_epi32(_mm_add_epi32(sum, rounding_b), shift_b);
- a = _mm_mullo_epi32(a, n);
- b = _mm_mullo_epi32(b, b);
- p = _mm_sub_epi32(_mm_max_epi32(a, b), b);
- } else {
-#endif
- (void)bit_depth;
- a = _mm_mullo_epi32(sum_sq, n);
- b = _mm_mullo_epi32(sum, sum);
- p = _mm_sub_epi32(a, b);
-#if CONFIG_HIGHBITDEPTH
- }
-#endif
-
- __m128i rounding_z = _mm_set1_epi32((1 << SGRPROJ_MTABLE_BITS) >> 1);
- __m128i z = _mm_srli_epi32(_mm_add_epi32(_mm_mullo_epi32(p, s), rounding_z),
- SGRPROJ_MTABLE_BITS);
- z = _mm_min_epi32(z, _mm_set1_epi32(255));
-
- // 'Gather' type instructions are not available pre-AVX2, so synthesize a
- // gather using scalar loads.
- __m128i a_res = _mm_set_epi32(x_by_xplus1[_mm_extract_epi32(z, 3)],
- x_by_xplus1[_mm_extract_epi32(z, 2)],
- x_by_xplus1[_mm_extract_epi32(z, 1)],
- x_by_xplus1[_mm_extract_epi32(z, 0)]);
-
- _mm_storeu_si128((__m128i *)&A[idx], a_res);
-
- __m128i rounding_res = _mm_set1_epi32((1 << SGRPROJ_RECIP_BITS) >> 1);
- __m128i a_complement = _mm_sub_epi32(_mm_set1_epi32(SGRPROJ_SGR), a_res);
- __m128i b_int =
- _mm_mullo_epi32(a_complement, _mm_mullo_epi32(sum, one_over_n));
- __m128i b_res =
- _mm_srli_epi32(_mm_add_epi32(b_int, rounding_res), SGRPROJ_RECIP_BITS);
-
- _mm_storeu_si128((__m128i *)&B[idx], b_res);
+// Load 4 bytes from the possibly-misaligned pointer p, extend each byte to
+// 32-bit precision and return them in an SSE register.
+static __m128i xx_load_extend_8_32(const void *p) {
+ return _mm_cvtepu8_epi32(xx_loadl_32(p));
}
-static void selfguided_restoration_1_v(uint8_t *src, int width, int height,
- int src_stride, int32_t *A, int32_t *B,
- int buf_stride) {
- int i, j;
-
- // Vertical sum
- // When the width is not a multiple of 4, we know that 'stride' is rounded up
- // to a multiple of 4. So it is safe for this loop to calculate extra columns
- // at the right-hand edge of the frame.
- int width_extend = (width + 3) & ~3;
- for (j = 0; j < width_extend; j += 4) {
- __m128i a, b, x, y, x2, y2;
- __m128i sum, sum_sq, tmp;
-
- a = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[j]));
- b = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[src_stride + j]));
-
- sum = _mm_cvtepi16_epi32(_mm_add_epi16(a, b));
- tmp = _mm_unpacklo_epi16(a, b);
- sum_sq = _mm_madd_epi16(tmp, tmp);
-
- _mm_store_si128((__m128i *)&B[j], sum);
- _mm_store_si128((__m128i *)&A[j], sum_sq);
-
- x = _mm_cvtepu8_epi32(xx_loadl_32((__m128i *)&src[2 * src_stride + j]));
- sum = _mm_add_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_add_epi32(sum_sq, x2);
-
- for (i = 1; i < height - 2; ++i) {
- _mm_store_si128((__m128i *)&B[i * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu8_epi32(
- xx_loadl_32((__m128i *)&src[(i - 1) * src_stride + j]));
- y = _mm_cvtepu8_epi32(
- xx_loadl_32((__m128i *)&src[(i + 2) * src_stride + j]));
-
- sum = _mm_add_epi32(sum, _mm_sub_epi32(y, x));
-
- x2 = _mm_mullo_epi32(x, x);
- y2 = _mm_mullo_epi32(y, y);
-
- sum_sq = _mm_add_epi32(sum_sq, _mm_sub_epi32(y2, x2));
- }
- _mm_store_si128((__m128i *)&B[i * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu8_epi32(
- xx_loadl_32((__m128i *)&src[(i - 1) * src_stride + j]));
- sum = _mm_sub_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_sub_epi32(sum_sq, x2);
+// Load 4 halfwords from the possibly-misaligned pointer p, extend each
+// halfword to 32-bit precision and return them in an SSE register.
+static __m128i xx_load_extend_16_32(const void *p) {
+ return _mm_cvtepu16_epi32(xx_loadl_64(p));
+}
- _mm_store_si128((__m128i *)&B[(i + 1) * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[(i + 1) * buf_stride + j], sum_sq);
- }
+// Compute the scan (prefix sum) of an SSE register holding 4 32-bit
+// integers. If the register holds x0..x3 then the scan will hold
+// x0, x0+x1, x0+x1+x2, x0+x1+x2+x3.
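+// For example, scan_32 of {1, 2, 3, 4} gives {1, 3, 6, 10}.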
+static __m128i scan_32(__m128i x) {
+ const __m128i x01 = _mm_add_epi32(x, _mm_slli_si128(x, 4));
+ return _mm_add_epi32(x01, _mm_slli_si128(x01, 8));
}
-static void selfguided_restoration_1_h(int32_t *A, int32_t *B, int width,
- int height, int buf_stride, int eps,
- int bit_depth) {
- int i, j;
-
- // Horizontal sum
- int width_extend = (width + 3) & ~3;
- for (i = 0; i < height; ++i) {
- int h = AOMMIN(2, height - i) + AOMMIN(1, i);
-
- __m128i a1 = _mm_loadu_si128((__m128i *)&A[i * buf_stride]);
- __m128i b1 = _mm_loadu_si128((__m128i *)&B[i * buf_stride]);
- __m128i a2 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + 4]);
- __m128i b2 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + 4]);
-
- // Note: The _mm_slli_si128 call sets up a register containing
- // {0, A[i * buf_stride], ..., A[i * buf_stride + 2]},
- // so that the first element of 'sum' (which should only add two values
- // together) ends up calculated correctly.
- __m128i sum_ = _mm_add_epi32(_mm_slli_si128(b1, 4),
- _mm_add_epi32(b1, _mm_alignr_epi8(b2, b1, 4)));
- __m128i sum_sq_ = _mm_add_epi32(
- _mm_slli_si128(a1, 4), _mm_add_epi32(a1, _mm_alignr_epi8(a2, a1, 4)));
- __m128i n = _mm_set_epi32(3 * h, 3 * h, 3 * h, 2 * h);
- __m128i one_over_n =
- _mm_set_epi32(one_by_x[3 * h - 1], one_by_x[3 * h - 1],
- one_by_x[3 * h - 1], one_by_x[2 * h - 1]);
- __m128i s = _mm_set_epi32(
- sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1],
- sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][2 * h - 1]);
- calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride, A,
- B);
-
- n = _mm_set1_epi32(3 * h);
- one_over_n = _mm_set1_epi32(one_by_x[3 * h - 1]);
- s = _mm_set1_epi32(sgrproj_mtable[eps - 1][3 * h - 1]);
-
- // Re-align a1 and b1 so that they start at index i * buf_stride + 3
- a2 = _mm_alignr_epi8(a2, a1, 12);
- b2 = _mm_alignr_epi8(b2, b1, 12);
-
- // Note: When the width is not a multiple of 4, this loop may end up
- // writing to the last 4 columns of the frame, potentially with incorrect
- // values (especially for r=2 and r=3).
- // This is fine, since we fix up those values in the block after this
- // loop, and in exchange we never have more than four values to
- // write / fix up after this loop finishes.
- for (j = 4; j < width_extend - 4; j += 4) {
- a1 = a2;
- b1 = b2;
- a2 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + j + 3]);
- b2 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + j + 3]);
- /* Loop invariant: At this point,
- a1 = original A[i * buf_stride + j - 1 : i * buf_stride + j + 3]
- a2 = original A[i * buf_stride + j + 3 : i * buf_stride + j + 7]
- and similar for b1,b2 and B
- */
- sum_ = _mm_add_epi32(b1, _mm_add_epi32(_mm_alignr_epi8(b2, b1, 4),
- _mm_alignr_epi8(b2, b1, 8)));
- sum_sq_ = _mm_add_epi32(a1, _mm_add_epi32(_mm_alignr_epi8(a2, a1, 4),
- _mm_alignr_epi8(a2, a1, 8)));
- calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth,
- i * buf_stride + j, A, B);
- }
- __m128i a3 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + j + 3]);
- __m128i b3 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + j + 3]);
-
- j = width - 4;
- switch (width % 4) {
- case 0:
- a1 = a2;
- b1 = b2;
- a2 = a3;
- b2 = b3;
- break;
- case 1:
- a1 = _mm_alignr_epi8(a2, a1, 4);
- b1 = _mm_alignr_epi8(b2, b1, 4);
- a2 = _mm_alignr_epi8(a3, a2, 4);
- b2 = _mm_alignr_epi8(b3, b2, 4);
- break;
- case 2:
- a1 = _mm_alignr_epi8(a2, a1, 8);
- b1 = _mm_alignr_epi8(b2, b1, 8);
- a2 = _mm_alignr_epi8(a3, a2, 8);
- b2 = _mm_alignr_epi8(b3, b2, 8);
- break;
- case 3:
- a1 = _mm_alignr_epi8(a2, a1, 12);
- b1 = _mm_alignr_epi8(b2, b1, 12);
- a2 = _mm_alignr_epi8(a3, a2, 12);
- b2 = _mm_alignr_epi8(b3, b2, 12);
- break;
+// Compute two integral images from src. B sums elements; A sums their
+// squares. The images are offset by one pixel, so they have dimensions
+// (width + 1) x (height + 1), and their first row and column are zero.
+//
+// A+1 and B+1 should be aligned to 16 bytes. buf_stride should be a multiple
+// of 4.
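+//
+// After this runs, B[(i + 1) * buf_stride + (j + 1)] holds the sum of
+// src[0..i][0..j] and A holds the corresponding sum of squares, so
+// boxsum_from_ii below can read any box sum with just four loads.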
+static void integral_images(const uint8_t *src, int src_stride, int width,
+ int height, int32_t *A, int32_t *B,
+ int buf_stride) {
+ // Write out the zero top row
+ memset(A, 0, sizeof(*A) * (width + 1));
+ memset(B, 0, sizeof(*B) * (width + 1));
+
+ const __m128i zero = _mm_setzero_si128();
+ for (int i = 0; i < height; ++i) {
+ // Zero the left column.
+ A[(i + 1) * buf_stride] = B[(i + 1) * buf_stride] = 0;
+
+ // ldiff is the difference H - D where H is the output sample immediately
+ // to the left and D is the output sample above it. These are scalars,
+ // replicated across the four lanes.
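+ // Each new output entry is then scan(x) + above + (H - D), which is the
+ // usual integral-image recurrence applied to four columns at a time.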
+ __m128i ldiff1 = zero, ldiff2 = zero;
+ for (int j = 0; j < width; j += 4) {
+ const int ABj = 1 + j;
+
+ const __m128i above1 = xx_load_128(B + ABj + i * buf_stride);
+ const __m128i above2 = xx_load_128(A + ABj + i * buf_stride);
+
+ const __m128i x1 = xx_load_extend_8_32(src + j + i * src_stride);
+ const __m128i x2 = _mm_madd_epi16(x1, x1);
+
+ const __m128i sc1 = scan_32(x1);
+ const __m128i sc2 = scan_32(x2);
+
+ const __m128i row1 = _mm_add_epi32(_mm_add_epi32(sc1, above1), ldiff1);
+ const __m128i row2 = _mm_add_epi32(_mm_add_epi32(sc2, above2), ldiff2);
+
+ xx_store_128(B + ABj + (i + 1) * buf_stride, row1);
+ xx_store_128(A + ABj + (i + 1) * buf_stride, row2);
+
+ // Calculate the new H - D.
+ ldiff1 = _mm_shuffle_epi32(_mm_sub_epi32(row1, above1), 0xff);
+ ldiff2 = _mm_shuffle_epi32(_mm_sub_epi32(row2, above2), 0xff);
}
-
- // Zero out the data loaded from "off the edge" of the array
- __m128i zero = _mm_setzero_si128();
- a2 = _mm_blend_epi16(a2, zero, 0xfc);
- b2 = _mm_blend_epi16(b2, zero, 0xfc);
-
- sum_ = _mm_add_epi32(b1, _mm_add_epi32(_mm_alignr_epi8(b2, b1, 4),
- _mm_alignr_epi8(b2, b1, 8)));
- sum_sq_ = _mm_add_epi32(a1, _mm_add_epi32(_mm_alignr_epi8(a2, a1, 4),
- _mm_alignr_epi8(a2, a1, 8)));
- n = _mm_set_epi32(2 * h, 3 * h, 3 * h, 3 * h);
- one_over_n = _mm_set_epi32(one_by_x[2 * h - 1], one_by_x[3 * h - 1],
- one_by_x[3 * h - 1], one_by_x[3 * h - 1]);
- s = _mm_set_epi32(
- sgrproj_mtable[eps - 1][2 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1],
- sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1]);
- calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride + j,
- A, B);
}
}
-static void selfguided_restoration_2_v(uint8_t *src, int width, int height,
- int src_stride, int32_t *A, int32_t *B,
- int buf_stride) {
- int i, j;
-
- // Vertical sum
- int width_extend = (width + 3) & ~3;
- for (j = 0; j < width_extend; j += 4) {
- __m128i a, b, c, c2, x, y, x2, y2;
- __m128i sum, sum_sq, tmp;
-
- a = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[j]));
- b = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[src_stride + j]));
- c = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[2 * src_stride + j]));
+// Compute two integral images from src. B sums elements; A sums their
+// squares. As above, the images are offset by one pixel, so A+1 and B+1
+// should be aligned to 16 bytes and buf_stride should be a multiple of 4.
+static void integral_images_highbd(const uint16_t *src, int src_stride,
+ int width, int height, int32_t *A,
+ int32_t *B, int buf_stride) {
+ // Write out the zero top row
+ memset(A, 0, sizeof(*A) * (width + 1));
+ memset(B, 0, sizeof(*B) * (width + 1));
- sum = _mm_cvtepi16_epi32(_mm_add_epi16(_mm_add_epi16(a, b), c));
- // Important: Since c may be up to 2^8, the result on squaring may
- // be up to 2^16. So we need to zero-extend, not sign-extend.
- c2 = _mm_cvtepu16_epi32(_mm_mullo_epi16(c, c));
- tmp = _mm_unpacklo_epi16(a, b);
- sum_sq = _mm_add_epi32(_mm_madd_epi16(tmp, tmp), c2);
+ const __m128i zero = _mm_setzero_si128();
+ for (int i = 0; i < height; ++i) {
+ // Zero the left column.
+ A[(i + 1) * buf_stride] = B[(i + 1) * buf_stride] = 0;
- _mm_store_si128((__m128i *)&B[j], sum);
- _mm_store_si128((__m128i *)&A[j], sum_sq);
+ // ldiff is the difference H - D where H is the output sample immediately
+ // to the left and D is the output sample above it. These are scalars,
+ // replicated across the four lanes.
+ __m128i ldiff1 = zero, ldiff2 = zero;
+ for (int j = 0; j < width; j += 4) {
+ const int ABj = 1 + j;
- x = _mm_cvtepu8_epi32(xx_loadl_32((__m128i *)&src[3 * src_stride + j]));
- sum = _mm_add_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_add_epi32(sum_sq, x2);
+ const __m128i above1 = xx_load_128(B + ABj + i * buf_stride);
+ const __m128i above2 = xx_load_128(A + ABj + i * buf_stride);
- _mm_store_si128((__m128i *)&B[buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[buf_stride + j], sum_sq);
+ const __m128i x1 = xx_load_extend_16_32(src + j + i * src_stride);
+ const __m128i x2 = _mm_madd_epi16(x1, x1);
- x = _mm_cvtepu8_epi32(xx_loadl_32((__m128i *)&src[4 * src_stride + j]));
- sum = _mm_add_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_add_epi32(sum_sq, x2);
+ const __m128i sc1 = scan_32(x1);
+ const __m128i sc2 = scan_32(x2);
- for (i = 2; i < height - 3; ++i) {
- _mm_store_si128((__m128i *)&B[i * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
+ const __m128i row1 = _mm_add_epi32(_mm_add_epi32(sc1, above1), ldiff1);
+ const __m128i row2 = _mm_add_epi32(_mm_add_epi32(sc2, above2), ldiff2);
- x = _mm_cvtepu8_epi32(
- _mm_cvtsi32_si128(*((int *)&src[(i - 2) * src_stride + j])));
- y = _mm_cvtepu8_epi32(
- _mm_cvtsi32_si128(*((int *)&src[(i + 3) * src_stride + j])));
+ xx_store_128(B + ABj + (i + 1) * buf_stride, row1);
+ xx_store_128(A + ABj + (i + 1) * buf_stride, row2);
- sum = _mm_add_epi32(sum, _mm_sub_epi32(y, x));
-
- x2 = _mm_mullo_epi32(x, x);
- y2 = _mm_mullo_epi32(y, y);
-
- sum_sq = _mm_add_epi32(sum_sq, _mm_sub_epi32(y2, x2));
+ // Calculate the new H - D.
+ ldiff1 = _mm_shuffle_epi32(_mm_sub_epi32(row1, above1), 0xff);
+ ldiff2 = _mm_shuffle_epi32(_mm_sub_epi32(row2, above2), 0xff);
}
- _mm_store_si128((__m128i *)&B[i * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu8_epi32(
- xx_loadl_32((__m128i *)&src[(i - 2) * src_stride + j]));
- sum = _mm_sub_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_sub_epi32(sum_sq, x2);
-
- _mm_store_si128((__m128i *)&B[(i + 1) * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[(i + 1) * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu8_epi32(
- xx_loadl_32((__m128i *)&src[(i - 1) * src_stride + j]));
- sum = _mm_sub_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_sub_epi32(sum_sq, x2);
-
- _mm_store_si128((__m128i *)&B[(i + 2) * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[(i + 2) * buf_stride + j], sum_sq);
}
}
-static void selfguided_restoration_2_h(int32_t *A, int32_t *B, int width,
- int height, int buf_stride, int eps,
- int bit_depth) {
- int i, j;
-
- // Horizontal sum
- int width_extend = (width + 3) & ~3;
- for (i = 0; i < height; ++i) {
- int h = AOMMIN(3, height - i) + AOMMIN(2, i);
-
- __m128i a1 = _mm_loadu_si128((__m128i *)&A[i * buf_stride]);
- __m128i b1 = _mm_loadu_si128((__m128i *)&B[i * buf_stride]);
- __m128i a2 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + 4]);
- __m128i b2 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + 4]);
-
- __m128i sum_ = _mm_add_epi32(
- _mm_add_epi32(
- _mm_add_epi32(_mm_slli_si128(b1, 8), _mm_slli_si128(b1, 4)),
- _mm_add_epi32(b1, _mm_alignr_epi8(b2, b1, 4))),
- _mm_alignr_epi8(b2, b1, 8));
- __m128i sum_sq_ = _mm_add_epi32(
- _mm_add_epi32(
- _mm_add_epi32(_mm_slli_si128(a1, 8), _mm_slli_si128(a1, 4)),
- _mm_add_epi32(a1, _mm_alignr_epi8(a2, a1, 4))),
- _mm_alignr_epi8(a2, a1, 8));
-
- __m128i n = _mm_set_epi32(5 * h, 5 * h, 4 * h, 3 * h);
- __m128i one_over_n =
- _mm_set_epi32(one_by_x[5 * h - 1], one_by_x[5 * h - 1],
- one_by_x[4 * h - 1], one_by_x[3 * h - 1]);
- __m128i s = _mm_set_epi32(
- sgrproj_mtable[eps - 1][5 * h - 1], sgrproj_mtable[eps - 1][5 * h - 1],
- sgrproj_mtable[eps - 1][4 * h - 1], sgrproj_mtable[eps - 1][3 * h - 1]);
- calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride, A,
- B);
-
- // Re-align a1 and b1 so that they start at index i * buf_stride + 2
- a2 = _mm_alignr_epi8(a2, a1, 8);
- b2 = _mm_alignr_epi8(b2, b1, 8);
-
- n = _mm_set1_epi32(5 * h);
- one_over_n = _mm_set1_epi32(one_by_x[5 * h - 1]);
- s = _mm_set1_epi32(sgrproj_mtable[eps - 1][5 * h - 1]);
-
- for (j = 4; j < width_extend - 4; j += 4) {
- a1 = a2;
- a2 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + j + 2]);
- b1 = b2;
- b2 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + j + 2]);
- /* Loop invariant: At this point,
- a1 = original A[i * buf_stride + j - 2 : i * buf_stride + j + 2]
- a2 = original A[i * buf_stride + j + 2 : i * buf_stride + j + 6]
- and similar for b1,b2 and B
- */
- sum_ = _mm_add_epi32(
- _mm_add_epi32(b1, _mm_add_epi32(_mm_alignr_epi8(b2, b1, 4),
- _mm_alignr_epi8(b2, b1, 8))),
- _mm_add_epi32(_mm_alignr_epi8(b2, b1, 12), b2));
- sum_sq_ = _mm_add_epi32(
- _mm_add_epi32(a1, _mm_add_epi32(_mm_alignr_epi8(a2, a1, 4),
- _mm_alignr_epi8(a2, a1, 8))),
- _mm_add_epi32(_mm_alignr_epi8(a2, a1, 12), a2));
-
- calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth,
- i * buf_stride + j, A, B);
- }
- // If the width is not a multiple of 4, we need to reset j to width - 4
- // and adjust a1, a2, b1, b2 so that the loop invariant above is maintained
- __m128i a3 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + j + 2]);
- __m128i b3 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + j + 2]);
-
- j = width - 4;
- switch (width % 4) {
- case 0:
- a1 = a2;
- b1 = b2;
- a2 = a3;
- b2 = b3;
- break;
- case 1:
- a1 = _mm_alignr_epi8(a2, a1, 4);
- b1 = _mm_alignr_epi8(b2, b1, 4);
- a2 = _mm_alignr_epi8(a3, a2, 4);
- b2 = _mm_alignr_epi8(b3, b2, 4);
- break;
- case 2:
- a1 = _mm_alignr_epi8(a2, a1, 8);
- b1 = _mm_alignr_epi8(b2, b1, 8);
- a2 = _mm_alignr_epi8(a3, a2, 8);
- b2 = _mm_alignr_epi8(b3, b2, 8);
- break;
- case 3:
- a1 = _mm_alignr_epi8(a2, a1, 12);
- b1 = _mm_alignr_epi8(b2, b1, 12);
- a2 = _mm_alignr_epi8(a3, a2, 12);
- b2 = _mm_alignr_epi8(b3, b2, 12);
- break;
- }
-
- // Zero out the data loaded from "off the edge" of the array
- __m128i zero = _mm_setzero_si128();
- a2 = _mm_blend_epi16(a2, zero, 0xf0);
- b2 = _mm_blend_epi16(b2, zero, 0xf0);
-
- sum_ = _mm_add_epi32(
- _mm_add_epi32(b1, _mm_add_epi32(_mm_alignr_epi8(b2, b1, 4),
- _mm_alignr_epi8(b2, b1, 8))),
- _mm_add_epi32(_mm_alignr_epi8(b2, b1, 12), b2));
- sum_sq_ = _mm_add_epi32(
- _mm_add_epi32(a1, _mm_add_epi32(_mm_alignr_epi8(a2, a1, 4),
- _mm_alignr_epi8(a2, a1, 8))),
- _mm_add_epi32(_mm_alignr_epi8(a2, a1, 12), a2));
-
- n = _mm_set_epi32(3 * h, 4 * h, 5 * h, 5 * h);
- one_over_n = _mm_set_epi32(one_by_x[3 * h - 1], one_by_x[4 * h - 1],
- one_by_x[5 * h - 1], one_by_x[5 * h - 1]);
- s = _mm_set_epi32(
- sgrproj_mtable[eps - 1][3 * h - 1], sgrproj_mtable[eps - 1][4 * h - 1],
- sgrproj_mtable[eps - 1][5 * h - 1], sgrproj_mtable[eps - 1][5 * h - 1]);
- calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride + j,
- A, B);
- }
+// Compute 4 values of boxsum from the given integral image. ii should point
+// at the middle of the box (for the first value). r is the box radius.
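+// With the one-pixel offset used by the integral images, the box sum is
+// (br - bl) - (tr - tl), evaluated here for four adjacent pixels at once.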
+static INLINE __m128i boxsum_from_ii(const int32_t *ii, int stride, int r) {
+ const __m128i tl = xx_loadu_128(ii - (r + 1) - (r + 1) * stride);
+ const __m128i tr = xx_loadu_128(ii + (r + 0) - (r + 1) * stride);
+ const __m128i bl = xx_loadu_128(ii - (r + 1) + r * stride);
+ const __m128i br = xx_loadu_128(ii + (r + 0) + r * stride);
+ const __m128i u = _mm_sub_epi32(tr, tl);
+ const __m128i v = _mm_sub_epi32(br, bl);
+ return _mm_sub_epi32(v, u);
}
-static void selfguided_restoration_3_v(uint8_t *src, int width, int height,
- int src_stride, int32_t *A, int32_t *B,
- int buf_stride) {
- int i, j;
-
- // Vertical sum over 7-pixel regions, 4 columns at a time
- int width_extend = (width + 3) & ~3;
- for (j = 0; j < width_extend; j += 4) {
- __m128i a, b, c, d, x, y, x2, y2;
- __m128i sum, sum_sq, tmp, tmp2;
+static __m128i round_for_shift(unsigned shift) {
+ return _mm_set1_epi32((1 << shift) >> 1);
+}
- a = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[j]));
- b = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[src_stride + j]));
- c = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[2 * src_stride + j]));
- d = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[3 * src_stride + j]));
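+// Compute p = n * sum(x^2) - sum(x)^2 (i.e. n^2 times the variance of the
+// box), where sum1 = sum(x) and sum2 = sum(x^2). For bit depths above 8 the
+// sums are first rounded down to an 8-bit scale so that p stays in the same
+// range as for 8-bit input.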
+static __m128i compute_p(__m128i sum1, __m128i sum2, int bit_depth, int n) {
+ __m128i an, bb;
+ if (bit_depth > 8) {
+ const __m128i rounding_a = round_for_shift(2 * (bit_depth - 8));
+ const __m128i rounding_b = round_for_shift(bit_depth - 8);
+ const __m128i shift_a = _mm_cvtsi32_si128(2 * (bit_depth - 8));
+ const __m128i shift_b = _mm_cvtsi32_si128(bit_depth - 8);
+ const __m128i a = _mm_srl_epi32(_mm_add_epi32(sum2, rounding_a), shift_a);
+ const __m128i b = _mm_srl_epi32(_mm_add_epi32(sum1, rounding_b), shift_b);
+ // b < 2^14, so we can use a 16-bit madd rather than a 32-bit
+ // mullo to square it
+ bb = _mm_madd_epi16(b, b);
+ an = _mm_max_epi32(_mm_mullo_epi32(a, _mm_set1_epi32(n)), bb);
+ } else {
+ bb = _mm_madd_epi16(sum1, sum1);
+ an = _mm_mullo_epi32(sum2, _mm_set1_epi32(n));
+ }
+ return _mm_sub_epi32(an, bb);
+}
- sum = _mm_cvtepi16_epi32(
- _mm_add_epi16(_mm_add_epi16(a, b), _mm_add_epi16(c, d)));
- tmp = _mm_unpacklo_epi16(a, b);
- tmp2 = _mm_unpacklo_epi16(c, d);
- sum_sq =
- _mm_add_epi32(_mm_madd_epi16(tmp, tmp), _mm_madd_epi16(tmp2, tmp2));
+// Assumes that C, D are integral images for the original buffer which has been
+// extended to have a padding of SGRPROJ_BORDER_VERT/SGRPROJ_BORDER_HORZ pixels
+// on the sides. A, B, C, D point at logical position (0, 0).
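+//
+// On output, A and B hold the a and b coefficients of the self-guided filter
+// for every pixel (plus a one-pixel border); the final filter below combines
+// them into the filtered result.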
+static void calc_ab(int32_t *A, int32_t *B, const int32_t *C, const int32_t *D,
+ int width, int height, int buf_stride, int bit_depth,
+ int sgr_params_idx, int radius_idx) {
+ const sgr_params_type *const params = &sgr_params[sgr_params_idx];
+ const int r = params->r[radius_idx];
+ const int n = (2 * r + 1) * (2 * r + 1);
+ const __m128i s = _mm_set1_epi32(params->s[radius_idx]);
+ // one_by_x[n - 1] is 2^12/n, so it easily fits in an int16
+ const __m128i one_over_n = _mm_set1_epi32(one_by_x[n - 1]);
+
+ const __m128i rnd_z = round_for_shift(SGRPROJ_MTABLE_BITS);
+ const __m128i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
+
+ // Set up masks
+ const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+ __m128i mask[4];
+ for (int idx = 0; idx < 4; idx++) {
+ const __m128i shift = _mm_cvtsi32_si128(8 * (4 - idx));
+ mask[idx] = _mm_cvtepi8_epi32(_mm_srl_epi64(ones32, shift));
+ }
- _mm_store_si128((__m128i *)&B[j], sum);
- _mm_store_si128((__m128i *)&A[j], sum_sq);
+ for (int i = -1; i < height + 1; ++i) {
+ for (int j = -1; j < width + 1; j += 4) {
+ const int32_t *Cij = C + i * buf_stride + j;
+ const int32_t *Dij = D + i * buf_stride + j;
- x = _mm_cvtepu8_epi32(xx_loadl_32((__m128i *)&src[4 * src_stride + j]));
- sum = _mm_add_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_add_epi32(sum_sq, x2);
+ __m128i sum1 = boxsum_from_ii(Dij, buf_stride, r);
+ __m128i sum2 = boxsum_from_ii(Cij, buf_stride, r);
- _mm_store_si128((__m128i *)&B[buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[buf_stride + j], sum_sq);
+ // When width + 2 isn't a multiple of 4, sum1 and sum2 will contain
+ // some uninitialised data in their upper words. We use a mask to
+ // ensure that these bits are set to 0.
+ int idx = AOMMIN(4, width + 1 - j);
+ assert(idx >= 1);
- x = _mm_cvtepu8_epi32(xx_loadl_32((__m128i *)&src[5 * src_stride + j]));
- sum = _mm_add_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_add_epi32(sum_sq, x2);
+ if (idx < 4) {
+ sum1 = _mm_and_si128(mask[idx], sum1);
+ sum2 = _mm_and_si128(mask[idx], sum2);
+ }
- _mm_store_si128((__m128i *)&B[2 * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[2 * buf_stride + j], sum_sq);
+ const __m128i p = compute_p(sum1, sum2, bit_depth, n);
- x = _mm_cvtepu8_epi32(xx_loadl_32((__m128i *)&src[6 * src_stride + j]));
- sum = _mm_add_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_add_epi32(sum_sq, x2);
+ const __m128i z = _mm_min_epi32(
+ _mm_srli_epi32(_mm_add_epi32(_mm_mullo_epi32(p, s), rnd_z),
+ SGRPROJ_MTABLE_BITS),
+ _mm_set1_epi32(255));
- for (i = 3; i < height - 4; ++i) {
- _mm_store_si128((__m128i *)&B[i * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
+ // 'Gather' type instructions are not available pre-AVX2, so synthesize a
+ // gather using scalar loads.
+ const __m128i a_res = _mm_set_epi32(x_by_xplus1[_mm_extract_epi32(z, 3)],
+ x_by_xplus1[_mm_extract_epi32(z, 2)],
+ x_by_xplus1[_mm_extract_epi32(z, 1)],
+ x_by_xplus1[_mm_extract_epi32(z, 0)]);
- x = _mm_cvtepu8_epi32(xx_loadl_32(&src[(i - 3) * src_stride + j]));
- y = _mm_cvtepu8_epi32(xx_loadl_32(&src[(i + 4) * src_stride + j]));
+ xx_storeu_128(A + i * buf_stride + j, a_res);
- sum = _mm_add_epi32(sum, _mm_sub_epi32(y, x));
+ const __m128i a_complement =
+ _mm_sub_epi32(_mm_set1_epi32(SGRPROJ_SGR), a_res);
- x2 = _mm_mullo_epi32(x, x);
- y2 = _mm_mullo_epi32(y, y);
+ // sum1 might have lanes greater than 2^15, so we can't use madd to do
+ // multiplication involving sum1. However, a_complement and one_over_n
+ // are both comfortably below 2^15, so we can multiply them first.
+ const __m128i a_comp_over_n = _mm_madd_epi16(a_complement, one_over_n);
+ const __m128i b_int = _mm_mullo_epi32(a_comp_over_n, sum1);
+ const __m128i b_res =
+ _mm_srli_epi32(_mm_add_epi32(b_int, rnd_res), SGRPROJ_RECIP_BITS);
- sum_sq = _mm_add_epi32(sum_sq, _mm_sub_epi32(y2, x2));
+ xx_storeu_128(B + i * buf_stride + j, b_res);
}
- _mm_store_si128((__m128i *)&B[i * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu8_epi32(
- xx_loadl_32((__m128i *)&src[(i - 3) * src_stride + j]));
- sum = _mm_sub_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_sub_epi32(sum_sq, x2);
-
- _mm_store_si128((__m128i *)&B[(i + 1) * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[(i + 1) * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu8_epi32(
- xx_loadl_32((__m128i *)&src[(i - 2) * src_stride + j]));
- sum = _mm_sub_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_sub_epi32(sum_sq, x2);
-
- _mm_store_si128((__m128i *)&B[(i + 2) * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[(i + 2) * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu8_epi32(
- xx_loadl_32((__m128i *)&src[(i - 1) * src_stride + j]));
- sum = _mm_sub_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_sub_epi32(sum_sq, x2);
-
- _mm_store_si128((__m128i *)&B[(i + 3) * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[(i + 3) * buf_stride + j], sum_sq);
}
}
-static void selfguided_restoration_3_h(int32_t *A, int32_t *B, int width,
- int height, int buf_stride, int eps,
- int bit_depth) {
- int i, j;
- // Horizontal sum over 7-pixel regions of dst
- int width_extend = (width + 3) & ~3;
- for (i = 0; i < height; ++i) {
- int h = AOMMIN(4, height - i) + AOMMIN(3, i);
-
- __m128i a1 = _mm_loadu_si128((__m128i *)&A[i * buf_stride]);
- __m128i b1 = _mm_loadu_si128((__m128i *)&B[i * buf_stride]);
- __m128i a2 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + 4]);
- __m128i b2 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + 4]);
-
- __m128i sum_ = _mm_add_epi32(
- _mm_add_epi32(
- _mm_add_epi32(_mm_slli_si128(b1, 12), _mm_slli_si128(b1, 8)),
- _mm_add_epi32(_mm_slli_si128(b1, 4), b1)),
- _mm_add_epi32(_mm_add_epi32(_mm_alignr_epi8(b2, b1, 4),
- _mm_alignr_epi8(b2, b1, 8)),
- _mm_alignr_epi8(b2, b1, 12)));
- __m128i sum_sq_ = _mm_add_epi32(
- _mm_add_epi32(
- _mm_add_epi32(_mm_slli_si128(a1, 12), _mm_slli_si128(a1, 8)),
- _mm_add_epi32(_mm_slli_si128(a1, 4), a1)),
- _mm_add_epi32(_mm_add_epi32(_mm_alignr_epi8(a2, a1, 4),
- _mm_alignr_epi8(a2, a1, 8)),
- _mm_alignr_epi8(a2, a1, 12)));
-
- __m128i n = _mm_set_epi32(7 * h, 6 * h, 5 * h, 4 * h);
- __m128i one_over_n =
- _mm_set_epi32(one_by_x[7 * h - 1], one_by_x[6 * h - 1],
- one_by_x[5 * h - 1], one_by_x[4 * h - 1]);
- __m128i s = _mm_set_epi32(
- sgrproj_mtable[eps - 1][7 * h - 1], sgrproj_mtable[eps - 1][6 * h - 1],
- sgrproj_mtable[eps - 1][5 * h - 1], sgrproj_mtable[eps - 1][4 * h - 1]);
- calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride, A,
- B);
-
- // Re-align a1 and b1 so that they start at index i * buf_stride + 1
- a2 = _mm_alignr_epi8(a2, a1, 4);
- b2 = _mm_alignr_epi8(b2, b1, 4);
-
- n = _mm_set1_epi32(7 * h);
- one_over_n = _mm_set1_epi32(one_by_x[7 * h - 1]);
- s = _mm_set1_epi32(sgrproj_mtable[eps - 1][7 * h - 1]);
-
- for (j = 4; j < width_extend - 4; j += 4) {
- a1 = a2;
- a2 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + j + 1]);
- b1 = b2;
- b2 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + j + 1]);
- __m128i a3 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + j + 5]);
- __m128i b3 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + j + 5]);
- /* Loop invariant: At this point,
- a1 = original A[i * buf_stride + j - 3 : i * buf_stride + j + 1]
- a2 = original A[i * buf_stride + j + 1 : i * buf_stride + j + 5]
- a3 = original A[i * buf_stride + j + 5 : i * buf_stride + j + 9]
- and similar for b1,b2,b3 and B
- */
- sum_ = _mm_add_epi32(
- _mm_add_epi32(_mm_add_epi32(b1, _mm_alignr_epi8(b2, b1, 4)),
- _mm_add_epi32(_mm_alignr_epi8(b2, b1, 8),
- _mm_alignr_epi8(b2, b1, 12))),
- _mm_add_epi32(_mm_add_epi32(b2, _mm_alignr_epi8(b3, b2, 4)),
- _mm_alignr_epi8(b3, b2, 8)));
- sum_sq_ = _mm_add_epi32(
- _mm_add_epi32(_mm_add_epi32(a1, _mm_alignr_epi8(a2, a1, 4)),
- _mm_add_epi32(_mm_alignr_epi8(a2, a1, 8),
- _mm_alignr_epi8(a2, a1, 12))),
- _mm_add_epi32(_mm_add_epi32(a2, _mm_alignr_epi8(a3, a2, 4)),
- _mm_alignr_epi8(a3, a2, 8)));
-
- calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth,
- i * buf_stride + j, A, B);
- }
- __m128i a3 = _mm_loadu_si128((__m128i *)&A[i * buf_stride + j + 1]);
- __m128i b3 = _mm_loadu_si128((__m128i *)&B[i * buf_stride + j + 1]);
-
- j = width - 4;
- switch (width % 4) {
- case 0:
- a1 = a2;
- b1 = b2;
- a2 = a3;
- b2 = b3;
- break;
- case 1:
- a1 = _mm_alignr_epi8(a2, a1, 4);
- b1 = _mm_alignr_epi8(b2, b1, 4);
- a2 = _mm_alignr_epi8(a3, a2, 4);
- b2 = _mm_alignr_epi8(b3, b2, 4);
- break;
- case 2:
- a1 = _mm_alignr_epi8(a2, a1, 8);
- b1 = _mm_alignr_epi8(b2, b1, 8);
- a2 = _mm_alignr_epi8(a3, a2, 8);
- b2 = _mm_alignr_epi8(b3, b2, 8);
- break;
- case 3:
- a1 = _mm_alignr_epi8(a2, a1, 12);
- b1 = _mm_alignr_epi8(b2, b1, 12);
- a2 = _mm_alignr_epi8(a3, a2, 12);
- b2 = _mm_alignr_epi8(b3, b2, 12);
- break;
- }
-
- // Zero out the data loaded from "off the edge" of the array
- __m128i zero = _mm_setzero_si128();
- a2 = _mm_blend_epi16(a2, zero, 0xc0);
- b2 = _mm_blend_epi16(b2, zero, 0xc0);
-
- sum_ = _mm_add_epi32(
- _mm_add_epi32(_mm_add_epi32(b1, _mm_alignr_epi8(b2, b1, 4)),
- _mm_add_epi32(_mm_alignr_epi8(b2, b1, 8),
- _mm_alignr_epi8(b2, b1, 12))),
- _mm_add_epi32(_mm_add_epi32(b2, _mm_alignr_epi8(zero, b2, 4)),
- _mm_alignr_epi8(zero, b2, 8)));
- sum_sq_ = _mm_add_epi32(
- _mm_add_epi32(_mm_add_epi32(a1, _mm_alignr_epi8(a2, a1, 4)),
- _mm_add_epi32(_mm_alignr_epi8(a2, a1, 8),
- _mm_alignr_epi8(a2, a1, 12))),
- _mm_add_epi32(_mm_add_epi32(a2, _mm_alignr_epi8(zero, a2, 4)),
- _mm_alignr_epi8(zero, a2, 8)));
-
- n = _mm_set_epi32(4 * h, 5 * h, 6 * h, 7 * h);
- one_over_n = _mm_set_epi32(one_by_x[4 * h - 1], one_by_x[5 * h - 1],
- one_by_x[6 * h - 1], one_by_x[7 * h - 1]);
- s = _mm_set_epi32(
- sgrproj_mtable[eps - 1][4 * h - 1], sgrproj_mtable[eps - 1][5 * h - 1],
- sgrproj_mtable[eps - 1][6 * h - 1], sgrproj_mtable[eps - 1][7 * h - 1]);
- calc_block(sum_, sum_sq_, n, &one_over_n, &s, bit_depth, i * buf_stride + j,
- A, B);
- }
+// Calculate 4 values of the "cross sum" starting at buf. This is a 3x3 filter
+// where the outer four corners have weight 3 and all other pixels have weight
+// 4.
+//
+// Pixels are indexed like this:
+// xtl xt xtr
+// xl x xr
+// xbl xb xbr
+//
+// buf points to x
+//
+// fours = xl + xt + xr + xb + x
+// threes = xtl + xtr + xbr + xbl
+// cross_sum = 4 * fours + 3 * threes
+// = 4 * (fours + threes) - threes
+// = (fours + threes) << 2 - threes
+static INLINE __m128i cross_sum(const int32_t *buf, int stride) {
+ const __m128i xtl = xx_loadu_128(buf - 1 - stride);
+ const __m128i xt = xx_loadu_128(buf - stride);
+ const __m128i xtr = xx_loadu_128(buf + 1 - stride);
+ const __m128i xl = xx_loadu_128(buf - 1);
+ const __m128i x = xx_loadu_128(buf);
+ const __m128i xr = xx_loadu_128(buf + 1);
+ const __m128i xbl = xx_loadu_128(buf - 1 + stride);
+ const __m128i xb = xx_loadu_128(buf + stride);
+ const __m128i xbr = xx_loadu_128(buf + 1 + stride);
+
+ const __m128i fours = _mm_add_epi32(
+ xl, _mm_add_epi32(xt, _mm_add_epi32(xr, _mm_add_epi32(xb, x))));
+ const __m128i threes =
+ _mm_add_epi32(xtl, _mm_add_epi32(xtr, _mm_add_epi32(xbr, xbl)));
+
+ return _mm_sub_epi32(_mm_slli_epi32(_mm_add_epi32(fours, threes), 2), threes);
}
-void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height,
- int dgd_stride, int32_t *dst,
- int dst_stride, int r, int eps) {
- const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
- const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
- int32_t A_[RESTORATION_PROC_UNIT_PELS];
- int32_t B_[RESTORATION_PROC_UNIT_PELS];
- int32_t *A = A_;
- int32_t *B = B_;
- int i, j;
- // Adjusting the stride of A and B here appears to avoid bad cache effects,
- // leading to a significant speed improvement.
- // We also align the stride to a multiple of 16 bytes for efficiency.
- int buf_stride = ((width_ext + 3) & ~3) + 16;
-
- // Don't filter tiles with dimensions < 5 on any axis
- if ((width < 5) || (height < 5)) return;
-
- uint8_t *dgd0 = dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ;
- if (r == 1) {
- selfguided_restoration_1_v(dgd0, width_ext, height_ext, dgd_stride, A, B,
- buf_stride);
- selfguided_restoration_1_h(A, B, width_ext, height_ext, buf_stride, eps, 8);
- } else if (r == 2) {
- selfguided_restoration_2_v(dgd0, width_ext, height_ext, dgd_stride, A, B,
- buf_stride);
- selfguided_restoration_2_h(A, B, width_ext, height_ext, buf_stride, eps, 8);
- } else if (r == 3) {
- selfguided_restoration_3_v(dgd0, width_ext, height_ext, dgd_stride, A, B,
- buf_stride);
- selfguided_restoration_3_h(A, B, width_ext, height_ext, buf_stride, eps, 8);
- } else {
- assert(0);
- }
- A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
- B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
-
- {
- i = 0;
- j = 0;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = 3 * A[k] + 2 * A[k + 1] + 2 * A[k + buf_stride] +
- A[k + buf_stride + 1];
- const int32_t b = 3 * B[k] + 2 * B[k + 1] + 2 * B[k + buf_stride] +
- B[k + buf_stride + 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- for (j = 1; j < width - 1; ++j) {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + buf_stride] +
- A[k + buf_stride - 1] + A[k + buf_stride + 1];
- const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k + buf_stride] +
- B[k + buf_stride - 1] + B[k + buf_stride + 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- j = width - 1;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = 3 * A[k] + 2 * A[k - 1] + 2 * A[k + buf_stride] +
- A[k + buf_stride - 1];
- const int32_t b = 3 * B[k] + 2 * B[k - 1] + 2 * B[k + buf_stride] +
- B[k + buf_stride - 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- }
- for (i = 1; i < height - 1; ++i) {
- j = 0;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
- A[k + 1] + A[k - buf_stride + 1] +
- A[k + buf_stride + 1];
- const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) +
- B[k + 1] + B[k - buf_stride + 1] +
- B[k + buf_stride + 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
-
- // Vectorize the innermost loop
- for (j = 1; j < width - 1; j += 4) {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 5;
-
- __m128i tmp0 = _mm_loadu_si128((__m128i *)&A[k - 1 - buf_stride]);
- __m128i tmp1 = _mm_loadu_si128((__m128i *)&A[k + 3 - buf_stride]);
- __m128i tmp2 = _mm_loadu_si128((__m128i *)&A[k - 1]);
- __m128i tmp3 = _mm_loadu_si128((__m128i *)&A[k + 3]);
- __m128i tmp4 = _mm_loadu_si128((__m128i *)&A[k - 1 + buf_stride]);
- __m128i tmp5 = _mm_loadu_si128((__m128i *)&A[k + 3 + buf_stride]);
-
- __m128i a0 = _mm_add_epi32(
- _mm_add_epi32(_mm_add_epi32(_mm_alignr_epi8(tmp3, tmp2, 4), tmp2),
- _mm_add_epi32(_mm_alignr_epi8(tmp3, tmp2, 8),
- _mm_alignr_epi8(tmp5, tmp4, 4))),
- _mm_alignr_epi8(tmp1, tmp0, 4));
- __m128i a1 = _mm_add_epi32(_mm_add_epi32(tmp0, tmp4),
- _mm_add_epi32(_mm_alignr_epi8(tmp1, tmp0, 8),
- _mm_alignr_epi8(tmp5, tmp4, 8)));
- __m128i a = _mm_sub_epi32(_mm_slli_epi32(_mm_add_epi32(a0, a1), 2), a1);
-
- __m128i tmp6 = _mm_loadu_si128((__m128i *)&B[k - 1 - buf_stride]);
- __m128i tmp7 = _mm_loadu_si128((__m128i *)&B[k + 3 - buf_stride]);
- __m128i tmp8 = _mm_loadu_si128((__m128i *)&B[k - 1]);
- __m128i tmp9 = _mm_loadu_si128((__m128i *)&B[k + 3]);
- __m128i tmp10 = _mm_loadu_si128((__m128i *)&B[k - 1 + buf_stride]);
- __m128i tmp11 = _mm_loadu_si128((__m128i *)&B[k + 3 + buf_stride]);
-
- __m128i b0 = _mm_add_epi32(
- _mm_add_epi32(_mm_add_epi32(_mm_alignr_epi8(tmp9, tmp8, 4), tmp8),
- _mm_add_epi32(_mm_alignr_epi8(tmp9, tmp8, 8),
- _mm_alignr_epi8(tmp11, tmp10, 4))),
- _mm_alignr_epi8(tmp7, tmp6, 4));
- __m128i b1 =
- _mm_add_epi32(_mm_add_epi32(tmp6, tmp10),
- _mm_add_epi32(_mm_alignr_epi8(tmp7, tmp6, 8),
- _mm_alignr_epi8(tmp11, tmp10, 8)));
- __m128i b = _mm_sub_epi32(_mm_slli_epi32(_mm_add_epi32(b0, b1), 2), b1);
-
- __m128i src = _mm_cvtepu8_epi32(_mm_loadu_si128((__m128i *)&dgd[l]));
-
- __m128i rounding = _mm_set1_epi32(
- (1 << (SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS)) >> 1);
- __m128i v = _mm_add_epi32(_mm_mullo_epi32(a, src), b);
+// The final filter for self-guided restoration. Computes a weighted average
+// across A, B with "cross sums" (see cross_sum implementation above).
+static void final_filter(int32_t *dst, int dst_stride, const int32_t *A,
+ const int32_t *B, int buf_stride, const void *dgd8,
+ int dgd_stride, int width, int height, int highbd) {
+ const int nb = 5;
+ const __m128i rounding =
+ round_for_shift(SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+ const uint8_t *dgd_real =
+ highbd ? (const uint8_t *)CONVERT_TO_SHORTPTR(dgd8) : dgd8;
+
+ for (int i = 0; i < height; ++i) {
+ for (int j = 0; j < width; j += 4) {
+ const __m128i a = cross_sum(A + i * buf_stride + j, buf_stride);
+ const __m128i b = cross_sum(B + i * buf_stride + j, buf_stride);
+ const __m128i raw =
+ xx_loadl_64(dgd_real + ((i * dgd_stride + j) << highbd));
+ const __m128i src =
+ highbd ? _mm_cvtepu16_epi32(raw) : _mm_cvtepu8_epi32(raw);
+
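+ // a is a cross sum of A values (each at most SGRPROJ_SGR = 256) with
+ // weights summing to 32, and src is at most 2^12 - 1, so each 32-bit lane
+ // of a and src fits in 16 bits and the madd computes a * src exactly.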
+ __m128i v = _mm_add_epi32(_mm_madd_epi16(a, src), b);
__m128i w = _mm_srai_epi32(_mm_add_epi32(v, rounding),
SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- _mm_storeu_si128((__m128i *)&dst[m], w);
- }
- // Deal with any extra pixels at the right-hand edge of the frame
- // (typically have 2 such pixels, but may have anywhere between 0 and 3)
- for (; j < width - 1; ++j) {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 5;
- const int32_t a =
- (A[k] + A[k - 1] + A[k + 1] + A[k - buf_stride] + A[k + buf_stride]) *
- 4 +
- (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] +
- A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) *
- 3;
- const int32_t b =
- (B[k] + B[k - 1] + B[k + 1] + B[k - buf_stride] + B[k + buf_stride]) *
- 4 +
- (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] +
- B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) *
- 3;
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
-
- j = width - 1;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
- A[k - 1] + A[k - buf_stride - 1] +
- A[k + buf_stride - 1];
- const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) +
- B[k - 1] + B[k - buf_stride - 1] +
- B[k + buf_stride - 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- }
-
- {
- i = height - 1;
- j = 0;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = 3 * A[k] + 2 * A[k + 1] + 2 * A[k - buf_stride] +
- A[k - buf_stride + 1];
- const int32_t b = 3 * B[k] + 2 * B[k + 1] + 2 * B[k - buf_stride] +
- B[k - buf_stride + 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- for (j = 1; j < width - 1; ++j) {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - buf_stride] +
- A[k - buf_stride - 1] + A[k - buf_stride + 1];
- const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k - buf_stride] +
- B[k - buf_stride - 1] + B[k - buf_stride + 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- j = width - 1;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = 3 * A[k] + 2 * A[k - 1] + 2 * A[k - buf_stride] +
- A[k - buf_stride - 1];
- const int32_t b = 3 * B[k] + 2 * B[k - 1] + 2 * B[k - buf_stride] +
- B[k - buf_stride - 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- }
-}
-
-void av1_highpass_filter_sse4_1(uint8_t *dgd, int width, int height, int stride,
- int32_t *dst, int dst_stride, int corner,
- int edge) {
- int i, j;
- const int center = (1 << SGRPROJ_RST_BITS) - 4 * (corner + edge);
-
- {
- i = 0;
- j = 0;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k + 1] + dgd[k + stride] + dgd[k] * 2) +
- corner *
- (dgd[k + stride + 1] + dgd[k + 1] + dgd[k + stride] + dgd[k]);
- }
- for (j = 1; j < width - 1; ++j) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] = center * dgd[k] +
- edge * (dgd[k - 1] + dgd[k + stride] + dgd[k + 1] + dgd[k]) +
- corner * (dgd[k + stride - 1] + dgd[k + stride + 1] +
- dgd[k - 1] + dgd[k + 1]);
- }
- j = width - 1;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k - 1] + dgd[k + stride] + dgd[k] * 2) +
- corner *
- (dgd[k + stride - 1] + dgd[k - 1] + dgd[k + stride] + dgd[k]);
- }
- }
- {
- i = height - 1;
- j = 0;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k + 1] + dgd[k - stride] + dgd[k] * 2) +
- corner *
- (dgd[k - stride + 1] + dgd[k + 1] + dgd[k - stride] + dgd[k]);
- }
- for (j = 1; j < width - 1; ++j) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] = center * dgd[k] +
- edge * (dgd[k - 1] + dgd[k - stride] + dgd[k + 1] + dgd[k]) +
- corner * (dgd[k - stride - 1] + dgd[k - stride + 1] +
- dgd[k - 1] + dgd[k + 1]);
- }
- j = width - 1;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k - 1] + dgd[k - stride] + dgd[k] * 2) +
- corner *
- (dgd[k - stride - 1] + dgd[k - 1] + dgd[k - stride] + dgd[k]);
- }
- }
- __m128i center_ = _mm_set1_epi16(center);
- __m128i edge_ = _mm_set1_epi16(edge);
- __m128i corner_ = _mm_set1_epi16(corner);
- for (i = 1; i < height - 1; ++i) {
- j = 0;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] +
- edge * (dgd[k - stride] + dgd[k + 1] + dgd[k + stride] + dgd[k]) +
- corner * (dgd[k + stride + 1] + dgd[k - stride + 1] +
- dgd[k - stride] + dgd[k + stride]);
- }
- // Process in units of 8 pixels at a time.
- for (j = 1; j < width - 8; j += 8) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
-
- __m128i a = _mm_loadu_si128((__m128i *)&dgd[k - stride - 1]);
- __m128i b = _mm_loadu_si128((__m128i *)&dgd[k - 1]);
- __m128i c = _mm_loadu_si128((__m128i *)&dgd[k + stride - 1]);
-
- __m128i tl = _mm_cvtepu8_epi16(a);
- __m128i tr = _mm_cvtepu8_epi16(_mm_srli_si128(a, 8));
- __m128i cl = _mm_cvtepu8_epi16(b);
- __m128i cr = _mm_cvtepu8_epi16(_mm_srli_si128(b, 8));
- __m128i bl = _mm_cvtepu8_epi16(c);
- __m128i br = _mm_cvtepu8_epi16(_mm_srli_si128(c, 8));
-
- __m128i x = _mm_alignr_epi8(cr, cl, 2);
- __m128i y = _mm_add_epi16(_mm_add_epi16(_mm_alignr_epi8(tr, tl, 2), cl),
- _mm_add_epi16(_mm_alignr_epi8(br, bl, 2),
- _mm_alignr_epi8(cr, cl, 4)));
- __m128i z = _mm_add_epi16(_mm_add_epi16(tl, bl),
- _mm_add_epi16(_mm_alignr_epi8(tr, tl, 4),
- _mm_alignr_epi8(br, bl, 4)));
-
- __m128i res = _mm_add_epi16(_mm_mullo_epi16(x, center_),
- _mm_add_epi16(_mm_mullo_epi16(y, edge_),
- _mm_mullo_epi16(z, corner_)));
-
- _mm_storeu_si128((__m128i *)&dst[l], _mm_cvtepi16_epi32(res));
- _mm_storeu_si128((__m128i *)&dst[l + 4],
- _mm_cvtepi16_epi32(_mm_srli_si128(res, 8)));
- }
- // If there are enough pixels left in this row, do another batch of 4
- // pixels.
- for (; j < width - 4; j += 4) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
-
- __m128i a = _mm_loadl_epi64((__m128i *)&dgd[k - stride - 1]);
- __m128i b = _mm_loadl_epi64((__m128i *)&dgd[k - 1]);
- __m128i c = _mm_loadl_epi64((__m128i *)&dgd[k + stride - 1]);
-
- __m128i tl = _mm_cvtepu8_epi16(a);
- __m128i cl = _mm_cvtepu8_epi16(b);
- __m128i bl = _mm_cvtepu8_epi16(c);
-
- __m128i x = _mm_srli_si128(cl, 2);
- __m128i y = _mm_add_epi16(
- _mm_add_epi16(_mm_srli_si128(tl, 2), cl),
- _mm_add_epi16(_mm_srli_si128(bl, 2), _mm_srli_si128(cl, 4)));
- __m128i z = _mm_add_epi16(
- _mm_add_epi16(tl, bl),
- _mm_add_epi16(_mm_srli_si128(tl, 4), _mm_srli_si128(bl, 4)));
-
- __m128i res = _mm_add_epi16(_mm_mullo_epi16(x, center_),
- _mm_add_epi16(_mm_mullo_epi16(y, edge_),
- _mm_mullo_epi16(z, corner_)));
-
- _mm_storeu_si128((__m128i *)&dst[l], _mm_cvtepi16_epi32(res));
- }
- // Handle any leftover pixels
- for (; j < width - 1; ++j) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] +
- edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k + 1]) +
- corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
- dgd[k - stride + 1] + dgd[k + stride + 1]);
- }
- j = width - 1;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] +
- edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k]) +
- corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
- dgd[k - stride] + dgd[k + stride]);
+ xx_storeu_128(dst + i * dst_stride + j, w);
}
}
}
-void apply_selfguided_restoration_sse4_1(uint8_t *dat, int width, int height,
- int stride, int eps, int *xqd,
- uint8_t *dst, int dst_stride,
- int32_t *tmpbuf) {
- int xq[2];
- int32_t *flt1 = tmpbuf;
- int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
- int i, j;
- assert(width * height <= RESTORATION_TILEPELS_MAX);
-#if USE_HIGHPASS_IN_SGRPROJ
- av1_highpass_filter_sse4_1(dat, width, height, stride, flt1, width,
- sgr_params[eps].corner, sgr_params[eps].edge);
-#else
- av1_selfguided_restoration_sse4_1(dat, width, height, stride, flt1, width,
- sgr_params[eps].r1, sgr_params[eps].e1);
-#endif // USE_HIGHPASS_IN_SGRPROJ
- av1_selfguided_restoration_sse4_1(dat, width, height, stride, flt2, width,
- sgr_params[eps].r2, sgr_params[eps].e2);
- decode_xq(xqd, xq);
-
- __m128i xq0 = _mm_set1_epi32(xq[0]);
- __m128i xq1 = _mm_set1_epi32(xq[1]);
- for (i = 0; i < height; ++i) {
- // Calculate output in batches of 8 pixels
- for (j = 0; j < width; j += 8) {
- const int k = i * width + j;
- const int l = i * stride + j;
- const int m = i * dst_stride + j;
- __m128i src =
- _mm_slli_epi16(_mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *)&dat[l])),
- SGRPROJ_RST_BITS);
-
- const __m128i u_0 = _mm_cvtepu16_epi32(src);
- const __m128i u_1 = _mm_cvtepu16_epi32(_mm_srli_si128(src, 8));
-
- const __m128i f1_0 =
- _mm_sub_epi32(_mm_loadu_si128((__m128i *)&flt1[k]), u_0);
- const __m128i f2_0 =
- _mm_sub_epi32(_mm_loadu_si128((__m128i *)&flt2[k]), u_0);
- const __m128i f1_1 =
- _mm_sub_epi32(_mm_loadu_si128((__m128i *)&flt1[k + 4]), u_1);
- const __m128i f2_1 =
- _mm_sub_epi32(_mm_loadu_si128((__m128i *)&flt2[k + 4]), u_1);
-
- const __m128i v_0 = _mm_add_epi32(
- _mm_add_epi32(_mm_mullo_epi32(xq0, f1_0), _mm_mullo_epi32(xq1, f2_0)),
- _mm_slli_epi32(u_0, SGRPROJ_PRJ_BITS));
- const __m128i v_1 = _mm_add_epi32(
- _mm_add_epi32(_mm_mullo_epi32(xq0, f1_1), _mm_mullo_epi32(xq1, f2_1)),
- _mm_slli_epi32(u_1, SGRPROJ_PRJ_BITS));
-
- const __m128i rounding =
- _mm_set1_epi32((1 << (SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS)) >> 1);
- const __m128i w_0 = _mm_srai_epi32(_mm_add_epi32(v_0, rounding),
- SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
- const __m128i w_1 = _mm_srai_epi32(_mm_add_epi32(v_1, rounding),
- SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
-
- const __m128i tmp = _mm_packs_epi32(w_0, w_1);
- const __m128i res = _mm_packus_epi16(tmp, tmp /* "don't care" value */);
- _mm_storel_epi64((__m128i *)&dst[m], res);
- }
- // Process leftover pixels
- for (; j < width; ++j) {
- const int k = i * width + j;
- const int l = i * stride + j;
- const int m = i * dst_stride + j;
- const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS);
- const int32_t f1 = (int32_t)flt1[k] - u;
- const int32_t f2 = (int32_t)flt2[k] - u;
- const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
- const int16_t w =
- (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
- dst[m] = (uint16_t)clip_pixel(w);
- }
+// Assumes that C, D are integral images for the original buffer which has been
+// extended to have a padding of SGRPROJ_BORDER_VERT/SGRPROJ_BORDER_HORZ pixels
+// on the sides. A, B, C, D point at logical position (0, 0).
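+//
+// Unlike calc_ab, this computes A and B only on every other row (i = -1, 1,
+// 3, ...); final_filter_fast below combines the neighbouring computed rows
+// for the rows in between.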
+static void calc_ab_fast(int32_t *A, int32_t *B, const int32_t *C,
+ const int32_t *D, int width, int height,
+ int buf_stride, int bit_depth, int sgr_params_idx,
+ int radius_idx) {
+ const sgr_params_type *const params = &sgr_params[sgr_params_idx];
+ const int r = params->r[radius_idx];
+ const int n = (2 * r + 1) * (2 * r + 1);
+ const __m128i s = _mm_set1_epi32(params->s[radius_idx]);
+ // one_by_x[n - 1] is 2^12/n, so it easily fits in an int16
+ const __m128i one_over_n = _mm_set1_epi32(one_by_x[n - 1]);
+
+ const __m128i rnd_z = round_for_shift(SGRPROJ_MTABLE_BITS);
+ const __m128i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
+
+ // Set up masks
+ const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+ __m128i mask[4];
+ for (int idx = 0; idx < 4; idx++) {
+ const __m128i shift = _mm_cvtsi32_si128(8 * (4 - idx));
+ mask[idx] = _mm_cvtepi8_epi32(_mm_srl_epi64(ones32, shift));
}
-}
-
-#if CONFIG_HIGHBITDEPTH
-// Only the vertical sums need to be adjusted for highbitdepth
-static void highbd_selfguided_restoration_1_v(uint16_t *src, int width,
- int height, int src_stride,
- int32_t *A, int32_t *B,
- int buf_stride) {
- int i, j;
+ for (int i = -1; i < height + 1; i += 2) {
+ for (int j = -1; j < width + 1; j += 4) {
+ const int32_t *Cij = C + i * buf_stride + j;
+ const int32_t *Dij = D + i * buf_stride + j;
- int width_extend = (width + 3) & ~3;
- for (j = 0; j < width_extend; j += 4) {
- __m128i a, b, x, y, x2, y2;
- __m128i sum, sum_sq, tmp;
+ __m128i sum1 = boxsum_from_ii(Dij, buf_stride, r);
+ __m128i sum2 = boxsum_from_ii(Cij, buf_stride, r);
- a = _mm_loadl_epi64((__m128i *)&src[j]);
- b = _mm_loadl_epi64((__m128i *)&src[src_stride + j]);
+ // When width + 2 isn't a multiple of 4, sum1 and sum2 will contain
+ // some uninitialised data in their upper words. We use a mask to
+ // ensure that these bits are set to 0.
+ int idx = AOMMIN(4, width + 1 - j);
+ assert(idx >= 1);
- sum = _mm_cvtepi16_epi32(_mm_add_epi16(a, b));
- tmp = _mm_unpacklo_epi16(a, b);
- sum_sq = _mm_madd_epi16(tmp, tmp);
+ if (idx < 4) {
+ sum1 = _mm_and_si128(mask[idx], sum1);
+ sum2 = _mm_and_si128(mask[idx], sum2);
+ }
- _mm_store_si128((__m128i *)&B[j], sum);
- _mm_store_si128((__m128i *)&A[j], sum_sq);
+ const __m128i p = compute_p(sum1, sum2, bit_depth, n);
- x = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[2 * src_stride + j]));
- sum = _mm_add_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_add_epi32(sum_sq, x2);
+ const __m128i z = _mm_min_epi32(
+ _mm_srli_epi32(_mm_add_epi32(_mm_mullo_epi32(p, s), rnd_z),
+ SGRPROJ_MTABLE_BITS),
+ _mm_set1_epi32(255));
- for (i = 1; i < height - 2; ++i) {
- _mm_store_si128((__m128i *)&B[i * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
+ // 'Gather' type instructions are not available pre-AVX2, so synthesize a
+ // gather using scalar loads.
+ const __m128i a_res = _mm_set_epi32(x_by_xplus1[_mm_extract_epi32(z, 3)],
+ x_by_xplus1[_mm_extract_epi32(z, 2)],
+ x_by_xplus1[_mm_extract_epi32(z, 1)],
+ x_by_xplus1[_mm_extract_epi32(z, 0)]);
- x = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 1) * src_stride + j]));
- y = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i + 2) * src_stride + j]));
+ xx_storeu_128(A + i * buf_stride + j, a_res);
- sum = _mm_add_epi32(sum, _mm_sub_epi32(y, x));
+ const __m128i a_complement =
+ _mm_sub_epi32(_mm_set1_epi32(SGRPROJ_SGR), a_res);
- x2 = _mm_mullo_epi32(x, x);
- y2 = _mm_mullo_epi32(y, y);
+ // sum1 might have lanes greater than 2^15, so we can't use madd to do
+ // multiplication involving sum1. However, a_complement and one_over_n
+ // are both comfortably below 2^15, so we can multiply them first.
+ const __m128i a_comp_over_n = _mm_madd_epi16(a_complement, one_over_n);
+ const __m128i b_int = _mm_mullo_epi32(a_comp_over_n, sum1);
+ const __m128i b_res =
+ _mm_srli_epi32(_mm_add_epi32(b_int, rnd_res), SGRPROJ_RECIP_BITS);
- sum_sq = _mm_add_epi32(sum_sq, _mm_sub_epi32(y2, x2));
+ xx_storeu_128(B + i * buf_stride + j, b_res);
}
- _mm_store_si128((__m128i *)&B[i * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 1) * src_stride + j]));
- sum = _mm_sub_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_sub_epi32(sum_sq, x2);
-
- _mm_store_si128((__m128i *)&B[(i + 1) * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[(i + 1) * buf_stride + j], sum_sq);
}
}
-static void highbd_selfguided_restoration_2_v(uint16_t *src, int width,
- int height, int src_stride,
- int32_t *A, int32_t *B,
- int buf_stride) {
- int i, j;
-
- int width_extend = (width + 3) & ~3;
- for (j = 0; j < width_extend; j += 4) {
- __m128i a, b, c, c2, x, y, x2, y2;
- __m128i sum, sum_sq, tmp;
-
- a = _mm_loadl_epi64((__m128i *)&src[j]);
- b = _mm_loadl_epi64((__m128i *)&src[src_stride + j]);
- c = _mm_loadl_epi64((__m128i *)&src[2 * src_stride + j]);
-
- sum = _mm_cvtepi16_epi32(_mm_add_epi16(_mm_add_epi16(a, b), c));
- // Important: We need to widen *before* squaring here, since
- // c^2 may be up to 2^24.
- c = _mm_cvtepu16_epi32(c);
- c2 = _mm_mullo_epi32(c, c);
- tmp = _mm_unpacklo_epi16(a, b);
- sum_sq = _mm_add_epi32(_mm_madd_epi16(tmp, tmp), c2);
-
- _mm_store_si128((__m128i *)&B[j], sum);
- _mm_store_si128((__m128i *)&A[j], sum_sq);
-
- x = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[3 * src_stride + j]));
- sum = _mm_add_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_add_epi32(sum_sq, x2);
-
- _mm_store_si128((__m128i *)&B[buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[buf_stride + j], sum_sq);
-
- x = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[4 * src_stride + j]));
- sum = _mm_add_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_add_epi32(sum_sq, x2);
-
- for (i = 2; i < height - 3; ++i) {
- _mm_store_si128((__m128i *)&B[i * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 2) * src_stride + j]));
- y = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i + 3) * src_stride + j]));
-
- sum = _mm_add_epi32(sum, _mm_sub_epi32(y, x));
-
- x2 = _mm_mullo_epi32(x, x);
- y2 = _mm_mullo_epi32(y, y);
-
- sum_sq = _mm_add_epi32(sum_sq, _mm_sub_epi32(y2, x2));
- }
- _mm_store_si128((__m128i *)&B[i * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 2) * src_stride + j]));
- sum = _mm_sub_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_sub_epi32(sum_sq, x2);
-
- _mm_store_si128((__m128i *)&B[(i + 1) * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[(i + 1) * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 1) * src_stride + j]));
- sum = _mm_sub_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_sub_epi32(sum_sq, x2);
-
- _mm_store_si128((__m128i *)&B[(i + 2) * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[(i + 2) * buf_stride + j], sum_sq);
- }
+// Calculate 4 values of the "cross sum" starting at buf.
+//
+// Pixels are indexed like this:
+// xtl xt xtr
+// - buf -
+// xbl xb xbr
+//
+// Pixels are weighted like this:
+// 5 6 5
+// 0 0 0
+// 5 6 5
+//
+// fives = xtl + xtr + xbl + xbr
+// sixes = xt + xb
+// cross_sum = 6 * sixes + 5 * fives
+// = 5 * (fives + sixes) + sixes
+// = (fives + sixes) << 2 + (fives + sixes) + sixes
+static INLINE __m128i cross_sum_fast_even_row(const int32_t *buf, int stride) {
+ const __m128i xtl = xx_loadu_128(buf - 1 - stride);
+ const __m128i xt = xx_loadu_128(buf - stride);
+ const __m128i xtr = xx_loadu_128(buf + 1 - stride);
+ const __m128i xbl = xx_loadu_128(buf - 1 + stride);
+ const __m128i xb = xx_loadu_128(buf + stride);
+ const __m128i xbr = xx_loadu_128(buf + 1 + stride);
+
+ const __m128i fives =
+ _mm_add_epi32(xtl, _mm_add_epi32(xtr, _mm_add_epi32(xbr, xbl)));
+ const __m128i sixes = _mm_add_epi32(xt, xb);
+ const __m128i fives_plus_sixes = _mm_add_epi32(fives, sixes);
+
+ return _mm_add_epi32(
+ _mm_add_epi32(_mm_slli_epi32(fives_plus_sixes, 2), fives_plus_sixes),
+ sixes);
+}
+
+// Calculate 4 values of the "cross sum" starting at buf.
+//
+// Pixels are indexed like this:
+// xl x xr
+//
+// Pixels are weighted like this:
+// 5 6 5
+//
+// buf points to x
+//
+// fives = xl + xr
+// sixes = x
+// cross_sum = 5 * fives + 6 * sixes
+// = 4 * (fives + sixes) + (fives + sixes) + sixes
+// = (fives + sixes) << 2 + (fives + sixes) + sixes
+static INLINE __m128i cross_sum_fast_odd_row(const int32_t *buf) {
+ const __m128i xl = xx_loadu_128(buf - 1);
+ const __m128i x = xx_loadu_128(buf);
+ const __m128i xr = xx_loadu_128(buf + 1);
+
+ const __m128i fives = _mm_add_epi32(xl, xr);
+ const __m128i sixes = x;
+
+ const __m128i fives_plus_sixes = _mm_add_epi32(fives, sixes);
+
+ return _mm_add_epi32(
+ _mm_add_epi32(_mm_slli_epi32(fives_plus_sixes, 2), fives_plus_sixes),
+ sixes);
}
-static void highbd_selfguided_restoration_3_v(uint16_t *src, int width,
- int height, int src_stride,
- int32_t *A, int32_t *B,
- int buf_stride) {
- int i, j;
-
- int width_extend = (width + 3) & ~3;
- for (j = 0; j < width_extend; j += 4) {
- __m128i a, b, c, d, x, y, x2, y2;
- __m128i sum, sum_sq, tmp, tmp2;
-
- a = _mm_loadl_epi64((__m128i *)&src[j]);
- b = _mm_loadl_epi64((__m128i *)&src[src_stride + j]);
- c = _mm_loadl_epi64((__m128i *)&src[2 * src_stride + j]);
- d = _mm_loadl_epi64((__m128i *)&src[3 * src_stride + j]);
-
- sum = _mm_cvtepi16_epi32(
- _mm_add_epi16(_mm_add_epi16(a, b), _mm_add_epi16(c, d)));
- tmp = _mm_unpacklo_epi16(a, b);
- tmp2 = _mm_unpacklo_epi16(c, d);
- sum_sq =
- _mm_add_epi32(_mm_madd_epi16(tmp, tmp), _mm_madd_epi16(tmp2, tmp2));
-
- _mm_store_si128((__m128i *)&B[j], sum);
- _mm_store_si128((__m128i *)&A[j], sum_sq);
-
- x = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[4 * src_stride + j]));
- sum = _mm_add_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_add_epi32(sum_sq, x2);
-
- _mm_store_si128((__m128i *)&B[buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[buf_stride + j], sum_sq);
-
- x = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[5 * src_stride + j]));
- sum = _mm_add_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_add_epi32(sum_sq, x2);
-
- _mm_store_si128((__m128i *)&B[2 * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[2 * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[6 * src_stride + j]));
- sum = _mm_add_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_add_epi32(sum_sq, x2);
-
- for (i = 3; i < height - 4; ++i) {
- _mm_store_si128((__m128i *)&B[i * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 3) * src_stride + j]));
- y = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i + 4) * src_stride + j]));
-
- sum = _mm_add_epi32(sum, _mm_sub_epi32(y, x));
-
- x2 = _mm_mullo_epi32(x, x);
- y2 = _mm_mullo_epi32(y, y);
-
- sum_sq = _mm_add_epi32(sum_sq, _mm_sub_epi32(y2, x2));
+// The final filter for the self-guided restoration. Computes a
+// weighted average across A, B with "cross sums" (see cross_sum_...
+// implementations above).
+static void final_filter_fast(int32_t *dst, int dst_stride, const int32_t *A,
+ const int32_t *B, int buf_stride,
+ const void *dgd8, int dgd_stride, int width,
+ int height, int highbd) {
+ const int nb0 = 5;
+ const int nb1 = 4;
+
+ const __m128i rounding0 =
+ round_for_shift(SGRPROJ_SGR_BITS + nb0 - SGRPROJ_RST_BITS);
+ const __m128i rounding1 =
+ round_for_shift(SGRPROJ_SGR_BITS + nb1 - SGRPROJ_RST_BITS);
+
+ const uint8_t *dgd_real =
+ highbd ? (const uint8_t *)CONVERT_TO_SHORTPTR(dgd8) : dgd8;
+
+ for (int i = 0; i < height; ++i) {
+ if (!(i & 1)) { // even row
+ for (int j = 0; j < width; j += 4) {
+ const __m128i a =
+ cross_sum_fast_even_row(A + i * buf_stride + j, buf_stride);
+ const __m128i b =
+ cross_sum_fast_even_row(B + i * buf_stride + j, buf_stride);
+ const __m128i raw =
+ xx_loadl_64(dgd_real + ((i * dgd_stride + j) << highbd));
+ const __m128i src =
+ highbd ? _mm_cvtepu16_epi32(raw) : _mm_cvtepu8_epi32(raw);
+
+ __m128i v = _mm_add_epi32(_mm_madd_epi16(a, src), b);
+ __m128i w = _mm_srai_epi32(_mm_add_epi32(v, rounding0),
+ SGRPROJ_SGR_BITS + nb0 - SGRPROJ_RST_BITS);
+
+ xx_storeu_128(dst + i * dst_stride + j, w);
+ }
+ } else { // odd row
+ for (int j = 0; j < width; j += 4) {
+ const __m128i a = cross_sum_fast_odd_row(A + i * buf_stride + j);
+ const __m128i b = cross_sum_fast_odd_row(B + i * buf_stride + j);
+ const __m128i raw =
+ xx_loadl_64(dgd_real + ((i * dgd_stride + j) << highbd));
+ const __m128i src =
+ highbd ? _mm_cvtepu16_epi32(raw) : _mm_cvtepu8_epi32(raw);
+
+ __m128i v = _mm_add_epi32(_mm_madd_epi16(a, src), b);
+ __m128i w = _mm_srai_epi32(_mm_add_epi32(v, rounding1),
+ SGRPROJ_SGR_BITS + nb1 - SGRPROJ_RST_BITS);
+
+ xx_storeu_128(dst + i * dst_stride + j, w);
+ }
}
- _mm_store_si128((__m128i *)&B[i * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 3) * src_stride + j]));
- sum = _mm_sub_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_sub_epi32(sum_sq, x2);
-
- _mm_store_si128((__m128i *)&B[(i + 1) * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[(i + 1) * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 2) * src_stride + j]));
- sum = _mm_sub_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_sub_epi32(sum_sq, x2);
-
- _mm_store_si128((__m128i *)&B[(i + 2) * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[(i + 2) * buf_stride + j], sum_sq);
-
- x = _mm_cvtepu16_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 1) * src_stride + j]));
- sum = _mm_sub_epi32(sum, x);
- x2 = _mm_mullo_epi32(x, x);
- sum_sq = _mm_sub_epi32(sum_sq, x2);
-
- _mm_store_si128((__m128i *)&B[(i + 3) * buf_stride + j], sum);
- _mm_store_si128((__m128i *)&A[(i + 3) * buf_stride + j], sum_sq);
}
}
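// For reference, one output pixel of final_filter_fast reduces to the scalar
// expression sketched below (the helper name is illustrative only). The
// cross-sum weights total 32 on even rows and 16 on odd rows, which is where
// nb0 = 5 and nb1 = 4 come from.
static int32_t final_filter_fast_pixel_scalar(int32_t a_sum, int32_t b_sum,
                                              int32_t src_pixel, int nb) {
  const int shift = SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS;
  const int32_t v = a_sum * src_pixel + b_sum;
  return (v + (1 << (shift - 1))) >> shift;  // ROUND_POWER_OF_TWO(v, shift)
}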
-void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width,
- int height, int dgd_stride,
- int32_t *dst, int dst_stride,
- int bit_depth, int r, int eps) {
+void av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width,
+ int height, int dgd_stride,
+ int32_t *flt0, int32_t *flt1,
+ int flt_stride, int sgr_params_idx,
+ int bit_depth, int highbd) {
+ DECLARE_ALIGNED(16, int32_t, buf[4 * RESTORATION_PROC_UNIT_PELS]);
+ memset(buf, 0, sizeof(buf));
+
const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
- int32_t A_[RESTORATION_PROC_UNIT_PELS];
- int32_t B_[RESTORATION_PROC_UNIT_PELS];
- int32_t *A = A_;
- int32_t *B = B_;
- int i, j;
+
// Adjusting the stride of A and B here appears to avoid bad cache effects,
// leading to a significant speed improvement.
// We also align the stride to a multiple of 16 bytes for efficiency.
int buf_stride = ((width_ext + 3) & ~3) + 16;
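  // Worked example (assuming SGRPROJ_BORDER_HORZ == 3, as in restoration.h):
  // for a 64-pixel-wide processing unit, width_ext = 70, so
  // buf_stride = ((70 + 3) & ~3) + 16 = 72 + 16 = 88 int32_t entries per row.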
- // Don't filter tiles with dimensions < 5 on any axis
- if ((width < 5) || (height < 5)) return;
-
- uint16_t *dgd0 = dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ;
- if (r == 1) {
- highbd_selfguided_restoration_1_v(dgd0, width_ext, height_ext, dgd_stride,
- A, B, buf_stride);
- selfguided_restoration_1_h(A, B, width_ext, height_ext, buf_stride, eps,
- bit_depth);
- } else if (r == 2) {
- highbd_selfguided_restoration_2_v(dgd0, width_ext, height_ext, dgd_stride,
- A, B, buf_stride);
- selfguided_restoration_2_h(A, B, width_ext, height_ext, buf_stride, eps,
- bit_depth);
- } else if (r == 3) {
- highbd_selfguided_restoration_3_v(dgd0, width_ext, height_ext, dgd_stride,
- A, B, buf_stride);
- selfguided_restoration_3_h(A, B, width_ext, height_ext, buf_stride, eps,
- bit_depth);
- } else {
- assert(0);
- }
- A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
- B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
-
- {
- i = 0;
- j = 0;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = 3 * A[k] + 2 * A[k + 1] + 2 * A[k + buf_stride] +
- A[k + buf_stride + 1];
- const int32_t b = 3 * B[k] + 2 * B[k + 1] + 2 * B[k + buf_stride] +
- B[k + buf_stride + 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- for (j = 1; j < width - 1; ++j) {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + buf_stride] +
- A[k + buf_stride - 1] + A[k + buf_stride + 1];
- const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k + buf_stride] +
- B[k + buf_stride - 1] + B[k + buf_stride + 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- j = width - 1;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = 3 * A[k] + 2 * A[k - 1] + 2 * A[k + buf_stride] +
- A[k + buf_stride - 1];
- const int32_t b = 3 * B[k] + 2 * B[k - 1] + 2 * B[k + buf_stride] +
- B[k + buf_stride - 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- }
- for (i = 1; i < height - 1; ++i) {
- j = 0;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
- A[k + 1] + A[k - buf_stride + 1] +
- A[k + buf_stride + 1];
- const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) +
- B[k + 1] + B[k - buf_stride + 1] +
- B[k + buf_stride + 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
-
- // Vectorize the innermost loop
- for (j = 1; j < width - 1; j += 4) {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 5;
-
- __m128i tmp0 = _mm_loadu_si128((__m128i *)&A[k - 1 - buf_stride]);
- __m128i tmp1 = _mm_loadu_si128((__m128i *)&A[k + 3 - buf_stride]);
- __m128i tmp2 = _mm_loadu_si128((__m128i *)&A[k - 1]);
- __m128i tmp3 = _mm_loadu_si128((__m128i *)&A[k + 3]);
- __m128i tmp4 = _mm_loadu_si128((__m128i *)&A[k - 1 + buf_stride]);
- __m128i tmp5 = _mm_loadu_si128((__m128i *)&A[k + 3 + buf_stride]);
-
- __m128i a0 = _mm_add_epi32(
- _mm_add_epi32(_mm_add_epi32(_mm_alignr_epi8(tmp3, tmp2, 4), tmp2),
- _mm_add_epi32(_mm_alignr_epi8(tmp3, tmp2, 8),
- _mm_alignr_epi8(tmp5, tmp4, 4))),
- _mm_alignr_epi8(tmp1, tmp0, 4));
- __m128i a1 = _mm_add_epi32(_mm_add_epi32(tmp0, tmp4),
- _mm_add_epi32(_mm_alignr_epi8(tmp1, tmp0, 8),
- _mm_alignr_epi8(tmp5, tmp4, 8)));
- __m128i a = _mm_sub_epi32(_mm_slli_epi32(_mm_add_epi32(a0, a1), 2), a1);
-
- __m128i tmp6 = _mm_loadu_si128((__m128i *)&B[k - 1 - buf_stride]);
- __m128i tmp7 = _mm_loadu_si128((__m128i *)&B[k + 3 - buf_stride]);
- __m128i tmp8 = _mm_loadu_si128((__m128i *)&B[k - 1]);
- __m128i tmp9 = _mm_loadu_si128((__m128i *)&B[k + 3]);
- __m128i tmp10 = _mm_loadu_si128((__m128i *)&B[k - 1 + buf_stride]);
- __m128i tmp11 = _mm_loadu_si128((__m128i *)&B[k + 3 + buf_stride]);
-
- __m128i b0 = _mm_add_epi32(
- _mm_add_epi32(_mm_add_epi32(_mm_alignr_epi8(tmp9, tmp8, 4), tmp8),
- _mm_add_epi32(_mm_alignr_epi8(tmp9, tmp8, 8),
- _mm_alignr_epi8(tmp11, tmp10, 4))),
- _mm_alignr_epi8(tmp7, tmp6, 4));
- __m128i b1 =
- _mm_add_epi32(_mm_add_epi32(tmp6, tmp10),
- _mm_add_epi32(_mm_alignr_epi8(tmp7, tmp6, 8),
- _mm_alignr_epi8(tmp11, tmp10, 8)));
- __m128i b = _mm_sub_epi32(_mm_slli_epi32(_mm_add_epi32(b0, b1), 2), b1);
-
- __m128i src = _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i *)&dgd[l]));
-
- __m128i rounding = _mm_set1_epi32(
- (1 << (SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS)) >> 1);
- __m128i v = _mm_add_epi32(_mm_mullo_epi32(a, src), b);
- __m128i w = _mm_srai_epi32(_mm_add_epi32(v, rounding),
- SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- _mm_storeu_si128((__m128i *)&dst[m], w);
- }
-
- // Deal with any extra pixels at the right-hand edge of the frame
- // (typically have 2 such pixels, but may have anywhere between 0 and 3)
- for (; j < width - 1; ++j) {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 5;
- const int32_t a =
- (A[k] + A[k - 1] + A[k + 1] + A[k - buf_stride] + A[k + buf_stride]) *
- 4 +
- (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] +
- A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) *
- 3;
- const int32_t b =
- (B[k] + B[k - 1] + B[k + 1] + B[k - buf_stride] + B[k + buf_stride]) *
- 4 +
- (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] +
- B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) *
- 3;
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
-
- j = width - 1;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
- A[k - 1] + A[k - buf_stride - 1] +
- A[k + buf_stride - 1];
- const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) +
- B[k - 1] + B[k - buf_stride - 1] +
- B[k + buf_stride - 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
+ // The "tl" pointers point at the top-left of the initialised data for the
+ // array. Adding 3 here ensures that column 1 is 16-byte aligned.
+ int32_t *Atl = buf + 0 * RESTORATION_PROC_UNIT_PELS + 3;
+ int32_t *Btl = buf + 1 * RESTORATION_PROC_UNIT_PELS + 3;
+ int32_t *Ctl = buf + 2 * RESTORATION_PROC_UNIT_PELS + 3;
+ int32_t *Dtl = buf + 3 * RESTORATION_PROC_UNIT_PELS + 3;
+
+ // The "0" pointers are (- SGRPROJ_BORDER_VERT, -SGRPROJ_BORDER_HORZ). Note
+ // there's a zero row and column in A, B (integral images), so we move down
+ // and right one for them.
+ const int buf_diag_border =
+ SGRPROJ_BORDER_HORZ + buf_stride * SGRPROJ_BORDER_VERT;
+
+ int32_t *A0 = Atl + 1 + buf_stride;
+ int32_t *B0 = Btl + 1 + buf_stride;
+ int32_t *C0 = Ctl + 1 + buf_stride;
+ int32_t *D0 = Dtl + 1 + buf_stride;
+
+ // Finally, A, B, C, D point at position (0, 0).
+ int32_t *A = A0 + buf_diag_border;
+ int32_t *B = B0 + buf_diag_border;
+ int32_t *C = C0 + buf_diag_border;
+ int32_t *D = D0 + buf_diag_border;
+
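  // Worked example of the alignment claim above: buf is declared 16-byte
  // aligned, so with Atl = buf + 3 the entry &Atl[1] sits 4 int32_t (16 bytes)
  // past buf and lands back on a 16-byte boundary. The same holds for
  // Btl/Ctl/Dtl as long as RESTORATION_PROC_UNIT_PELS is a multiple of 4.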
+ const int dgd_diag_border =
+ SGRPROJ_BORDER_HORZ + dgd_stride * SGRPROJ_BORDER_VERT;
+ const uint8_t *dgd0 = dgd8 - dgd_diag_border;
+
+ // Generate integral images from the input. C will contain sums of squares; D
+ // will contain just sums
+ if (highbd)
+ integral_images_highbd(CONVERT_TO_SHORTPTR(dgd0), dgd_stride, width_ext,
+ height_ext, Ctl, Dtl, buf_stride);
+ else
+ integral_images(dgd0, dgd_stride, width_ext, height_ext, Ctl, Dtl,
+ buf_stride);
+
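  // In scalar terms (a sketch of what the two helpers above compute), C and D
  // are standard integral images with a zeroed top row and left column:
  //
  //   D[y + 1][x + 1] = D[y][x + 1] + D[y + 1][x] - D[y][x] + src[y][x]
  //   C[y + 1][x + 1] = C[y][x + 1] + C[y + 1][x] - C[y][x] + src[y][x]^2
  //
  // so any box sum (or sum of squares) over src can later be read back with
  // four lookups.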
+ const sgr_params_type *const params = &sgr_params[sgr_params_idx];
+ // Write to flt0 and flt1
+ // If params->r == 0 we skip the corresponding filter. We only allow one of
+ // the radii to be 0, as having both equal to 0 would be equivalent to
+ // skipping SGR entirely.
+ assert(!(params->r[0] == 0 && params->r[1] == 0));
+ assert(params->r[0] < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
+ assert(params->r[1] < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
+
+ if (params->r[0] > 0) {
+ calc_ab_fast(A, B, C, D, width, height, buf_stride, bit_depth,
+ sgr_params_idx, 0);
+ final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride,
+ width, height, highbd);
}
- {
- i = height - 1;
- j = 0;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = 3 * A[k] + 2 * A[k + 1] + 2 * A[k - buf_stride] +
- A[k - buf_stride + 1];
- const int32_t b = 3 * B[k] + 2 * B[k + 1] + 2 * B[k - buf_stride] +
- B[k - buf_stride + 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- for (j = 1; j < width - 1; ++j) {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - buf_stride] +
- A[k - buf_stride - 1] + A[k - buf_stride + 1];
- const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k - buf_stride] +
- B[k - buf_stride - 1] + B[k - buf_stride + 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- j = width - 1;
- {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 3;
- const int32_t a = 3 * A[k] + 2 * A[k - 1] + 2 * A[k - buf_stride] +
- A[k - buf_stride - 1];
- const int32_t b = 3 * B[k] + 2 * B[k - 1] + 2 * B[k - buf_stride] +
- B[k - buf_stride - 1];
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
+ if (params->r[1] > 0) {
+ calc_ab(A, B, C, D, width, height, buf_stride, bit_depth, sgr_params_idx,
+ 1);
+ final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
+ height, highbd);
}
}
-void av1_highpass_filter_highbd_sse4_1(uint16_t *dgd, int width, int height,
- int stride, int32_t *dst, int dst_stride,
- int corner, int edge) {
- int i, j;
- const int center = (1 << SGRPROJ_RST_BITS) - 4 * (corner + edge);
-
- {
- i = 0;
- j = 0;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k + 1] + dgd[k + stride] + dgd[k] * 2) +
- corner *
- (dgd[k + stride + 1] + dgd[k + 1] + dgd[k + stride] + dgd[k]);
- }
- for (j = 1; j < width - 1; ++j) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] = center * dgd[k] +
- edge * (dgd[k - 1] + dgd[k + stride] + dgd[k + 1] + dgd[k]) +
- corner * (dgd[k + stride - 1] + dgd[k + stride + 1] +
- dgd[k - 1] + dgd[k + 1]);
- }
- j = width - 1;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k - 1] + dgd[k + stride] + dgd[k] * 2) +
- corner *
- (dgd[k + stride - 1] + dgd[k - 1] + dgd[k + stride] + dgd[k]);
- }
- }
- __m128i center_ = _mm_set1_epi32(center);
- __m128i edge_ = _mm_set1_epi32(edge);
- __m128i corner_ = _mm_set1_epi32(corner);
- for (i = 1; i < height - 1; ++i) {
- j = 0;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] +
- edge * (dgd[k - stride] + dgd[k + 1] + dgd[k + stride] + dgd[k]) +
- corner * (dgd[k + stride + 1] + dgd[k - stride + 1] +
- dgd[k - stride] + dgd[k + stride]);
- }
- // Process 4 pixels at a time
- for (j = 1; j < width - 4; j += 4) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
-
- __m128i a = _mm_loadu_si128((__m128i *)&dgd[k - stride - 1]);
- __m128i b = _mm_loadu_si128((__m128i *)&dgd[k - 1]);
- __m128i c = _mm_loadu_si128((__m128i *)&dgd[k + stride - 1]);
-
- __m128i tl = _mm_cvtepu16_epi32(a);
- __m128i tr = _mm_cvtepu16_epi32(_mm_srli_si128(a, 8));
- __m128i cl = _mm_cvtepu16_epi32(b);
- __m128i cr = _mm_cvtepu16_epi32(_mm_srli_si128(b, 8));
- __m128i bl = _mm_cvtepu16_epi32(c);
- __m128i br = _mm_cvtepu16_epi32(_mm_srli_si128(c, 8));
-
- __m128i x = _mm_alignr_epi8(cr, cl, 4);
- __m128i y = _mm_add_epi32(_mm_add_epi32(_mm_alignr_epi8(tr, tl, 4), cl),
- _mm_add_epi32(_mm_alignr_epi8(br, bl, 4),
- _mm_alignr_epi8(cr, cl, 8)));
- __m128i z = _mm_add_epi32(_mm_add_epi32(tl, bl),
- _mm_add_epi32(_mm_alignr_epi8(tr, tl, 8),
- _mm_alignr_epi8(br, bl, 8)));
-
- __m128i res = _mm_add_epi32(_mm_mullo_epi32(x, center_),
- _mm_add_epi32(_mm_mullo_epi32(y, edge_),
- _mm_mullo_epi32(z, corner_)));
-
- _mm_storeu_si128((__m128i *)&dst[l], res);
- }
- // Handle any leftover pixels
- for (; j < width - 1; ++j) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] +
- edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k + 1]) +
- corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
- dgd[k - stride + 1] + dgd[k + stride + 1]);
- }
- j = width - 1;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] +
- edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k]) +
- corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
- dgd[k - stride] + dgd[k + stride]);
- }
- }
- {
- i = height - 1;
- j = 0;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k + 1] + dgd[k - stride] + dgd[k] * 2) +
- corner *
- (dgd[k - stride + 1] + dgd[k + 1] + dgd[k - stride] + dgd[k]);
- }
- for (j = 1; j < width - 1; ++j) {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] = center * dgd[k] +
- edge * (dgd[k - 1] + dgd[k - stride] + dgd[k + 1] + dgd[k]) +
- corner * (dgd[k - stride - 1] + dgd[k - stride + 1] +
- dgd[k - 1] + dgd[k + 1]);
- }
- j = width - 1;
- {
- const int k = i * stride + j;
- const int l = i * dst_stride + j;
- dst[l] =
- center * dgd[k] + edge * (dgd[k - 1] + dgd[k - stride] + dgd[k] * 2) +
- corner *
- (dgd[k - stride - 1] + dgd[k - 1] + dgd[k - stride] + dgd[k]);
- }
- }
-}
-
-void apply_selfguided_restoration_highbd_sse4_1(
- uint16_t *dat, int width, int height, int stride, int bit_depth, int eps,
- int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf) {
+void apply_selfguided_restoration_sse4_1(const uint8_t *dat8, int width,
+ int height, int stride, int eps,
+ const int *xqd, uint8_t *dst8,
+ int dst_stride, int32_t *tmpbuf,
+ int bit_depth, int highbd) {
+ int32_t *flt0 = tmpbuf;
+ int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX;
+ assert(width * height <= RESTORATION_UNITPELS_MAX);
+ av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt0, flt1,
+ width, eps, bit_depth, highbd);
+ const sgr_params_type *const params = &sgr_params[eps];
int xq[2];
- int32_t *flt1 = tmpbuf;
- int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
- int i, j;
- assert(width * height <= RESTORATION_TILEPELS_MAX);
-#if USE_HIGHPASS_IN_SGRPROJ
- av1_highpass_filter_highbd_sse4_1(dat, width, height, stride, flt1, width,
- sgr_params[eps].corner,
- sgr_params[eps].edge);
-#else
- av1_selfguided_restoration_highbd_sse4_1(dat, width, height, stride, flt1,
- width, bit_depth, sgr_params[eps].r1,
- sgr_params[eps].e1);
-#endif // USE_HIGHPASS_IN_SGRPROJ
- av1_selfguided_restoration_highbd_sse4_1(dat, width, height, stride, flt2,
- width, bit_depth, sgr_params[eps].r2,
- sgr_params[eps].e2);
- decode_xq(xqd, xq);
+ decode_xq(xqd, xq, params);
__m128i xq0 = _mm_set1_epi32(xq[0]);
__m128i xq1 = _mm_set1_epi32(xq[1]);
- for (i = 0; i < height; ++i) {
+
+ for (int i = 0; i < height; ++i) {
// Calculate output in batches of 8 pixels
- for (j = 0; j < width; j += 8) {
+ for (int j = 0; j < width; j += 8) {
const int k = i * width + j;
- const int l = i * stride + j;
const int m = i * dst_stride + j;
- __m128i src =
- _mm_slli_epi16(_mm_load_si128((__m128i *)&dat[l]), SGRPROJ_RST_BITS);
-
- const __m128i u_0 = _mm_cvtepu16_epi32(src);
- const __m128i u_1 = _mm_cvtepu16_epi32(_mm_srli_si128(src, 8));
-
- const __m128i f1_0 =
- _mm_sub_epi32(_mm_loadu_si128((__m128i *)&flt1[k]), u_0);
- const __m128i f2_0 =
- _mm_sub_epi32(_mm_loadu_si128((__m128i *)&flt2[k]), u_0);
- const __m128i f1_1 =
- _mm_sub_epi32(_mm_loadu_si128((__m128i *)&flt1[k + 4]), u_1);
- const __m128i f2_1 =
- _mm_sub_epi32(_mm_loadu_si128((__m128i *)&flt2[k + 4]), u_1);
-
- const __m128i v_0 = _mm_add_epi32(
- _mm_add_epi32(_mm_mullo_epi32(xq0, f1_0), _mm_mullo_epi32(xq1, f2_0)),
- _mm_slli_epi32(u_0, SGRPROJ_PRJ_BITS));
- const __m128i v_1 = _mm_add_epi32(
- _mm_add_epi32(_mm_mullo_epi32(xq0, f1_1), _mm_mullo_epi32(xq1, f2_1)),
- _mm_slli_epi32(u_1, SGRPROJ_PRJ_BITS));
+
+ const uint8_t *dat8ij = dat8 + i * stride + j;
+ __m128i src;
+ if (highbd) {
+ src = xx_loadu_128(CONVERT_TO_SHORTPTR(dat8ij));
+ } else {
+ src = _mm_cvtepu8_epi16(xx_loadl_64(dat8ij));
+ }
+
+ const __m128i u = _mm_slli_epi16(src, SGRPROJ_RST_BITS);
+ const __m128i u_0 = _mm_cvtepu16_epi32(u);
+ const __m128i u_1 = _mm_cvtepu16_epi32(_mm_srli_si128(u, 8));
+
+ __m128i v_0 = _mm_slli_epi32(u_0, SGRPROJ_PRJ_BITS);
+ __m128i v_1 = _mm_slli_epi32(u_1, SGRPROJ_PRJ_BITS);
+
+ if (params->r[0] > 0) {
+ const __m128i f1_0 = _mm_sub_epi32(xx_loadu_128(&flt0[k]), u_0);
+ v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq0, f1_0));
+
+ const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt0[k + 4]), u_1);
+ v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq0, f1_1));
+ }
+
+ if (params->r[1] > 0) {
+ const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0);
+ v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq1, f2_0));
+
+ const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1);
+ v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq1, f2_1));
+ }
const __m128i rounding =
- _mm_set1_epi32((1 << (SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS)) >> 1);
+ round_for_shift(SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
const __m128i w_0 = _mm_srai_epi32(_mm_add_epi32(v_0, rounding),
SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
const __m128i w_1 = _mm_srai_epi32(_mm_add_epi32(v_1, rounding),
SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
- // Pack into 16 bits and clamp to [0, 2^bit_depth)
- const __m128i tmp = _mm_packus_epi32(w_0, w_1);
- const __m128i max = _mm_set1_epi16((1 << bit_depth) - 1);
- const __m128i res = _mm_min_epi16(tmp, max);
-
- _mm_store_si128((__m128i *)&dst[m], res);
- }
- // Process leftover pixels
- for (; j < width; ++j) {
- const int k = i * width + j;
- const int l = i * stride + j;
- const int m = i * dst_stride + j;
- const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS);
- const int32_t f1 = (int32_t)flt1[k] - u;
- const int32_t f2 = (int32_t)flt2[k] - u;
- const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
- const int16_t w =
- (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
- dst[m] = (uint16_t)clip_pixel_highbd(w, bit_depth);
+ if (highbd) {
+ // Pack into 16 bits and clamp to [0, 2^bit_depth)
+ const __m128i tmp = _mm_packus_epi32(w_0, w_1);
+ const __m128i max = _mm_set1_epi16((1 << bit_depth) - 1);
+ const __m128i res = _mm_min_epi16(tmp, max);
+ xx_storeu_128(CONVERT_TO_SHORTPTR(dst8 + m), res);
+ } else {
+ // Pack into 8 bits and clamp to [0, 256)
+ const __m128i tmp = _mm_packs_epi32(w_0, w_1);
+ const __m128i res = _mm_packus_epi16(tmp, tmp /* "don't care" value */);
+ xx_storel_64(dst8 + m, res);
+ }
}
}
}
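// Scalar sketch of the per-pixel projection done in the loop above (the
// helper name is illustrative; clip_pixel_highbd() is the usual libaom clamp
// to [0, (1 << bit_depth) - 1], which also covers the 8-bit case). Terms
// whose radius is zero are simply skipped, mirroring the r[0]/r[1] checks.
static uint16_t sgr_project_pixel_scalar(int dat, int32_t f0, int32_t f1,
                                         const int *xq, int use_f0, int use_f1,
                                         int bit_depth) {
  const int32_t u = (int32_t)dat << SGRPROJ_RST_BITS;
  int32_t v = u << SGRPROJ_PRJ_BITS;
  if (use_f0) v += xq[0] * (f0 - u);
  if (use_f1) v += xq[1] * (f1 - u);
  const int shift = SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS;
  const int32_t w = (v + (1 << (shift - 1))) >> shift;
  return clip_pixel_highbd(w, bit_depth);
}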
-
-#endif
diff --git a/third_party/aom/av1/common/x86/warp_plane_sse2.c b/third_party/aom/av1/common/x86/warp_plane_sse2.c
deleted file mode 100644
index d30466ae6..000000000
--- a/third_party/aom/av1/common/x86/warp_plane_sse2.c
+++ /dev/null
@@ -1,359 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-
-#include "./av1_rtcd.h"
-#include "av1/common/warped_motion.h"
-
-void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
- int height, int stride, uint8_t *pred, int p_col,
- int p_row, int p_width, int p_height, int p_stride,
- int subsampling_x, int subsampling_y,
- ConvolveParams *conv_params, int16_t alpha,
- int16_t beta, int16_t gamma, int16_t delta) {
- int comp_avg = conv_params->do_average;
- __m128i tmp[15];
- int i, j, k;
- const int bd = 8;
-#if CONFIG_CONVOLVE_ROUND
- const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
- const int reduce_bits_horiz =
- use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz =
- use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
- if (use_conv_params) {
- conv_params->do_post_rounding = 1;
- }
- assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
-#else
- const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
-#endif
-
- /* Note: For this code to work, the left/right frame borders need to be
- extended by at least 13 pixels each. By the time we get here, other
- code will have set up this border, but we allow an explicit check
- for debugging purposes.
- */
- /*for (i = 0; i < height; ++i) {
- for (j = 0; j < 13; ++j) {
- assert(ref[i * stride - 13 + j] == ref[i * stride]);
- assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]);
- }
- }*/
-
- for (i = 0; i < p_height; i += 8) {
- for (j = 0; j < p_width; j += 8) {
- const int32_t src_x = (p_col + j + 4) << subsampling_x;
- const int32_t src_y = (p_row + i + 4) << subsampling_y;
- const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
- const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
- const int32_t x4 = dst_x >> subsampling_x;
- const int32_t y4 = dst_y >> subsampling_y;
-
- int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
- int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
- int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
- int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
-
- // Add in all the constant terms, including rounding and offset
- sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
- (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
- sy4 += gamma * (-4) + delta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
- (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
-
- sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
- sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
-
- // Horizontal filter
- // If the block is aligned such that, after clamping, every sample
- // would be taken from the leftmost/rightmost column, then we can
- // skip the expensive horizontal filter.
- if (ix4 <= -7) {
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- tmp[k + 7] = _mm_set1_epi16(
- (1 << (bd + WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS -
- 1)) +
- ref[iy * stride] *
- (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS)));
- }
- } else if (ix4 >= width + 6) {
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- tmp[k + 7] = _mm_set1_epi16(
- (1 << (bd + WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS -
- 1)) +
- ref[iy * stride + (width - 1)] *
- (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS)));
- }
- } else {
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- int sx = sx4 + beta * (k + 4);
-
- // Load source pixels
- const __m128i zero = _mm_setzero_si128();
- const __m128i src =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
-
- // Filter even-index pixels
- const __m128i tmp_0 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 0 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_2 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 2 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_4 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 4 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_6 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 6 * alpha) >> WARPEDDIFF_PREC_BITS)));
-
- // coeffs 0 1 0 1 2 3 2 3 for pixels 0, 2
- const __m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2);
- // coeffs 0 1 0 1 2 3 2 3 for pixels 4, 6
- const __m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6);
- // coeffs 4 5 4 5 6 7 6 7 for pixels 0, 2
- const __m128i tmp_12 = _mm_unpackhi_epi32(tmp_0, tmp_2);
- // coeffs 4 5 4 5 6 7 6 7 for pixels 4, 6
- const __m128i tmp_14 = _mm_unpackhi_epi32(tmp_4, tmp_6);
-
- // coeffs 0 1 0 1 0 1 0 1 for pixels 0, 2, 4, 6
- const __m128i coeff_0 = _mm_unpacklo_epi64(tmp_8, tmp_10);
- // coeffs 2 3 2 3 2 3 2 3 for pixels 0, 2, 4, 6
- const __m128i coeff_2 = _mm_unpackhi_epi64(tmp_8, tmp_10);
- // coeffs 4 5 4 5 4 5 4 5 for pixels 0, 2, 4, 6
- const __m128i coeff_4 = _mm_unpacklo_epi64(tmp_12, tmp_14);
- // coeffs 6 7 6 7 6 7 6 7 for pixels 0, 2, 4, 6
- const __m128i coeff_6 = _mm_unpackhi_epi64(tmp_12, tmp_14);
-
- const __m128i round_const = _mm_set1_epi32(
- (1 << offset_bits_horiz) + ((1 << reduce_bits_horiz) >> 1));
-
- // Calculate filtered results
- const __m128i src_0 = _mm_unpacklo_epi8(src, zero);
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_0);
- const __m128i src_2 = _mm_unpacklo_epi8(_mm_srli_si128(src, 2), zero);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_2);
- const __m128i src_4 = _mm_unpacklo_epi8(_mm_srli_si128(src, 4), zero);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_4);
- const __m128i src_6 = _mm_unpacklo_epi8(_mm_srli_si128(src, 6), zero);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_6);
-
- __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
- _mm_add_epi32(res_2, res_6));
- res_even = _mm_sra_epi32(_mm_add_epi32(res_even, round_const),
- _mm_cvtsi32_si128(reduce_bits_horiz));
-
- // Filter odd-index pixels
- const __m128i tmp_1 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 1 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_3 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 3 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_5 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 5 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_7 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sx + 7 * alpha) >> WARPEDDIFF_PREC_BITS)));
-
- const __m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3);
- const __m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7);
- const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_1, tmp_3);
- const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_5, tmp_7);
-
- const __m128i coeff_1 = _mm_unpacklo_epi64(tmp_9, tmp_11);
- const __m128i coeff_3 = _mm_unpackhi_epi64(tmp_9, tmp_11);
- const __m128i coeff_5 = _mm_unpacklo_epi64(tmp_13, tmp_15);
- const __m128i coeff_7 = _mm_unpackhi_epi64(tmp_13, tmp_15);
-
- const __m128i src_1 = _mm_unpacklo_epi8(_mm_srli_si128(src, 1), zero);
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_1);
- const __m128i src_3 = _mm_unpacklo_epi8(_mm_srli_si128(src, 3), zero);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_3);
- const __m128i src_5 = _mm_unpacklo_epi8(_mm_srli_si128(src, 5), zero);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_5);
- const __m128i src_7 = _mm_unpacklo_epi8(_mm_srli_si128(src, 7), zero);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_7);
-
- __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
- _mm_add_epi32(res_3, res_7));
- res_odd = _mm_sra_epi32(_mm_add_epi32(res_odd, round_const),
- _mm_cvtsi32_si128(reduce_bits_horiz));
-
- // Combine results into one register.
- // We store the columns in the order 0, 2, 4, 6, 1, 3, 5, 7
- // as this order helps with the vertical filter.
- tmp[k + 7] = _mm_packs_epi32(res_even, res_odd);
- }
- }
-
- // Vertical filter
- for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) {
- int sy = sy4 + delta * (k + 4);
-
- // Load from tmp and rearrange pairs of consecutive rows into the
- // column order 0 0 2 2 4 4 6 6; 1 1 3 3 5 5 7 7
- const __m128i *src = tmp + (k + 4);
- const __m128i src_0 = _mm_unpacklo_epi16(src[0], src[1]);
- const __m128i src_2 = _mm_unpacklo_epi16(src[2], src[3]);
- const __m128i src_4 = _mm_unpacklo_epi16(src[4], src[5]);
- const __m128i src_6 = _mm_unpacklo_epi16(src[6], src[7]);
-
- // Filter even-index pixels
- const __m128i tmp_0 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 0 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_2 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 2 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_4 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 4 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_6 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 6 * gamma) >> WARPEDDIFF_PREC_BITS)));
-
- const __m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2);
- const __m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6);
- const __m128i tmp_12 = _mm_unpackhi_epi32(tmp_0, tmp_2);
- const __m128i tmp_14 = _mm_unpackhi_epi32(tmp_4, tmp_6);
-
- const __m128i coeff_0 = _mm_unpacklo_epi64(tmp_8, tmp_10);
- const __m128i coeff_2 = _mm_unpackhi_epi64(tmp_8, tmp_10);
- const __m128i coeff_4 = _mm_unpacklo_epi64(tmp_12, tmp_14);
- const __m128i coeff_6 = _mm_unpackhi_epi64(tmp_12, tmp_14);
-
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_0);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_2);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_4);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_6);
-
- const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
- _mm_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m128i src_1 = _mm_unpackhi_epi16(src[0], src[1]);
- const __m128i src_3 = _mm_unpackhi_epi16(src[2], src[3]);
- const __m128i src_5 = _mm_unpackhi_epi16(src[4], src[5]);
- const __m128i src_7 = _mm_unpackhi_epi16(src[6], src[7]);
-
- const __m128i tmp_1 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 1 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_3 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 3 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_5 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 5 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_7 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 7 * gamma) >> WARPEDDIFF_PREC_BITS)));
-
- const __m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3);
- const __m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7);
- const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_1, tmp_3);
- const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_5, tmp_7);
-
- const __m128i coeff_1 = _mm_unpacklo_epi64(tmp_9, tmp_11);
- const __m128i coeff_3 = _mm_unpackhi_epi64(tmp_9, tmp_11);
- const __m128i coeff_5 = _mm_unpacklo_epi64(tmp_13, tmp_15);
- const __m128i coeff_7 = _mm_unpackhi_epi64(tmp_13, tmp_15);
-
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_1);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_3);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_5);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_7);
-
- const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
- _mm_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
-#if CONFIG_CONVOLVE_ROUND
- if (use_conv_params) {
- __m128i *const p =
- (__m128i *)&conv_params
- ->dst[(i + k + 4) * conv_params->dst_stride + j];
- const __m128i round_const = _mm_set1_epi32(
- -(1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)) +
- ((1 << (conv_params->round_1)) >> 1));
- res_lo = _mm_add_epi32(res_lo, round_const);
- res_lo =
- _mm_srl_epi16(res_lo, _mm_cvtsi32_si128(conv_params->round_1));
- if (comp_avg) res_lo = _mm_add_epi32(_mm_loadu_si128(p), res_lo);
- _mm_storeu_si128(p, res_lo);
- if (p_width > 4) {
- res_hi = _mm_add_epi32(res_hi, round_const);
- res_hi =
- _mm_srl_epi16(res_hi, _mm_cvtsi32_si128(conv_params->round_1));
- if (comp_avg)
- res_hi = _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi);
- _mm_storeu_si128(p + 1, res_hi);
- }
- } else {
-#else
- {
-#endif
- // Round and pack into 8 bits
- const __m128i round_const =
- _mm_set1_epi32(-(1 << (bd + VERSHEAR_REDUCE_PREC_BITS - 1)) +
- ((1 << VERSHEAR_REDUCE_PREC_BITS) >> 1));
-
- const __m128i res_lo_round = _mm_srai_epi32(
- _mm_add_epi32(res_lo, round_const), VERSHEAR_REDUCE_PREC_BITS);
- const __m128i res_hi_round = _mm_srai_epi32(
- _mm_add_epi32(res_hi, round_const), VERSHEAR_REDUCE_PREC_BITS);
-
- const __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
- __m128i res_8bit = _mm_packus_epi16(res_16bit, res_16bit);
-
- // Store, blending with 'pred' if needed
- __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
-
- // Note: If we're outputting a 4x4 block, we need to be very careful
- // to only output 4 pixels at this point, to avoid encode/decode
- // mismatches when encoding with multiple threads.
- if (p_width == 4) {
- if (comp_avg) {
- const __m128i orig = _mm_cvtsi32_si128(*(uint32_t *)p);
- res_8bit = _mm_avg_epu8(res_8bit, orig);
- }
- *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
- } else {
- if (comp_avg) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
- _mm_storel_epi64(p, res_8bit);
- }
- }
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/warp_plane_ssse3.c b/third_party/aom/av1/common/x86/warp_plane_sse4.c
index 3986ad389..efc542cbf 100644
--- a/third_party/aom/av1/common/x86/warp_plane_ssse3.c
+++ b/third_party/aom/av1/common/x86/warp_plane_sse4.c
@@ -9,9 +9,11 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#include <tmmintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+#include "config/av1_rtcd.h"
-#include "./av1_rtcd.h"
#include "av1/common/warped_motion.h"
/* This is a modified version of 'warped_filter' from warped_motion.c:
@@ -201,41 +203,142 @@ static const uint8_t even_mask[16] = { 0, 2, 2, 4, 4, 6, 6, 8,
static const uint8_t odd_mask[16] = { 1, 3, 3, 5, 5, 7, 7, 9,
9, 11, 11, 13, 13, 15, 15, 0 };
-void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
- int height, int stride, uint8_t *pred, int p_col,
- int p_row, int p_width, int p_height, int p_stride,
- int subsampling_x, int subsampling_y,
- ConvolveParams *conv_params, int16_t alpha,
- int16_t beta, int16_t gamma, int16_t delta) {
- int comp_avg = conv_params->do_average;
+static INLINE void horizontal_filter(__m128i src, __m128i *tmp, int sx,
+ int alpha, int k,
+ const int offset_bits_horiz,
+ const int reduce_bits_horiz) {
+ const __m128i src_even =
+ _mm_shuffle_epi8(src, _mm_loadu_si128((__m128i *)even_mask));
+ const __m128i src_odd =
+ _mm_shuffle_epi8(src, _mm_loadu_si128((__m128i *)odd_mask));
+
+ // Filter even-index pixels
+ const __m128i tmp_0 = _mm_loadl_epi64(
+ (__m128i *)&filter_8bit[(sx + 0 * alpha) >> WARPEDDIFF_PREC_BITS]);
+ const __m128i tmp_1 = _mm_loadl_epi64(
+ (__m128i *)&filter_8bit[(sx + 1 * alpha) >> WARPEDDIFF_PREC_BITS]);
+ const __m128i tmp_2 = _mm_loadl_epi64(
+ (__m128i *)&filter_8bit[(sx + 2 * alpha) >> WARPEDDIFF_PREC_BITS]);
+ const __m128i tmp_3 = _mm_loadl_epi64(
+ (__m128i *)&filter_8bit[(sx + 3 * alpha) >> WARPEDDIFF_PREC_BITS]);
+ const __m128i tmp_4 = _mm_loadl_epi64(
+ (__m128i *)&filter_8bit[(sx + 4 * alpha) >> WARPEDDIFF_PREC_BITS]);
+ const __m128i tmp_5 = _mm_loadl_epi64(
+ (__m128i *)&filter_8bit[(sx + 5 * alpha) >> WARPEDDIFF_PREC_BITS]);
+ const __m128i tmp_6 = _mm_loadl_epi64(
+ (__m128i *)&filter_8bit[(sx + 6 * alpha) >> WARPEDDIFF_PREC_BITS]);
+ const __m128i tmp_7 = _mm_loadl_epi64(
+ (__m128i *)&filter_8bit[(sx + 7 * alpha) >> WARPEDDIFF_PREC_BITS]);
+
+ // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 0 2
+ const __m128i tmp_8 = _mm_unpacklo_epi16(tmp_0, tmp_2);
+ // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 1 3
+ const __m128i tmp_9 = _mm_unpacklo_epi16(tmp_1, tmp_3);
+ // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 4 6
+ const __m128i tmp_10 = _mm_unpacklo_epi16(tmp_4, tmp_6);
+ // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 5 7
+ const __m128i tmp_11 = _mm_unpacklo_epi16(tmp_5, tmp_7);
+
+ // Coeffs 0 2 0 2 0 2 0 2 4 6 4 6 4 6 4 6 for pixels 0 2 4 6
+ const __m128i tmp_12 = _mm_unpacklo_epi32(tmp_8, tmp_10);
+ // Coeffs 1 3 1 3 1 3 1 3 5 7 5 7 5 7 5 7 for pixels 0 2 4 6
+ const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_8, tmp_10);
+ // Coeffs 0 2 0 2 0 2 0 2 4 6 4 6 4 6 4 6 for pixels 1 3 5 7
+ const __m128i tmp_14 = _mm_unpacklo_epi32(tmp_9, tmp_11);
+ // Coeffs 1 3 1 3 1 3 1 3 5 7 5 7 5 7 5 7 for pixels 1 3 5 7
+ const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_9, tmp_11);
+
+ // Coeffs 0 2 for pixels 0 2 4 6 1 3 5 7
+ const __m128i coeff_02 = _mm_unpacklo_epi64(tmp_12, tmp_14);
+ // Coeffs 4 6 for pixels 0 2 4 6 1 3 5 7
+ const __m128i coeff_46 = _mm_unpackhi_epi64(tmp_12, tmp_14);
+ // Coeffs 1 3 for pixels 0 2 4 6 1 3 5 7
+ const __m128i coeff_13 = _mm_unpacklo_epi64(tmp_13, tmp_15);
+ // Coeffs 5 7 for pixels 0 2 4 6 1 3 5 7
+ const __m128i coeff_57 = _mm_unpackhi_epi64(tmp_13, tmp_15);
+
+ // The pixel order we need for 'src' is:
+ // 0 2 2 4 4 6 6 8 1 3 3 5 5 7 7 9
+ const __m128i src_02 = _mm_unpacklo_epi64(src_even, src_odd);
+ const __m128i res_02 = _mm_maddubs_epi16(src_02, coeff_02);
+ // 4 6 6 8 8 10 10 12 5 7 7 9 9 11 11 13
+ const __m128i src_46 = _mm_unpacklo_epi64(_mm_srli_si128(src_even, 4),
+ _mm_srli_si128(src_odd, 4));
+ const __m128i res_46 = _mm_maddubs_epi16(src_46, coeff_46);
+ // 1 3 3 5 5 7 7 9 2 4 4 6 6 8 8 10
+ const __m128i src_13 =
+ _mm_unpacklo_epi64(src_odd, _mm_srli_si128(src_even, 2));
+ const __m128i res_13 = _mm_maddubs_epi16(src_13, coeff_13);
+ // 5 7 7 9 9 11 11 13 6 8 8 10 10 12 12 14
+ const __m128i src_57 = _mm_unpacklo_epi64(_mm_srli_si128(src_odd, 4),
+ _mm_srli_si128(src_even, 6));
+ const __m128i res_57 = _mm_maddubs_epi16(src_57, coeff_57);
+
+ const __m128i round_const = _mm_set1_epi16((1 << offset_bits_horiz) +
+ ((1 << reduce_bits_horiz) >> 1));
+
+ // Note: The values res_02 + res_46 and res_13 + res_57 both
+ // fit into int16s at this point, but their sum may be too wide to fit
+ // into an int16. However, once we also add round_const, the sum of
+ // all of these fits into a uint16.
+ //
+ // The wrapping behaviour of _mm_add_* is used here to make sure we
+ // get the correct result despite converting between different
+ // (implicit) types.
+ const __m128i res_even = _mm_add_epi16(res_02, res_46);
+ const __m128i res_odd = _mm_add_epi16(res_13, res_57);
+ const __m128i res =
+ _mm_add_epi16(_mm_add_epi16(res_even, res_odd), round_const);
+ tmp[k + 7] = _mm_srl_epi16(res, _mm_cvtsi32_si128(reduce_bits_horiz));
+}
+
+void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width,
+ int height, int stride, uint8_t *pred, int p_col,
+ int p_row, int p_width, int p_height, int p_stride,
+ int subsampling_x, int subsampling_y,
+ ConvolveParams *conv_params, int16_t alpha,
+ int16_t beta, int16_t gamma, int16_t delta) {
__m128i tmp[15];
int i, j, k;
const int bd = 8;
-#if CONFIG_CONVOLVE_ROUND
- const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
- const int reduce_bits_horiz =
- use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz =
- use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
- if (use_conv_params) {
- conv_params->do_post_rounding = 1;
- }
- assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
-#else
- const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
-#endif
+ const int reduce_bits_horiz = conv_params->round_0;
+ const int reduce_bits_vert = conv_params->is_compound
+ ? conv_params->round_1
+ : 2 * FILTER_BITS - reduce_bits_horiz;
+ const int offset_bits_horiz = bd + FILTER_BITS - 1;
+ assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
+
+ const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
+ const __m128i reduce_bits_vert_shift = _mm_cvtsi32_si128(reduce_bits_vert);
+ const __m128i reduce_bits_vert_const =
+ _mm_set1_epi32(((1 << reduce_bits_vert) >> 1));
+ const __m128i res_add_const = _mm_set1_epi32(1 << offset_bits_vert);
+ const int round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const __m128i res_sub_const =
+ _mm_set1_epi16(-(1 << (offset_bits - conv_params->round_1)) -
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ __m128i round_bits_shift = _mm_cvtsi32_si128(round_bits);
+ __m128i round_bits_const = _mm_set1_epi16(((1 << round_bits) >> 1));
+
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m128i wt0 = _mm_set1_epi16(w0);
+ const __m128i wt1 = _mm_set1_epi16(w1);
+ const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
+ assert(IMPLIES(conv_params->do_average, conv_params->is_compound));
/* Note: For this code to work, the left/right frame borders need to be
- extended by at least 13 pixels each. By the time we get here, other
- code will have set up this border, but we allow an explicit check
- for debugging purposes.
+ extended by at least 13 pixels each. By the time we get here, other
+ code will have set up this border, but we allow an explicit check
+ for debugging purposes.
*/
/*for (i = 0; i < height; ++i) {
- for (j = 0; j < 13; ++j) {
- assert(ref[i * stride - 13 + j] == ref[i * stride]);
- assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]);
- }
+ for (j = 0; j < 13; ++j) {
+ assert(ref[i * stride - 13 + j] == ref[i * stride]);
+ assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]);
+ }
}*/
for (i = 0; i < p_height; i += 8) {
@@ -273,10 +376,8 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
else if (iy > height - 1)
iy = height - 1;
tmp[k + 7] = _mm_set1_epi16(
- (1 << (bd + WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS -
- 1)) +
- ref[iy * stride] *
- (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS)));
+ (1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
+ ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz)));
}
} else if (ix4 >= width + 6) {
for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
@@ -285,11 +386,37 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
iy = 0;
else if (iy > height - 1)
iy = height - 1;
- tmp[k + 7] = _mm_set1_epi16(
- (1 << (bd + WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS -
- 1)) +
- ref[iy * stride + (width - 1)] *
- (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS)));
+ tmp[k + 7] =
+ _mm_set1_epi16((1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
+ ref[iy * stride + (width - 1)] *
+ (1 << (FILTER_BITS - reduce_bits_horiz)));
+ }
+ } else if (((ix4 - 7) < 0) || ((ix4 + 9) > width)) {
+ const int out_of_boundary_left = -(ix4 - 6);
+ const int out_of_boundary_right = (ix4 + 8) - width;
+ for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
+ int iy = iy4 + k;
+ if (iy < 0)
+ iy = 0;
+ else if (iy > height - 1)
+ iy = height - 1;
+ int sx = sx4 + beta * (k + 4);
+
+ // Load source pixels
+ __m128i src =
+ _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
+ if (out_of_boundary_left >= 0) {
+ const __m128i shuffle_reg_left =
+ _mm_loadu_si128((__m128i *)warp_pad_left[out_of_boundary_left]);
+ src = _mm_shuffle_epi8(src, shuffle_reg_left);
+ }
+ if (out_of_boundary_right >= 0) {
+ const __m128i shuffle_reg_right = _mm_loadu_si128(
+ (__m128i *)warp_pad_right[out_of_boundary_right]);
+ src = _mm_shuffle_epi8(src, shuffle_reg_right);
+ }
+ horizontal_filter(src, tmp, sx, alpha, k, offset_bits_horiz,
+ reduce_bits_horiz);
}
} else {
for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
@@ -303,89 +430,8 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
// Load source pixels
const __m128i src =
_mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
- const __m128i src_even =
- _mm_shuffle_epi8(src, _mm_loadu_si128((__m128i *)even_mask));
- const __m128i src_odd =
- _mm_shuffle_epi8(src, _mm_loadu_si128((__m128i *)odd_mask));
-
- // Filter even-index pixels
- const __m128i tmp_0 = _mm_loadl_epi64((
- __m128i *)&filter_8bit[(sx + 0 * alpha) >> WARPEDDIFF_PREC_BITS]);
- const __m128i tmp_1 = _mm_loadl_epi64((
- __m128i *)&filter_8bit[(sx + 1 * alpha) >> WARPEDDIFF_PREC_BITS]);
- const __m128i tmp_2 = _mm_loadl_epi64((
- __m128i *)&filter_8bit[(sx + 2 * alpha) >> WARPEDDIFF_PREC_BITS]);
- const __m128i tmp_3 = _mm_loadl_epi64((
- __m128i *)&filter_8bit[(sx + 3 * alpha) >> WARPEDDIFF_PREC_BITS]);
- const __m128i tmp_4 = _mm_loadl_epi64((
- __m128i *)&filter_8bit[(sx + 4 * alpha) >> WARPEDDIFF_PREC_BITS]);
- const __m128i tmp_5 = _mm_loadl_epi64((
- __m128i *)&filter_8bit[(sx + 5 * alpha) >> WARPEDDIFF_PREC_BITS]);
- const __m128i tmp_6 = _mm_loadl_epi64((
- __m128i *)&filter_8bit[(sx + 6 * alpha) >> WARPEDDIFF_PREC_BITS]);
- const __m128i tmp_7 = _mm_loadl_epi64((
- __m128i *)&filter_8bit[(sx + 7 * alpha) >> WARPEDDIFF_PREC_BITS]);
-
- // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 0 2
- const __m128i tmp_8 = _mm_unpacklo_epi16(tmp_0, tmp_2);
- // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 1 3
- const __m128i tmp_9 = _mm_unpacklo_epi16(tmp_1, tmp_3);
- // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 4 6
- const __m128i tmp_10 = _mm_unpacklo_epi16(tmp_4, tmp_6);
- // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 5 7
- const __m128i tmp_11 = _mm_unpacklo_epi16(tmp_5, tmp_7);
-
- // Coeffs 0 2 0 2 0 2 0 2 4 6 4 6 4 6 4 6 for pixels 0 2 4 6
- const __m128i tmp_12 = _mm_unpacklo_epi32(tmp_8, tmp_10);
- // Coeffs 1 3 1 3 1 3 1 3 5 7 5 7 5 7 5 7 for pixels 0 2 4 6
- const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_8, tmp_10);
- // Coeffs 0 2 0 2 0 2 0 2 4 6 4 6 4 6 4 6 for pixels 1 3 5 7
- const __m128i tmp_14 = _mm_unpacklo_epi32(tmp_9, tmp_11);
- // Coeffs 1 3 1 3 1 3 1 3 5 7 5 7 5 7 5 7 for pixels 1 3 5 7
- const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_9, tmp_11);
-
- // Coeffs 0 2 for pixels 0 2 4 6 1 3 5 7
- const __m128i coeff_02 = _mm_unpacklo_epi64(tmp_12, tmp_14);
- // Coeffs 4 6 for pixels 0 2 4 6 1 3 5 7
- const __m128i coeff_46 = _mm_unpackhi_epi64(tmp_12, tmp_14);
- // Coeffs 1 3 for pixels 0 2 4 6 1 3 5 7
- const __m128i coeff_13 = _mm_unpacklo_epi64(tmp_13, tmp_15);
- // Coeffs 5 7 for pixels 0 2 4 6 1 3 5 7
- const __m128i coeff_57 = _mm_unpackhi_epi64(tmp_13, tmp_15);
-
- // The pixel order we need for 'src' is:
- // 0 2 2 4 4 6 6 8 1 3 3 5 5 7 7 9
- const __m128i src_02 = _mm_unpacklo_epi64(src_even, src_odd);
- const __m128i res_02 = _mm_maddubs_epi16(src_02, coeff_02);
- // 4 6 6 8 8 10 10 12 5 7 7 9 9 11 11 13
- const __m128i src_46 = _mm_unpacklo_epi64(_mm_srli_si128(src_even, 4),
- _mm_srli_si128(src_odd, 4));
- const __m128i res_46 = _mm_maddubs_epi16(src_46, coeff_46);
- // 1 3 3 5 5 7 7 9 2 4 4 6 6 8 8 10
- const __m128i src_13 =
- _mm_unpacklo_epi64(src_odd, _mm_srli_si128(src_even, 2));
- const __m128i res_13 = _mm_maddubs_epi16(src_13, coeff_13);
- // 5 7 7 9 9 11 11 13 6 8 8 10 10 12 12 14
- const __m128i src_57 = _mm_unpacklo_epi64(
- _mm_srli_si128(src_odd, 4), _mm_srli_si128(src_even, 6));
- const __m128i res_57 = _mm_maddubs_epi16(src_57, coeff_57);
-
- const __m128i round_const = _mm_set1_epi16(
- (1 << offset_bits_horiz) + ((1 << reduce_bits_horiz) >> 1));
-
- // Note: The values res_02 + res_46 and res_13 + res_57 both
- // fit into int16s at this point, but their sum may be too wide to fit
- // into an int16. However, once we also add round_const, the sum of
- // all of these fits into a uint16.
- //
- // The wrapping behaviour of _mm_add_* is used here to make sure we
- // get the correct result despite converting between different
- // (implicit) types.
- const __m128i res_even = _mm_add_epi16(res_02, res_46);
- const __m128i res_odd = _mm_add_epi16(res_13, res_57);
- const __m128i res =
- _mm_add_epi16(_mm_add_epi16(res_even, res_odd), round_const);
- tmp[k + 7] = _mm_srl_epi16(res, _mm_cvtsi32_si128(reduce_bits_horiz));
+ horizontal_filter(src, tmp, sx, alpha, k, offset_bits_horiz,
+ reduce_bits_horiz);
}
}
@@ -474,40 +520,85 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
__m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
__m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-#if CONFIG_CONVOLVE_ROUND
- if (use_conv_params) {
+ if (conv_params->is_compound) {
__m128i *const p =
(__m128i *)&conv_params
->dst[(i + k + 4) * conv_params->dst_stride + j];
- const __m128i round_const = _mm_set1_epi32(
- -(1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)) +
- ((1 << (conv_params->round_1)) >> 1));
- res_lo = _mm_add_epi32(res_lo, round_const);
- res_lo =
- _mm_srl_epi16(res_lo, _mm_cvtsi32_si128(conv_params->round_1));
- if (comp_avg) res_lo = _mm_add_epi32(_mm_loadu_si128(p), res_lo);
- _mm_storeu_si128(p, res_lo);
+ res_lo = _mm_add_epi32(res_lo, res_add_const);
+ res_lo = _mm_sra_epi32(_mm_add_epi32(res_lo, reduce_bits_vert_const),
+ reduce_bits_vert_shift);
+ const __m128i temp_lo_16 = _mm_packus_epi32(res_lo, res_lo);
+ __m128i res_lo_16;
+ if (conv_params->do_average) {
+ __m128i *const dst8 = (__m128i *)&pred[(i + k + 4) * p_stride + j];
+ const __m128i p_16 = _mm_loadl_epi64(p);
+
+ if (conv_params->use_jnt_comp_avg) {
+ const __m128i p_16_lo = _mm_unpacklo_epi16(p_16, temp_lo_16);
+ const __m128i wt_res_lo = _mm_madd_epi16(p_16_lo, wt);
+ const __m128i shifted_32 =
+ _mm_srai_epi32(wt_res_lo, DIST_PRECISION_BITS);
+ res_lo_16 = _mm_packus_epi32(shifted_32, shifted_32);
+ } else {
+ res_lo_16 = _mm_srai_epi16(_mm_add_epi16(p_16, temp_lo_16), 1);
+ }
+
+ res_lo_16 = _mm_add_epi16(res_lo_16, res_sub_const);
+
+ res_lo_16 = _mm_sra_epi16(
+ _mm_add_epi16(res_lo_16, round_bits_const), round_bits_shift);
+ __m128i res_8_lo = _mm_packus_epi16(res_lo_16, res_lo_16);
+ *(uint32_t *)dst8 = _mm_cvtsi128_si32(res_8_lo);
+ } else {
+ _mm_storel_epi64(p, temp_lo_16);
+ }
if (p_width > 4) {
- res_hi = _mm_add_epi32(res_hi, round_const);
+ __m128i *const p4 =
+ (__m128i *)&conv_params
+ ->dst[(i + k + 4) * conv_params->dst_stride + j + 4];
+
+ res_hi = _mm_add_epi32(res_hi, res_add_const);
res_hi =
- _mm_srl_epi16(res_hi, _mm_cvtsi32_si128(conv_params->round_1));
- if (comp_avg)
- res_hi = _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi);
- _mm_storeu_si128(p + 1, res_hi);
+ _mm_sra_epi32(_mm_add_epi32(res_hi, reduce_bits_vert_const),
+ reduce_bits_vert_shift);
+ const __m128i temp_hi_16 = _mm_packus_epi32(res_hi, res_hi);
+ __m128i res_hi_16;
+
+ if (conv_params->do_average) {
+ __m128i *const dst8_4 =
+ (__m128i *)&pred[(i + k + 4) * p_stride + j + 4];
+ const __m128i p4_16 = _mm_loadl_epi64(p4);
+
+ if (conv_params->use_jnt_comp_avg) {
+ const __m128i p_16_hi = _mm_unpacklo_epi16(p4_16, temp_hi_16);
+ const __m128i wt_res_hi = _mm_madd_epi16(p_16_hi, wt);
+ const __m128i shifted_32 =
+ _mm_srai_epi32(wt_res_hi, DIST_PRECISION_BITS);
+ res_hi_16 = _mm_packus_epi32(shifted_32, shifted_32);
+ } else {
+ res_hi_16 = _mm_srai_epi16(_mm_add_epi16(p4_16, temp_hi_16), 1);
+ }
+ res_hi_16 = _mm_add_epi16(res_hi_16, res_sub_const);
+
+ res_hi_16 = _mm_sra_epi16(
+ _mm_add_epi16(res_hi_16, round_bits_const), round_bits_shift);
+ __m128i res_8_hi = _mm_packus_epi16(res_hi_16, res_hi_16);
+ *(uint32_t *)dst8_4 = _mm_cvtsi128_si32(res_8_hi);
+
+ } else {
+ _mm_storel_epi64(p4, temp_hi_16);
+ }
}
} else {
-#else
- {
-#endif
// Round and pack into 8 bits
const __m128i round_const =
- _mm_set1_epi32(-(1 << (bd + VERSHEAR_REDUCE_PREC_BITS - 1)) +
- ((1 << VERSHEAR_REDUCE_PREC_BITS) >> 1));
+ _mm_set1_epi32(-(1 << (bd + reduce_bits_vert - 1)) +
+ ((1 << reduce_bits_vert) >> 1));
const __m128i res_lo_round = _mm_srai_epi32(
- _mm_add_epi32(res_lo, round_const), VERSHEAR_REDUCE_PREC_BITS);
+ _mm_add_epi32(res_lo, round_const), reduce_bits_vert);
const __m128i res_hi_round = _mm_srai_epi32(
- _mm_add_epi32(res_hi, round_const), VERSHEAR_REDUCE_PREC_BITS);
+ _mm_add_epi32(res_hi, round_const), reduce_bits_vert);
const __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
__m128i res_8bit = _mm_packus_epi16(res_16bit, res_16bit);
@@ -519,13 +610,8 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
// to only output 4 pixels at this point, to avoid encode/decode
// mismatches when encoding with multiple threads.
if (p_width == 4) {
- if (comp_avg) {
- const __m128i orig = _mm_cvtsi32_si128(*(uint32_t *)p);
- res_8bit = _mm_avg_epu8(res_8bit, orig);
- }
*(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
} else {
- if (comp_avg) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
_mm_storel_epi64(p, res_8bit);
}
}
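(Editor's note on the compound branch added above: the old comp_avg byte averaging is replaced by a distance-weighted ("jnt") blend of the accumulated 16-bit prediction and the new warped value, followed by an offset removal and a final rounding shift. The scalar sketch below models that per-pixel step; DIST_PRECISION_BITS is assumed to be 4, the fwd_offset/bck_offset pairing follows the library's usual convention for conv_params, and res_sub_const / round_bits are assumed to be the offset-removal constant and final shift used by the vector constants above. All names are illustrative, not the aom API.)

#include <stdint.h>

#define DIST_PRECISION_BITS 4 /* assumed value of the libaom constant */

// Scalar sketch of the compound-average step for one pixel: 'prev' is the
// 16-bit value already in conv_params->dst, 'res' the freshly warped value
// after the vertical reduce; both still carry the intermediate offset that
// res_sub_const removes.
static uint8_t warp_compound_px(int32_t prev, int32_t res, int use_jnt,
                                int fwd_offset, int bck_offset,
                                int res_sub_const, int round_bits) {
  int32_t avg;
  if (use_jnt) {
    // Distance-weighted blend (use_jnt_comp_avg path).
    avg = (prev * fwd_offset + res * bck_offset) >> DIST_PRECISION_BITS;
  } else {
    // Plain average of the two predictions.
    avg = (prev + res) >> 1;
  }
  avg += res_sub_const;                                  // drop the offset
  avg = (avg + ((1 << round_bits) >> 1)) >> round_bits;  // final rounding
  if (avg < 0) avg = 0;
  if (avg > 255) avg = 255;                              // packus-style saturation
  return (uint8_t)avg;
}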
diff --git a/third_party/aom/av1/common/x86/wiener_convolve_avx2.c b/third_party/aom/av1/common/x86/wiener_convolve_avx2.c
new file mode 100644
index 000000000..e1449fd21
--- /dev/null
+++ b/third_party/aom/av1/common/x86/wiener_convolve_avx2.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <immintrin.h>
+#include <assert.h>
+
+#include "config/av1_rtcd.h"
+
+#include "av1/common/convolve.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/aom_filter.h"
+#include "aom_dsp/x86/synonyms.h"
+#include "aom_dsp/x86/synonyms_avx2.h"
+
+// 128-bit xmmwords are written as [ ... ] with the MSB on the left.
+// 256-bit ymmwords are written as two xmmwords, [ ... ][ ... ] with the MSB
+// on the left.
+// A row of, say, 8-bit pixels with values p0, p1, p2, ..., p30, p31 will be
+// loaded and stored as [ p31 ... p17 p16 ][ p15 ... p1 p0 ].
+void av1_wiener_convolve_add_src_avx2(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h,
+ const ConvolveParams *conv_params) {
+ const int bd = 8;
+ assert(x_step_q4 == 16 && y_step_q4 == 16);
+ assert(!(w & 7));
+ (void)x_step_q4;
+ (void)y_step_q4;
+
+ DECLARE_ALIGNED(32, uint16_t,
+ temp[(MAX_SB_SIZE + SUBPEL_TAPS - 1) * MAX_SB_SIZE]);
+ int intermediate_height = h + SUBPEL_TAPS - 1;
+ const int center_tap = ((SUBPEL_TAPS - 1) / 2);
+ const uint8_t *const src_ptr = src - center_tap * src_stride - center_tap;
+
+ const __m128i zero_128 = _mm_setzero_si128();
+ const __m256i zero_256 = _mm256_setzero_si256();
+
+ // Add an offset to account for the "add_src" part of the convolve function.
+ const __m128i offset = _mm_insert_epi16(zero_128, 1 << FILTER_BITS, 3);
+
+ const __m256i clamp_low = zero_256;
+ const __m256i clamp_high =
+ _mm256_set1_epi16(WIENER_CLAMP_LIMIT(conv_params->round_0, bd) - 1);
+
+ /* Horizontal filter */
+ {
+ // coeffs [ f7 f6 f5 f4 f3 f2 f1 f0 ]
+ const __m128i coeffs_x = _mm_add_epi16(xx_loadu_128(filter_x), offset);
+
+ // coeffs [ f3 f2 f3 f2 f1 f0 f1 f0 ]
+ const __m128i coeffs_0123 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
+ // coeffs [ f7 f6 f7 f6 f5 f4 f5 f4 ]
+ const __m128i coeffs_4567 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
+
+ // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ]
+ const __m128i coeffs_01_128 = _mm_unpacklo_epi64(coeffs_0123, coeffs_0123);
+ // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ]
+ const __m128i coeffs_23_128 = _mm_unpackhi_epi64(coeffs_0123, coeffs_0123);
+ // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ]
+ const __m128i coeffs_45_128 = _mm_unpacklo_epi64(coeffs_4567, coeffs_4567);
+ // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ]
+ const __m128i coeffs_67_128 = _mm_unpackhi_epi64(coeffs_4567, coeffs_4567);
+
+ // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ][ f1 f0 f1 f0 f1 f0 f1 f0 ]
+ const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128);
+ // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ][ f3 f2 f3 f2 f3 f2 f3 f2 ]
+ const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128);
+ // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ][ f5 f4 f5 f4 f5 f4 f5 f4 ]
+ const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128);
+ // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ][ f7 f6 f7 f6 f7 f6 f7 f6 ]
+ const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128);
+
+ const __m256i round_const = _mm256_set1_epi32(
+ (1 << (conv_params->round_0 - 1)) + (1 << (bd + FILTER_BITS - 1)));
+
+ for (int i = 0; i < intermediate_height; ++i) {
+ for (int j = 0; j < w; j += 16) {
+ const uint8_t *data_ij = src_ptr + i * src_stride + j;
+
+ // Load 8-bit src data
+ const __m128i data_0 = xx_loadu_128(data_ij + 0);
+ const __m128i data_1 = xx_loadu_128(data_ij + 1);
+ const __m128i data_2 = xx_loadu_128(data_ij + 2);
+ const __m128i data_3 = xx_loadu_128(data_ij + 3);
+ const __m128i data_4 = xx_loadu_128(data_ij + 4);
+ const __m128i data_5 = xx_loadu_128(data_ij + 5);
+ const __m128i data_6 = xx_loadu_128(data_ij + 6);
+ const __m128i data_7 = xx_loadu_128(data_ij + 7);
+
+ // (Zero-)Extend 8-bit data to 16-bit data
+ const __m256i src_0 = _mm256_cvtepu8_epi16(data_0);
+ const __m256i src_1 = _mm256_cvtepu8_epi16(data_1);
+ const __m256i src_2 = _mm256_cvtepu8_epi16(data_2);
+ const __m256i src_3 = _mm256_cvtepu8_epi16(data_3);
+ const __m256i src_4 = _mm256_cvtepu8_epi16(data_4);
+ const __m256i src_5 = _mm256_cvtepu8_epi16(data_5);
+ const __m256i src_6 = _mm256_cvtepu8_epi16(data_6);
+ const __m256i src_7 = _mm256_cvtepu8_epi16(data_7);
+
+ // Multiply src data by filter coeffs and sum pairs
+ const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01);
+ const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01);
+ const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23);
+ const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23);
+ const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45);
+ const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45);
+ const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67);
+ const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67);
+
+ // Calculate scalar product for even- and odd-indices separately,
+ // increasing to 32-bit precision
+ const __m256i res_even_sum = _mm256_add_epi32(
+ _mm256_add_epi32(res_0, res_4), _mm256_add_epi32(res_2, res_6));
+ const __m256i res_odd_sum = _mm256_add_epi32(
+ _mm256_add_epi32(res_1, res_5), _mm256_add_epi32(res_3, res_7));
+
+ const __m256i res_even = _mm256_srai_epi32(
+ _mm256_add_epi32(res_even_sum, round_const), conv_params->round_0);
+ const __m256i res_odd = _mm256_srai_epi32(
+ _mm256_add_epi32(res_odd_sum, round_const), conv_params->round_0);
+
+ // Reduce to 16-bit precision and pack even- and odd-index results
+ // back into one register. The _mm256_packs_epi32 intrinsic returns
+ // a register with the pixels ordered as follows:
+ // [ 15 13 11 9 14 12 10 8 ] [ 7 5 3 1 6 4 2 0 ]
+ const __m256i res = _mm256_packs_epi32(res_even, res_odd);
+ const __m256i res_clamped =
+ _mm256_min_epi16(_mm256_max_epi16(res, clamp_low), clamp_high);
+
+ // Store in a temporary array
+ yy_storeu_256(temp + i * MAX_SB_SIZE + j, res_clamped);
+ }
+ }
+ }
+
+ /* Vertical filter */
+ {
+ // coeffs [ g7 g6 g5 g4 g3 g2 g1 g0 ]
+ const __m128i coeffs_y = _mm_add_epi16(xx_loadu_128(filter_y), offset);
+
+ // coeffs [ g3 g2 g3 g2 g1 g0 g1 g0 ]
+ const __m128i coeffs_0123 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
+ // coeffs [ g7 g6 g7 g6 g5 g4 g5 g4 ]
+ const __m128i coeffs_4567 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
+
+ // coeffs [ g1 g0 g1 g0 g1 g0 g1 g0 ]
+ const __m128i coeffs_01_128 = _mm_unpacklo_epi64(coeffs_0123, coeffs_0123);
+ // coeffs [ g3 g2 g3 g2 g3 g2 g3 g2 ]
+ const __m128i coeffs_23_128 = _mm_unpackhi_epi64(coeffs_0123, coeffs_0123);
+ // coeffs [ g5 g4 g5 g4 g5 g4 g5 g4 ]
+ const __m128i coeffs_45_128 = _mm_unpacklo_epi64(coeffs_4567, coeffs_4567);
+ // coeffs [ g7 g6 g7 g6 g7 g6 g7 g6 ]
+ const __m128i coeffs_67_128 = _mm_unpackhi_epi64(coeffs_4567, coeffs_4567);
+
+ // coeffs [ g1 g0 g1 g0 g1 g0 g1 g0 ][ g1 g0 g1 g0 g1 g0 g1 g0 ]
+ const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128);
+ // coeffs [ g3 g2 g3 g2 g3 g2 g3 g2 ][ g3 g2 g3 g2 g3 g2 g3 g2 ]
+ const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128);
+ // coeffs [ g5 g4 g5 g4 g5 g4 g5 g4 ][ g5 g4 g5 g4 g5 g4 g5 g4 ]
+ const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128);
+ // coeffs [ g7 g6 g7 g6 g7 g6 g7 g6 ][ g7 g6 g7 g6 g7 g6 g7 g6 ]
+ const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128);
+
+ const __m256i round_const =
+ _mm256_set1_epi32((1 << (conv_params->round_1 - 1)) -
+ (1 << (bd + conv_params->round_1 - 1)));
+
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; j += 16) {
+ const uint16_t *data_ij = temp + i * MAX_SB_SIZE + j;
+
+ // Load 16-bit data from the output of the horizontal filter in
+ // which the pixels are ordered as follows:
+ // [ 15 13 11 9 14 12 10 8 ] [ 7 5 3 1 6 4 2 0 ]
+ const __m256i data_0 = yy_loadu_256(data_ij + 0 * MAX_SB_SIZE);
+ const __m256i data_1 = yy_loadu_256(data_ij + 1 * MAX_SB_SIZE);
+ const __m256i data_2 = yy_loadu_256(data_ij + 2 * MAX_SB_SIZE);
+ const __m256i data_3 = yy_loadu_256(data_ij + 3 * MAX_SB_SIZE);
+ const __m256i data_4 = yy_loadu_256(data_ij + 4 * MAX_SB_SIZE);
+ const __m256i data_5 = yy_loadu_256(data_ij + 5 * MAX_SB_SIZE);
+ const __m256i data_6 = yy_loadu_256(data_ij + 6 * MAX_SB_SIZE);
+ const __m256i data_7 = yy_loadu_256(data_ij + 7 * MAX_SB_SIZE);
+
+ // Filter the even-indices, increasing to 32-bit precision
+ const __m256i src_0 = _mm256_unpacklo_epi16(data_0, data_1);
+ const __m256i src_2 = _mm256_unpacklo_epi16(data_2, data_3);
+ const __m256i src_4 = _mm256_unpacklo_epi16(data_4, data_5);
+ const __m256i src_6 = _mm256_unpacklo_epi16(data_6, data_7);
+
+ const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01);
+ const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23);
+ const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45);
+ const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67);
+
+ const __m256i res_even = _mm256_add_epi32(
+ _mm256_add_epi32(res_0, res_2), _mm256_add_epi32(res_4, res_6));
+
+ // Filter the odd-indices, increasing to 32-bit precision
+ const __m256i src_1 = _mm256_unpackhi_epi16(data_0, data_1);
+ const __m256i src_3 = _mm256_unpackhi_epi16(data_2, data_3);
+ const __m256i src_5 = _mm256_unpackhi_epi16(data_4, data_5);
+ const __m256i src_7 = _mm256_unpackhi_epi16(data_6, data_7);
+
+ const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01);
+ const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23);
+ const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45);
+ const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67);
+
+ const __m256i res_odd = _mm256_add_epi32(
+ _mm256_add_epi32(res_1, res_3), _mm256_add_epi32(res_5, res_7));
+
+ // Pixels are currently in the following order:
+ // res_even order: [ 14 12 10 8 ] [ 6 4 2 0 ]
+ // res_odd order: [ 15 13 11 9 ] [ 7 5 3 1 ]
+ //
+ // Rearrange the pixels into the following order:
+ // res_lo order: [ 11 10 9 8 ] [ 3 2 1 0 ]
+ // res_hi order: [ 15 14 13 12 ] [ 7 6 5 4 ]
+ const __m256i res_lo = _mm256_unpacklo_epi32(res_even, res_odd);
+ const __m256i res_hi = _mm256_unpackhi_epi32(res_even, res_odd);
+
+ const __m256i res_lo_round = _mm256_srai_epi32(
+ _mm256_add_epi32(res_lo, round_const), conv_params->round_1);
+ const __m256i res_hi_round = _mm256_srai_epi32(
+ _mm256_add_epi32(res_hi, round_const), conv_params->round_1);
+
+ // Reduce to 16-bit precision and pack into the correct order:
+ // [ 15 14 13 12 11 10 9 8 ][ 7 6 5 4 3 2 1 0 ]
+ const __m256i res_16bit =
+ _mm256_packs_epi32(res_lo_round, res_hi_round);
+
+ // Reduce to 8-bit precision. This messes up the order:
+ // [ - - - - - - - - 15 14 13 12 11 10 9 8 ]
+ // [ - - - - - - - - 7 6 5 4 3 2 1 0 ]
+ const __m256i res_8bit =
+ _mm256_packus_epi16(res_16bit, zero_256 /* don't care value */);
+
+ // Swap the two central 32-bit values to get the order:
+ // [ - - - - - - - - - - - - - - - - ]
+ // [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
+ const __m256i res_8bit2 = _mm256_permute4x64_epi64(res_8bit, 0xd8);
+
+ // Store the lower 128-bit lane in the dst array
+ xx_storeu_128(dst + i * dst_stride + j,
+ _mm256_castsi256_si128(res_8bit2));
+ }
+ }
+ }
+}
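(Editor's note: the AVX2 function above computes, per output sample of the horizontal pass, an 8-tap filter with the "add_src" centre-tap offset, a round_0 reduction, and a clamp into the intermediate uint16_t buffer. The scalar sketch below models one such sample; the function name and the explicit 'limit' parameter are illustrative -- the real code uses the WIENER_CLAMP_LIMIT macro -- and FILTER_BITS / SUBPEL_TAPS are assumed to keep their usual libaom values.)

#include <stdint.h>

#define FILTER_BITS 7 /* assumed libaom value */
#define SUBPEL_TAPS 8 /* assumed libaom value */

// Scalar sketch of one horizontal-pass output sample: 's' points at the
// first of the 8 input pixels, 'f' is filter_x, 'limit' corresponds to
// WIENER_CLAMP_LIMIT(round0, bd) - 1 in the code above.
static uint16_t wiener_horiz_px(const uint8_t *s, const int16_t *f,
                                int round0, int bd, int limit) {
  // Same round constant as above: half the divisor plus a positivity offset.
  int32_t sum = (1 << (round0 - 1)) + (1 << (bd + FILTER_BITS - 1));
  for (int k = 0; k < SUBPEL_TAPS; ++k) sum += (int32_t)s[k] * f[k];
  // "add_src": the offset folded into the centre tap adds the original
  // source pixel back at full FILTER_BITS precision.
  sum += (int32_t)s[(SUBPEL_TAPS - 1) / 2] << FILTER_BITS;
  sum >>= round0;
  if (sum < 0) sum = 0;
  if (sum > limit) sum = limit;
  return (uint16_t)sum;
}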
diff --git a/third_party/aom/av1/common/x86/wiener_convolve_sse2.c b/third_party/aom/av1/common/x86/wiener_convolve_sse2.c
new file mode 100644
index 000000000..3083d224b
--- /dev/null
+++ b/third_party/aom/av1/common/x86/wiener_convolve_sse2.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <emmintrin.h>
+#include <assert.h>
+
+#include "config/av1_rtcd.h"
+
+#include "av1/common/convolve.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/aom_filter.h"
+
+void av1_wiener_convolve_add_src_sse2(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h,
+ const ConvolveParams *conv_params) {
+ const int bd = 8;
+ assert(x_step_q4 == 16 && y_step_q4 == 16);
+ assert(!(w & 7));
+ (void)x_step_q4;
+ (void)y_step_q4;
+
+ DECLARE_ALIGNED(16, uint16_t,
+ temp[(MAX_SB_SIZE + SUBPEL_TAPS - 1) * MAX_SB_SIZE]);
+ int intermediate_height = h + SUBPEL_TAPS - 1;
+ int i, j;
+ const int center_tap = ((SUBPEL_TAPS - 1) / 2);
+ const uint8_t *const src_ptr = src - center_tap * src_stride - center_tap;
+
+ const __m128i zero = _mm_setzero_si128();
+ // Add an offset to account for the "add_src" part of the convolve function.
+ const __m128i offset = _mm_insert_epi16(zero, 1 << FILTER_BITS, 3);
+
+ /* Horizontal filter */
+ {
+ const __m128i coeffs_x =
+ _mm_add_epi16(_mm_loadu_si128((__m128i *)filter_x), offset);
+
+ // coeffs 0 1 0 1 2 3 2 3
+ const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
+ // coeffs 4 5 4 5 6 7 6 7
+ const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
+
+ // coeffs 0 1 0 1 0 1 0 1
+ const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
+ // coeffs 2 3 2 3 2 3 2 3
+ const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
+ // coeffs 4 5 4 5 4 5 4 5
+ const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
+ // coeffs 6 7 6 7 6 7 6 7
+ const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
+
+ const __m128i round_const = _mm_set1_epi32(
+ (1 << (conv_params->round_0 - 1)) + (1 << (bd + FILTER_BITS - 1)));
+
+ for (i = 0; i < intermediate_height; ++i) {
+ for (j = 0; j < w; j += 8) {
+ const __m128i data =
+ _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
+
+ // Filter even-index pixels
+ const __m128i src_0 = _mm_unpacklo_epi8(data, zero);
+ const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
+ const __m128i src_2 = _mm_unpacklo_epi8(_mm_srli_si128(data, 2), zero);
+ const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
+ const __m128i src_4 = _mm_unpacklo_epi8(_mm_srli_si128(data, 4), zero);
+ const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
+ const __m128i src_6 = _mm_unpacklo_epi8(_mm_srli_si128(data, 6), zero);
+ const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
+
+ __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
+ _mm_add_epi32(res_2, res_6));
+ res_even = _mm_srai_epi32(_mm_add_epi32(res_even, round_const),
+ conv_params->round_0);
+
+ // Filter odd-index pixels
+ const __m128i src_1 = _mm_unpacklo_epi8(_mm_srli_si128(data, 1), zero);
+ const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
+ const __m128i src_3 = _mm_unpacklo_epi8(_mm_srli_si128(data, 3), zero);
+ const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
+ const __m128i src_5 = _mm_unpacklo_epi8(_mm_srli_si128(data, 5), zero);
+ const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
+ const __m128i src_7 = _mm_unpacklo_epi8(_mm_srli_si128(data, 7), zero);
+ const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
+
+ __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
+ _mm_add_epi32(res_3, res_7));
+ res_odd = _mm_srai_epi32(_mm_add_epi32(res_odd, round_const),
+ conv_params->round_0);
+
+ // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
+ __m128i res = _mm_packs_epi32(res_even, res_odd);
+ res = _mm_min_epi16(
+ _mm_max_epi16(res, zero),
+ _mm_set1_epi16(WIENER_CLAMP_LIMIT(conv_params->round_0, bd) - 1));
+ _mm_storeu_si128((__m128i *)&temp[i * MAX_SB_SIZE + j], res);
+ }
+ }
+ }
+
+ /* Vertical filter */
+ {
+ const __m128i coeffs_y =
+ _mm_add_epi16(_mm_loadu_si128((__m128i *)filter_y), offset);
+
+ // coeffs 0 1 0 1 2 3 2 3
+ const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
+ // coeffs 4 5 4 5 6 7 6 7
+ const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
+
+ // coeffs 0 1 0 1 0 1 0 1
+ const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
+ // coeffs 2 3 2 3 2 3 2 3
+ const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
+ // coeffs 4 5 4 5 4 5 4 5
+ const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
+ // coeffs 6 7 6 7 6 7 6 7
+ const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
+
+ const __m128i round_const =
+ _mm_set1_epi32((1 << (conv_params->round_1 - 1)) -
+ (1 << (bd + conv_params->round_1 - 1)));
+
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; j += 8) {
+ // Filter even-index pixels
+ const uint16_t *data = &temp[i * MAX_SB_SIZE + j];
+ const __m128i src_0 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 0 * MAX_SB_SIZE),
+ *(__m128i *)(data + 1 * MAX_SB_SIZE));
+ const __m128i src_2 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 2 * MAX_SB_SIZE),
+ *(__m128i *)(data + 3 * MAX_SB_SIZE));
+ const __m128i src_4 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 4 * MAX_SB_SIZE),
+ *(__m128i *)(data + 5 * MAX_SB_SIZE));
+ const __m128i src_6 =
+ _mm_unpacklo_epi16(*(__m128i *)(data + 6 * MAX_SB_SIZE),
+ *(__m128i *)(data + 7 * MAX_SB_SIZE));
+
+ const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
+ const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
+ const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
+ const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
+
+ const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
+ _mm_add_epi32(res_4, res_6));
+
+ // Filter odd-index pixels
+ const __m128i src_1 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 0 * MAX_SB_SIZE),
+ *(__m128i *)(data + 1 * MAX_SB_SIZE));
+ const __m128i src_3 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 2 * MAX_SB_SIZE),
+ *(__m128i *)(data + 3 * MAX_SB_SIZE));
+ const __m128i src_5 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 4 * MAX_SB_SIZE),
+ *(__m128i *)(data + 5 * MAX_SB_SIZE));
+ const __m128i src_7 =
+ _mm_unpackhi_epi16(*(__m128i *)(data + 6 * MAX_SB_SIZE),
+ *(__m128i *)(data + 7 * MAX_SB_SIZE));
+
+ const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
+ const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
+ const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
+ const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
+
+ const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
+ _mm_add_epi32(res_5, res_7));
+
+ // Rearrange pixels back into the order 0 ... 7
+ const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
+ const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
+
+ const __m128i res_lo_round = _mm_srai_epi32(
+ _mm_add_epi32(res_lo, round_const), conv_params->round_1);
+ const __m128i res_hi_round = _mm_srai_epi32(
+ _mm_add_epi32(res_hi, round_const), conv_params->round_1);
+
+ const __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
+ __m128i res_8bit = _mm_packus_epi16(res_16bit, res_16bit);
+
+ __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
+ _mm_storel_epi64(p, res_8bit);
+ }
+ }
+ }
+}
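(Editor's note: the SSE2 version above follows the same two-pass structure as the AVX2 file, so only the vertical pass is sketched here. The negative term in its round constant cancels, up to rounding, the positivity offset carried over from the horizontal pass, and the result saturates to 8 bits as _mm_packus_epi16 does. Names are illustrative, and FILTER_BITS / SUBPEL_TAPS are assumed libaom values.)

#include <stddef.h>
#include <stdint.h>

#define FILTER_BITS 7 /* assumed libaom value */
#define SUBPEL_TAPS 8 /* assumed libaom value */

// Scalar sketch of one vertical-pass output pixel: 'col' points at the
// topmost of the 8 intermediate uint16_t samples for this column, 'stride'
// corresponds to MAX_SB_SIZE in the code above, 'f' is filter_y.
static uint8_t wiener_vert_px(const uint16_t *col, ptrdiff_t stride,
                              const int16_t *f, int round1, int bd) {
  // Same round constant as above; the negative term removes the offset the
  // horizontal pass added to keep its intermediate values unsigned.
  int32_t sum = (1 << (round1 - 1)) - (1 << (bd + round1 - 1));
  for (int k = 0; k < SUBPEL_TAPS; ++k) sum += (int32_t)col[k * stride] * f[k];
  // add_src centre-tap offset, as in the horizontal pass.
  sum += (int32_t)col[((SUBPEL_TAPS - 1) / 2) * stride] << FILTER_BITS;
  sum >>= round1;
  if (sum < 0) sum = 0;
  if (sum > 255) sum = 255; // packus-style saturation to 8 bits
  return (uint8_t)sum;
}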
diff --git a/third_party/aom/av1/common/zigzag.h b/third_party/aom/av1/common/zigzag.h
deleted file mode 100644
index c58b18b57..000000000
--- a/third_party/aom/av1/common/zigzag.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/* clang-format off */
-
-#if !defined(_zigzag_H)
-# define _zigzag_H (1)
-
-extern const unsigned char OD_ZIGZAG4_DCT_DCT[15][2];
-extern const unsigned char OD_ZIGZAG4_ADST_DCT[15][2];
-extern const unsigned char OD_ZIGZAG4_DCT_ADST[15][2];
-#define OD_ZIGZAG4_ADST_ADST OD_ZIGZAG4_DCT_DCT
-
-extern const unsigned char OD_ZIGZAG8_DCT_DCT[48][2];
-extern const unsigned char OD_ZIGZAG8_ADST_DCT[48][2];
-extern const unsigned char OD_ZIGZAG8_DCT_ADST[48][2];
-#define OD_ZIGZAG8_ADST_ADST OD_ZIGZAG8_DCT_DCT
-
-extern const unsigned char OD_ZIGZAG16_DCT_DCT[192][2];
-extern const unsigned char OD_ZIGZAG16_ADST_DCT[192][2];
-extern const unsigned char OD_ZIGZAG16_DCT_ADST[192][2];
-#define OD_ZIGZAG16_ADST_ADST OD_ZIGZAG16_DCT_DCT
-
-extern const unsigned char OD_ZIGZAG32_DCT_DCT[768][2];
-#endif
diff --git a/third_party/aom/av1/common/zigzag16.c b/third_party/aom/av1/common/zigzag16.c
deleted file mode 100644
index 6df6e3855..000000000
--- a/third_party/aom/av1/common/zigzag16.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/* This file is generated by gen_zigzag16.m */
-
-/* clang-format off */
-
-#include "odintrin.h"
-OD_EXTERN const unsigned char OD_ZIGZAG16_DCT_DCT[192][2] = {
- {8, 0}, {8, 1}, {8, 2}, {9, 0},
- {8, 3}, {9, 1}, {9, 2}, {10, 0},
- {9, 3}, {10, 1}, {10, 2}, {11, 0},
- {10, 3}, {11, 1}, {11, 2}, {11, 3},
- {12, 0}, {12, 1}, {13, 0}, {12, 2},
- {12, 3}, {13, 1}, {13, 2}, {14, 0},
- {13, 3}, {14, 1}, {15, 0}, {14, 2},
- {14, 3}, {15, 1}, {15, 2}, {15, 3},
- {0, 8}, {1, 8}, {0, 9}, {2, 8},
- {1, 9}, {3, 8}, {0, 10}, {2, 9},
- {1, 10}, {3, 9}, {0, 11}, {2, 10},
- {1, 11}, {3, 10}, {0, 12}, {2, 11},
- {1, 12}, {3, 11}, {0, 13}, {2, 12},
- {1, 13}, {0, 14}, {3, 12}, {2, 13},
- {1, 14}, {3, 13}, {0, 15}, {2, 14},
- {1, 15}, {3, 14}, {2, 15}, {3, 15},
- {4, 8}, {5, 8}, {4, 9}, {8, 4},
- {8, 5}, {6, 8}, {5, 9}, {4, 10},
- {9, 4}, {8, 6}, {7, 8}, {9, 5},
- {5, 10}, {8, 7}, {6, 9}, {4, 11},
- {10, 4}, {9, 6}, {7, 9}, {8, 8},
- {10, 5}, {6, 10}, {5, 11}, {9, 7},
- {8, 9}, {10, 6}, {7, 10}, {4, 12},
- {11, 4}, {9, 8}, {6, 11}, {10, 7},
- {11, 5}, {5, 12}, {8, 10}, {7, 11},
- {9, 9}, {4, 13}, {10, 8}, {11, 6},
- {11, 7}, {6, 12}, {8, 11}, {9, 10},
- {12, 4}, {5, 13}, {10, 9}, {12, 5},
- {7, 12}, {11, 8}, {4, 14}, {6, 13},
- {10, 10}, {9, 11}, {12, 6}, {13, 4},
- {11, 9}, {8, 12}, {5, 14}, {12, 7},
- {7, 13}, {4, 15}, {13, 5}, {10, 11},
- {11, 10}, {9, 12}, {13, 6}, {12, 8},
- {6, 14}, {8, 13}, {5, 15}, {13, 7},
- {14, 4}, {12, 9}, {7, 14}, {11, 11},
- {10, 12}, {9, 13}, {14, 5}, {6, 15},
- {13, 8}, {8, 14}, {12, 10}, {14, 6},
- {7, 15}, {13, 9}, {15, 4}, {10, 13},
- {11, 12}, {14, 7}, {9, 14}, {12, 11},
- {8, 15}, {15, 5}, {13, 10}, {14, 8},
- {11, 13}, {15, 6}, {9, 15}, {10, 14},
- {14, 9}, {15, 7}, {13, 11}, {12, 12},
- {10, 15}, {11, 14}, {15, 8}, {14, 10},
- {12, 13}, {13, 12}, {15, 9}, {11, 15},
- {14, 11}, {13, 13}, {15, 10}, {12, 14},
- {13, 14}, {15, 11}, {14, 12}, {12, 15},
- {14, 13}, {13, 15}, {15, 12}, {14, 14},
- {15, 13}, {14, 15}, {15, 14}, {15, 15}
- };
-
-OD_EXTERN const unsigned char OD_ZIGZAG16_ADST_DCT[192][2] = {
- {8, 0}, {9, 0}, {10, 0}, {8, 1},
- {11, 0}, {9, 1}, {8, 2}, {12, 0},
- {10, 1}, {9, 2}, {8, 3}, {13, 0},
- {11, 1}, {10, 2}, {9, 3}, {14, 0},
- {12, 1}, {10, 3}, {15, 0}, {11, 2},
- {13, 1}, {11, 3}, {12, 2}, {14, 1},
- {12, 3}, {13, 2}, {15, 1}, {13, 3},
- {14, 2}, {14, 3}, {15, 2}, {15, 3},
- {0, 8}, {1, 8}, {2, 8}, {0, 9},
- {3, 8}, {1, 9}, {2, 9}, {0, 10},
- {3, 9}, {1, 10}, {2, 10}, {0, 11},
- {3, 10}, {1, 11}, {2, 11}, {0, 12},
- {3, 11}, {1, 12}, {2, 12}, {0, 13},
- {3, 12}, {1, 13}, {0, 14}, {2, 13},
- {0, 15}, {1, 14}, {3, 13}, {2, 14},
- {1, 15}, {3, 14}, {2, 15}, {3, 15},
- {8, 4}, {9, 4}, {8, 5}, {4, 8},
- {10, 4}, {9, 5}, {5, 8}, {8, 6},
- {4, 9}, {10, 5}, {9, 6}, {6, 8},
- {8, 7}, {11, 4}, {7, 8}, {5, 9},
- {9, 7}, {11, 5}, {10, 6}, {4, 10},
- {6, 9}, {8, 8}, {5, 10}, {7, 9},
- {12, 4}, {10, 7}, {9, 8}, {11, 6},
- {8, 9}, {4, 11}, {6, 10}, {7, 10},
- {12, 5}, {5, 11}, {10, 8}, {11, 7},
- {9, 9}, {4, 12}, {13, 4}, {8, 10},
- {6, 11}, {12, 6}, {5, 12}, {10, 9},
- {7, 11}, {9, 10}, {11, 8}, {13, 5},
- {8, 11}, {4, 13}, {6, 12}, {10, 10},
- {12, 7}, {11, 9}, {7, 12}, {14, 4},
- {5, 13}, {9, 11}, {13, 6}, {8, 12},
- {4, 14}, {12, 8}, {6, 13}, {11, 10},
- {10, 11}, {12, 9}, {5, 14}, {13, 7},
- {14, 5}, {9, 12}, {4, 15}, {7, 13},
- {8, 13}, {6, 14}, {13, 8}, {11, 11},
- {10, 12}, {15, 4}, {12, 10}, {14, 6},
- {13, 9}, {5, 15}, {9, 13}, {7, 14},
- {15, 5}, {6, 15}, {8, 14}, {14, 7},
- {11, 12}, {7, 15}, {9, 14}, {13, 10},
- {10, 13}, {14, 8}, {15, 6}, {14, 9},
- {12, 11}, {8, 15}, {15, 7}, {10, 14},
- {11, 13}, {9, 15}, {13, 11}, {12, 12},
- {15, 8}, {14, 10}, {15, 9}, {10, 15},
- {11, 14}, {13, 12}, {12, 13}, {15, 10},
- {14, 11}, {11, 15}, {13, 13}, {15, 11},
- {14, 12}, {12, 14}, {15, 12}, {13, 14},
- {12, 15}, {14, 13}, {13, 15}, {15, 13},
- {14, 14}, {15, 14}, {14, 15}, {15, 15}
- };
-
-OD_EXTERN const unsigned char OD_ZIGZAG16_DCT_ADST[192][2] = {
- {8, 0}, {8, 1}, {8, 2}, {8, 3},
- {9, 0}, {9, 1}, {9, 2}, {9, 3},
- {10, 0}, {10, 1}, {10, 2}, {10, 3},
- {11, 0}, {11, 1}, {11, 2}, {11, 3},
- {12, 0}, {12, 1}, {12, 2}, {12, 3},
- {13, 0}, {13, 1}, {13, 2}, {13, 3},
- {14, 0}, {15, 0}, {14, 1}, {14, 2},
- {14, 3}, {15, 1}, {15, 2}, {15, 3},
- {0, 8}, {0, 9}, {0, 10}, {1, 8},
- {0, 11}, {1, 9}, {2, 8}, {0, 12},
- {1, 10}, {2, 9}, {0, 13}, {1, 11},
- {3, 8}, {2, 10}, {0, 14}, {1, 12},
- {3, 9}, {0, 15}, {2, 11}, {3, 10},
- {1, 13}, {2, 12}, {3, 11}, {1, 14},
- {2, 13}, {1, 15}, {3, 12}, {2, 14},
- {3, 13}, {2, 15}, {3, 14}, {3, 15},
- {4, 8}, {4, 9}, {5, 8}, {4, 10},
- {5, 9}, {4, 11}, {6, 8}, {5, 10},
- {8, 4}, {6, 9}, {4, 12}, {5, 11},
- {8, 5}, {6, 10}, {7, 8}, {8, 6},
- {4, 13}, {7, 9}, {5, 12}, {8, 7},
- {9, 4}, {6, 11}, {8, 8}, {7, 10},
- {5, 13}, {9, 5}, {4, 14}, {9, 6},
- {8, 9}, {6, 12}, {9, 7}, {7, 11},
- {4, 15}, {8, 10}, {9, 8}, {5, 14},
- {10, 4}, {6, 13}, {10, 5}, {9, 9},
- {7, 12}, {8, 11}, {10, 6}, {5, 15},
- {10, 7}, {6, 14}, {9, 10}, {7, 13},
- {8, 12}, {10, 8}, {9, 11}, {6, 15},
- {11, 4}, {11, 5}, {10, 9}, {8, 13},
- {7, 14}, {11, 6}, {9, 12}, {11, 7},
- {10, 10}, {7, 15}, {8, 14}, {12, 4},
- {11, 8}, {12, 5}, {9, 13}, {10, 11},
- {8, 15}, {11, 9}, {12, 6}, {12, 7},
- {10, 12}, {9, 14}, {11, 10}, {13, 4},
- {12, 8}, {9, 15}, {13, 5}, {11, 11},
- {12, 9}, {10, 13}, {13, 6}, {13, 7},
- {12, 10}, {14, 4}, {11, 12}, {13, 8},
- {10, 14}, {14, 5}, {12, 11}, {13, 9},
- {14, 6}, {10, 15}, {11, 13}, {15, 4},
- {14, 7}, {12, 12}, {13, 10}, {14, 8},
- {15, 5}, {13, 11}, {15, 6}, {11, 14},
- {14, 9}, {12, 13}, {11, 15}, {15, 7},
- {14, 10}, {15, 8}, {13, 12}, {12, 14},
- {15, 9}, {14, 11}, {13, 13}, {12, 15},
- {15, 10}, {14, 12}, {13, 14}, {15, 11},
- {13, 15}, {14, 13}, {14, 14}, {15, 12},
- {14, 15}, {15, 13}, {15, 14}, {15, 15}
- };
diff --git a/third_party/aom/av1/common/zigzag32.c b/third_party/aom/av1/common/zigzag32.c
deleted file mode 100644
index cb3b9bc63..000000000
--- a/third_party/aom/av1/common/zigzag32.c
+++ /dev/null
@@ -1,199 +0,0 @@
-/* This file is generated by gen_zigzag32.m */
-
-/* clang-format off */
-
-#include "odintrin.h"
-OD_EXTERN const unsigned char OD_ZIGZAG32_DCT_DCT[768][2] = {
- { 16, 0 }, { 17, 0 }, { 18, 0 }, { 19, 0 },
- { 16, 1 }, { 17, 1 }, { 20, 0 }, { 16, 2 },
- { 18, 1 }, { 21, 0 }, { 17, 2 }, { 16, 3 },
- { 19, 1 }, { 22, 0 }, { 18, 2 }, { 17, 3 },
- { 20, 1 }, { 16, 4 }, { 23, 0 }, { 19, 2 },
- { 24, 0 }, { 16, 5 }, { 21, 1 }, { 17, 4 },
- { 18, 3 }, { 20, 2 }, { 17, 5 }, { 16, 6 },
- { 19, 3 }, { 18, 4 }, { 25, 0 }, { 22, 1 },
- { 16, 7 }, { 21, 2 }, { 17, 6 }, { 20, 3 },
- { 26, 0 }, { 18, 5 }, { 19, 4 }, { 17, 7 },
- { 23, 1 }, { 22, 2 }, { 18, 6 }, { 27, 0 },
- { 19, 5 }, { 24, 1 }, { 21, 3 }, { 28, 0 },
- { 20, 4 }, { 18, 7 }, { 19, 6 }, { 23, 2 },
- { 29, 0 }, { 25, 1 }, { 21, 4 }, { 30, 0 },
- { 20, 5 }, { 22, 3 }, { 31, 0 }, { 19, 7 },
- { 24, 2 }, { 26, 1 }, { 20, 6 }, { 21, 5 },
- { 22, 4 }, { 23, 3 }, { 27, 1 }, { 25, 2 },
- { 20, 7 }, { 28, 1 }, { 24, 3 }, { 21, 6 },
- { 22, 5 }, { 23, 4 }, { 26, 2 }, { 21, 7 },
- { 29, 1 }, { 25, 3 }, { 30, 1 }, { 27, 2 },
- { 22, 6 }, { 23, 5 }, { 31, 1 }, { 24, 4 },
- { 26, 3 }, { 28, 2 }, { 22, 7 }, { 23, 6 },
- { 25, 4 }, { 24, 5 }, { 29, 2 }, { 30, 2 },
- { 27, 3 }, { 23, 7 }, { 31, 2 }, { 24, 6 },
- { 26, 4 }, { 25, 5 }, { 28, 3 }, { 24, 7 },
- { 27, 4 }, { 29, 3 }, { 25, 6 }, { 26, 5 },
- { 30, 3 }, { 31, 3 }, { 28, 4 }, { 27, 5 },
- { 25, 7 }, { 29, 4 }, { 26, 6 }, { 28, 5 },
- { 30, 4 }, { 26, 7 }, { 27, 6 }, { 31, 4 },
- { 29, 5 }, { 27, 7 }, { 30, 5 }, { 28, 6 },
- { 31, 5 }, { 29, 6 }, { 28, 7 }, { 30, 6 },
- { 31, 6 }, { 29, 7 }, { 30, 7 }, { 31, 7 },
- { 0, 16 }, { 0, 17 }, { 1, 16 }, { 0, 18 },
- { 1, 17 }, { 0, 19 }, { 2, 16 }, { 1, 18 },
- { 0, 20 }, { 2, 17 }, { 3, 16 }, { 1, 19 },
- { 2, 18 }, { 0, 21 }, { 3, 17 }, { 4, 16 },
- { 1, 20 }, { 2, 19 }, { 0, 22 }, { 3, 18 },
- { 4, 17 }, { 5, 16 }, { 0, 23 }, { 3, 19 },
- { 2, 20 }, { 1, 21 }, { 4, 18 }, { 6, 16 },
- { 5, 17 }, { 3, 20 }, { 2, 21 }, { 1, 22 },
- { 0, 24 }, { 0, 25 }, { 4, 19 }, { 7, 16 },
- { 6, 17 }, { 5, 18 }, { 0, 26 }, { 3, 21 },
- { 2, 22 }, { 1, 23 }, { 4, 20 }, { 5, 19 },
- { 6, 18 }, { 1, 24 }, { 7, 17 }, { 0, 27 },
- { 2, 23 }, { 3, 22 }, { 4, 21 }, { 1, 25 },
- { 5, 20 }, { 7, 18 }, { 0, 28 }, { 6, 19 },
- { 2, 24 }, { 1, 26 }, { 0, 29 }, { 4, 22 },
- { 3, 23 }, { 2, 25 }, { 5, 21 }, { 0, 31 },
- { 7, 19 }, { 6, 20 }, { 0, 30 }, { 1, 27 },
- { 3, 24 }, { 2, 26 }, { 4, 23 }, { 5, 22 },
- { 7, 20 }, { 1, 28 }, { 6, 21 }, { 3, 25 },
- { 2, 27 }, { 1, 29 }, { 4, 24 }, { 2, 28 },
- { 1, 30 }, { 7, 21 }, { 5, 23 }, { 3, 26 },
- { 6, 22 }, { 1, 31 }, { 4, 25 }, { 7, 22 },
- { 3, 27 }, { 2, 29 }, { 2, 30 }, { 5, 24 },
- { 2, 31 }, { 6, 23 }, { 4, 26 }, { 3, 28 },
- { 5, 25 }, { 3, 29 }, { 6, 24 }, { 7, 23 },
- { 3, 30 }, { 4, 27 }, { 3, 31 }, { 5, 26 },
- { 6, 25 }, { 4, 28 }, { 7, 24 }, { 4, 29 },
- { 5, 27 }, { 4, 30 }, { 4, 31 }, { 6, 26 },
- { 5, 28 }, { 7, 25 }, { 6, 27 }, { 5, 29 },
- { 7, 26 }, { 5, 30 }, { 5, 31 }, { 6, 28 },
- { 7, 27 }, { 6, 29 }, { 6, 30 }, { 7, 28 },
- { 6, 31 }, { 7, 29 }, { 7, 30 }, { 7, 31 },
- { 8, 16 }, { 9, 16 }, { 8, 17 }, { 10, 16 },
- { 9, 17 }, { 16, 8 }, { 8, 18 }, { 16, 9 },
- { 10, 17 }, { 11, 16 }, { 17, 8 }, { 9, 18 },
- { 8, 19 }, { 16, 10 }, { 11, 17 }, { 12, 16 },
- { 10, 18 }, { 17, 9 }, { 9, 19 }, { 16, 11 },
- { 8, 20 }, { 18, 8 }, { 17, 10 }, { 10, 19 },
- { 12, 17 }, { 11, 18 }, { 9, 20 }, { 16, 12 },
- { 18, 9 }, { 8, 21 }, { 13, 16 }, { 17, 11 },
- { 19, 8 }, { 18, 10 }, { 13, 17 }, { 16, 13 },
- { 11, 19 }, { 12, 18 }, { 10, 20 }, { 17, 12 },
- { 9, 21 }, { 19, 9 }, { 8, 22 }, { 14, 16 },
- { 18, 11 }, { 11, 20 }, { 10, 21 }, { 20, 8 },
- { 13, 18 }, { 16, 14 }, { 12, 19 }, { 17, 13 },
- { 19, 10 }, { 14, 17 }, { 9, 22 }, { 18, 12 },
- { 8, 23 }, { 17, 14 }, { 20, 9 }, { 15, 16 },
- { 16, 15 }, { 13, 19 }, { 10, 22 }, { 19, 11 },
- { 11, 21 }, { 14, 18 }, { 12, 20 }, { 18, 13 },
- { 20, 10 }, { 21, 8 }, { 15, 17 }, { 9, 23 },
- { 19, 12 }, { 11, 22 }, { 8, 24 }, { 21, 9 },
- { 17, 15 }, { 16, 16 }, { 14, 19 }, { 18, 14 },
- { 12, 21 }, { 13, 20 }, { 20, 11 }, { 10, 23 },
- { 19, 13 }, { 15, 18 }, { 16, 17 }, { 21, 10 },
- { 22, 8 }, { 9, 24 }, { 8, 25 }, { 20, 12 },
- { 15, 19 }, { 11, 23 }, { 17, 16 }, { 18, 15 },
- { 14, 20 }, { 12, 22 }, { 10, 24 }, { 22, 9 },
- { 21, 11 }, { 19, 14 }, { 13, 21 }, { 16, 18 },
- { 9, 25 }, { 17, 17 }, { 8, 26 }, { 20, 13 },
- { 23, 8 }, { 12, 23 }, { 13, 22 }, { 22, 10 },
- { 19, 15 }, { 15, 20 }, { 16, 19 }, { 21, 12 },
- { 11, 24 }, { 14, 21 }, { 8, 27 }, { 18, 16 },
- { 10, 25 }, { 9, 26 }, { 22, 11 }, { 20, 14 },
- { 23, 9 }, { 18, 17 }, { 17, 18 }, { 17, 19 },
- { 19, 16 }, { 21, 13 }, { 10, 26 }, { 12, 24 },
- { 23, 10 }, { 24, 8 }, { 8, 28 }, { 16, 20 },
- { 9, 27 }, { 15, 21 }, { 22, 12 }, { 14, 22 },
- { 13, 23 }, { 20, 15 }, { 11, 25 }, { 24, 9 },
- { 18, 18 }, { 19, 17 }, { 23, 11 }, { 10, 27 },
- { 8, 29 }, { 12, 25 }, { 9, 28 }, { 8, 30 },
- { 21, 14 }, { 13, 24 }, { 11, 26 }, { 25, 8 },
- { 24, 10 }, { 20, 16 }, { 19, 18 }, { 14, 23 },
- { 22, 13 }, { 8, 31 }, { 17, 20 }, { 9, 29 },
- { 23, 12 }, { 15, 22 }, { 25, 9 }, { 11, 27 },
- { 10, 28 }, { 20, 17 }, { 21, 15 }, { 18, 19 },
- { 16, 21 }, { 24, 11 }, { 9, 30 }, { 12, 26 },
- { 10, 29 }, { 22, 14 }, { 14, 24 }, { 9, 31 },
- { 26, 8 }, { 13, 25 }, { 25, 10 }, { 18, 20 },
- { 19, 19 }, { 11, 28 }, { 15, 23 }, { 20, 18 },
- { 10, 30 }, { 12, 27 }, { 17, 21 }, { 23, 13 },
- { 24, 12 }, { 21, 16 }, { 16, 22 }, { 26, 9 },
- { 27, 8 }, { 13, 26 }, { 22, 15 }, { 10, 31 },
- { 14, 25 }, { 12, 28 }, { 25, 11 }, { 21, 17 },
- { 26, 10 }, { 20, 19 }, { 11, 29 }, { 15, 24 },
- { 23, 14 }, { 27, 9 }, { 11, 30 }, { 13, 27 },
- { 19, 20 }, { 24, 13 }, { 28, 8 }, { 11, 31 },
- { 22, 16 }, { 17, 22 }, { 16, 23 }, { 25, 12 },
- { 18, 21 }, { 12, 29 }, { 21, 18 }, { 28, 9 },
- { 27, 10 }, { 26, 11 }, { 29, 8 }, { 14, 26 },
- { 15, 25 }, { 13, 28 }, { 12, 30 }, { 23, 15 },
- { 30, 8 }, { 16, 24 }, { 13, 29 }, { 25, 13 },
- { 24, 14 }, { 20, 20 }, { 31, 8 }, { 12, 31 },
- { 14, 27 }, { 28, 10 }, { 26, 12 }, { 22, 17 },
- { 21, 19 }, { 17, 23 }, { 18, 22 }, { 29, 9 },
- { 27, 11 }, { 19, 21 }, { 27, 12 }, { 30, 9 },
- { 31, 9 }, { 13, 30 }, { 24, 15 }, { 23, 16 },
- { 15, 26 }, { 14, 28 }, { 29, 10 }, { 28, 11 },
- { 26, 13 }, { 17, 24 }, { 13, 31 }, { 25, 14 },
- { 22, 18 }, { 16, 25 }, { 30, 10 }, { 14, 29 },
- { 15, 27 }, { 19, 22 }, { 21, 20 }, { 20, 21 },
- { 27, 13 }, { 29, 11 }, { 18, 23 }, { 23, 17 },
- { 16, 26 }, { 31, 10 }, { 24, 16 }, { 14, 30 },
- { 22, 19 }, { 14, 31 }, { 28, 12 }, { 26, 14 },
- { 30, 11 }, { 15, 28 }, { 25, 15 }, { 17, 25 },
- { 23, 18 }, { 18, 24 }, { 15, 30 }, { 29, 12 },
- { 31, 11 }, { 16, 27 }, { 24, 17 }, { 28, 13 },
- { 19, 23 }, { 15, 29 }, { 25, 16 }, { 17, 26 },
- { 27, 14 }, { 22, 20 }, { 15, 31 }, { 20, 22 },
- { 21, 21 }, { 16, 28 }, { 17, 27 }, { 30, 12 },
- { 26, 15 }, { 19, 24 }, { 18, 25 }, { 23, 19 },
- { 29, 13 }, { 31, 12 }, { 24, 18 }, { 26, 16 },
- { 25, 17 }, { 16, 29 }, { 28, 14 }, { 20, 23 },
- { 18, 26 }, { 21, 22 }, { 19, 25 }, { 22, 21 },
- { 27, 15 }, { 17, 28 }, { 16, 30 }, { 26, 17 },
- { 23, 20 }, { 16, 31 }, { 25, 18 }, { 27, 16 },
- { 20, 24 }, { 24, 19 }, { 31, 13 }, { 30, 13 },
- { 29, 14 }, { 18, 27 }, { 28, 15 }, { 17, 29 },
- { 19, 26 }, { 17, 30 }, { 21, 23 }, { 22, 22 },
- { 30, 14 }, { 20, 25 }, { 23, 21 }, { 17, 31 },
- { 18, 28 }, { 25, 19 }, { 24, 20 }, { 28, 16 },
- { 31, 14 }, { 26, 18 }, { 19, 27 }, { 29, 15 },
- { 27, 17 }, { 30, 15 }, { 21, 24 }, { 22, 23 },
- { 26, 19 }, { 23, 22 }, { 28, 17 }, { 29, 16 },
- { 18, 30 }, { 24, 21 }, { 25, 20 }, { 18, 31 },
- { 18, 29 }, { 20, 26 }, { 19, 28 }, { 27, 18 },
- { 31, 15 }, { 20, 27 }, { 30, 16 }, { 19, 29 },
- { 29, 17 }, { 31, 16 }, { 27, 19 }, { 21, 25 },
- { 28, 18 }, { 26, 20 }, { 22, 24 }, { 25, 21 },
- { 19, 30 }, { 24, 22 }, { 30, 17 }, { 21, 26 },
- { 23, 23 }, { 19, 31 }, { 20, 28 }, { 31, 17 },
- { 28, 19 }, { 27, 20 }, { 21, 27 }, { 29, 18 },
- { 30, 18 }, { 25, 22 }, { 26, 21 }, { 20, 29 },
- { 22, 25 }, { 24, 23 }, { 29, 19 }, { 23, 24 },
- { 20, 31 }, { 20, 30 }, { 28, 20 }, { 21, 28 },
- { 22, 26 }, { 31, 18 }, { 27, 21 }, { 30, 19 },
- { 22, 27 }, { 29, 20 }, { 23, 25 }, { 24, 24 },
- { 26, 22 }, { 21, 29 }, { 25, 23 }, { 31, 19 },
- { 21, 30 }, { 23, 26 }, { 28, 21 }, { 21, 31 },
- { 22, 28 }, { 30, 20 }, { 25, 24 }, { 27, 22 },
- { 29, 21 }, { 26, 23 }, { 24, 25 }, { 31, 20 },
- { 23, 27 }, { 22, 29 }, { 30, 21 }, { 28, 22 },
- { 24, 26 }, { 25, 25 }, { 27, 23 }, { 22, 30 },
- { 23, 28 }, { 22, 31 }, { 26, 24 }, { 31, 21 },
- { 24, 27 }, { 29, 22 }, { 27, 24 }, { 30, 22 },
- { 25, 26 }, { 28, 23 }, { 23, 30 }, { 23, 29 },
- { 24, 28 }, { 25, 27 }, { 31, 22 }, { 23, 31 },
- { 26, 25 }, { 28, 24 }, { 29, 23 }, { 24, 29 },
- { 24, 30 }, { 27, 25 }, { 25, 28 }, { 26, 26 },
- { 30, 23 }, { 26, 27 }, { 31, 23 }, { 28, 25 },
- { 27, 26 }, { 25, 29 }, { 24, 31 }, { 29, 24 },
- { 30, 24 }, { 27, 27 }, { 29, 25 }, { 26, 28 },
- { 31, 24 }, { 25, 30 }, { 25, 31 }, { 28, 26 },
- { 27, 28 }, { 26, 29 }, { 30, 25 }, { 29, 26 },
- { 28, 27 }, { 26, 30 }, { 31, 25 }, { 27, 29 },
- { 26, 31 }, { 30, 26 }, { 28, 28 }, { 31, 26 },
- { 29, 27 }, { 27, 30 }, { 28, 29 }, { 27, 31 },
- { 30, 27 }, { 31, 27 }, { 28, 30 }, { 29, 28 },
- { 30, 28 }, { 29, 29 }, { 30, 29 }, { 31, 28 },
- { 28, 31 }, { 29, 30 }, { 29, 31 }, { 31, 29 },
- { 30, 30 }, { 30, 31 }, { 31, 30 }, { 31, 31 }
-};
diff --git a/third_party/aom/av1/common/zigzag4.c b/third_party/aom/av1/common/zigzag4.c
deleted file mode 100644
index 1fb5a320b..000000000
--- a/third_party/aom/av1/common/zigzag4.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/* This file is generated by gen_zigzag4.m */
-
-/* clang-format off */
-
-#include "odintrin.h"
-OD_EXTERN const unsigned char OD_ZIGZAG4_DCT_DCT[15][2] = {
- {0, 1}, {1, 0}, {1, 1}, {0, 2},
- {2, 0}, {0, 3}, {1, 2}, {3, 0},
- {2, 1}, {1, 3}, {2, 2}, {3, 1},
- {2, 3}, {3, 2}, {3, 3} };
-
-OD_EXTERN const unsigned char OD_ZIGZAG4_ADST_DCT[15][2] = {
- {1, 0}, {0, 1}, {2, 0}, {1, 1},
- {3, 0}, {2, 1}, {0, 2}, {1, 2},
- {3, 1}, {0, 3}, {2, 2}, {1, 3},
- {3, 2}, {2, 3}, {3, 3} };
-
-OD_EXTERN const unsigned char OD_ZIGZAG4_DCT_ADST[15][2] = {
- {0, 1}, {0, 2}, {1, 0}, {0, 3},
- {1, 1}, {1, 2}, {2, 0}, {1, 3},
- {2, 1}, {2, 2}, {3, 0}, {3, 1},
- {2, 3}, {3, 2}, {3, 3} };
diff --git a/third_party/aom/av1/common/zigzag8.c b/third_party/aom/av1/common/zigzag8.c
deleted file mode 100644
index 3f11e0c03..000000000
--- a/third_party/aom/av1/common/zigzag8.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/* This file is generated by gen_zigzag8.m */
-
-/* clang-format off */
-
-#include "odintrin.h"
-
-OD_EXTERN const unsigned char OD_ZIGZAG8_DCT_DCT[48][2] = {
- {4, 0}, {4, 1}, {5, 0}, {5, 1},
- {6, 0}, {7, 0}, {6, 1}, {7, 1},
- {0, 4}, {1, 4}, {0, 5}, {1, 5},
- {0, 6}, {1, 6}, {0, 7}, {1, 7},
- {2, 4}, {4, 2}, {3, 4}, {2, 5},
- {4, 3}, {5, 2}, {4, 4}, {3, 5},
- {5, 3}, {2, 6}, {4, 5}, {6, 2},
- {5, 4}, {3, 6}, {2, 7}, {6, 3},
- {5, 5}, {7, 2}, {4, 6}, {3, 7},
- {6, 4}, {7, 3}, {4, 7}, {5, 6},
- {6, 5}, {7, 4}, {5, 7}, {6, 6},
- {7, 5}, {6, 7}, {7, 6}, {7, 7}
- };
-
-OD_EXTERN const unsigned char OD_ZIGZAG8_ADST_DCT[48][2] = {
- {4, 0}, {5, 0}, {4, 1}, {6, 0},
- {5, 1}, {7, 0}, {6, 1}, {7, 1},
- {0, 4}, {1, 4}, {0, 5}, {1, 5},
- {0, 6}, {1, 6}, {0, 7}, {1, 7},
- {4, 2}, {2, 4}, {5, 2}, {4, 3},
- {3, 4}, {2, 5}, {5, 3}, {4, 4},
- {6, 2}, {3, 5}, {5, 4}, {2, 6},
- {4, 5}, {6, 3}, {7, 2}, {3, 6},
- {2, 7}, {5, 5}, {6, 4}, {4, 6},
- {7, 3}, {3, 7}, {5, 6}, {6, 5},
- {4, 7}, {7, 4}, {5, 7}, {7, 5},
- {6, 6}, {7, 6}, {6, 7}, {7, 7}
- };
-
-OD_EXTERN const unsigned char OD_ZIGZAG8_DCT_ADST[48][2] = {
- {4, 0}, {4, 1}, {5, 0}, {5, 1},
- {6, 0}, {6, 1}, {7, 0}, {7, 1},
- {0, 4}, {0, 5}, {1, 4}, {0, 6},
- {1, 5}, {0, 7}, {1, 6}, {1, 7},
- {2, 4}, {2, 5}, {3, 4}, {4, 2},
- {2, 6}, {4, 3}, {3, 5}, {4, 4},
- {2, 7}, {3, 6}, {5, 2}, {4, 5},
- {5, 3}, {3, 7}, {5, 4}, {4, 6},
- {6, 2}, {5, 5}, {4, 7}, {6, 3},
- {6, 4}, {5, 6}, {7, 2}, {6, 5},
- {7, 3}, {5, 7}, {7, 4}, {6, 6},
- {7, 5}, {6, 7}, {7, 6}, {7, 7}
- };